Skip to content

Commit ed4cd52

Browse files
committed
replace compressor in vortex-btrblocks
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent a36ed5a commit ed4cd52

30 files changed

+2939
-5263
lines changed

vortex-btrblocks/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ rust-version = { workspace = true }
1414
version = { workspace = true }
1515

1616
[dependencies]
17-
enum-iterator = { workspace = true }
1817
getrandom_v03 = { workspace = true }
1918
itertools = { workspace = true }
2019
num-traits = { workspace = true }
@@ -25,6 +24,7 @@ tracing = { workspace = true }
2524
vortex-alp = { workspace = true }
2625
vortex-array = { workspace = true }
2726
vortex-buffer = { workspace = true }
27+
vortex-compressor = { workspace = true }
2828
vortex-datetime-parts = { workspace = true }
2929
vortex-decimal-byte-parts = { workspace = true }
3030
vortex-error = { workspace = true }

vortex-btrblocks/benches/dict_encode.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ use vortex_array::arrays::BoolArray;
99
use vortex_array::arrays::PrimitiveArray;
1010
use vortex_array::builders::dict::dict_encode;
1111
use vortex_array::validity::Validity;
12-
use vortex_btrblocks::CompressorStats;
1312
use vortex_btrblocks::IntegerStats;
1413
use vortex_btrblocks::integer_dictionary_encode;
1514
use vortex_buffer::BufferMut;

vortex-btrblocks/benches/stats_calc.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ mod benchmarks {
1010
use divan::Bencher;
1111
use vortex_array::arrays::PrimitiveArray;
1212
use vortex_array::validity::Validity;
13-
use vortex_btrblocks::CompressorStats;
1413
use vortex_btrblocks::GenerateStatsOptions;
1514
use vortex_btrblocks::IntegerStats;
1615
use vortex_buffer::Buffer;

vortex-btrblocks/public-api.lock

Lines changed: 465 additions & 165 deletions
Large diffs are not rendered by default.

vortex-btrblocks/src/builder.rs

Lines changed: 114 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -3,156 +3,165 @@
33

44
//! Builder for configuring `BtrBlocksCompressor` instances.
55
6-
use itertools::Itertools;
76
use vortex_utils::aliases::hash_set::HashSet;
87

98
use crate::BtrBlocksCompressor;
10-
use crate::FloatCode;
11-
use crate::IntCode;
12-
use crate::StringCode;
13-
use crate::compressor::float::ALL_FLOAT_SCHEMES;
14-
use crate::compressor::float::FloatScheme;
15-
use crate::compressor::integer::ALL_INT_SCHEMES;
16-
use crate::compressor::integer::IntegerScheme;
17-
use crate::compressor::string::ALL_STRING_SCHEMES;
18-
use crate::compressor::string::StringScheme;
9+
use crate::CascadingCompressor;
10+
use crate::Scheme;
11+
use crate::SchemeExt;
12+
use crate::SchemeId;
13+
use crate::schemes::decimal;
14+
use crate::schemes::float;
15+
use crate::schemes::integer;
16+
use crate::schemes::rle;
17+
use crate::schemes::string;
18+
use crate::schemes::temporal;
19+
20+
/// All available compression schemes.
21+
///
22+
/// This list is order-sensitive: the builder preserves this order when constructing
23+
/// the final scheme list, so that tie-breaking is deterministic.
24+
pub const ALL_SCHEMES: &[&dyn Scheme] = &[
25+
////////////////////////////////////////////////////////////////////////////////////////////////
26+
// Integer schemes.
27+
////////////////////////////////////////////////////////////////////////////////////////////////
28+
&integer::IntConstantScheme,
29+
// NOTE: FoR must precede BitPacking to avoid unnecessary patches.
30+
&integer::FoRScheme,
31+
// NOTE: ZigZag should precede BitPacking because we don't want negative numbers.
32+
&integer::ZigZagScheme,
33+
&integer::BitPackingScheme,
34+
&integer::SparseScheme,
35+
&integer::IntDictScheme,
36+
&integer::RunEndScheme,
37+
&integer::SequenceScheme,
38+
&rle::RLE_INTEGER_SCHEME,
39+
#[cfg(feature = "pco")]
40+
&integer::PcoScheme,
41+
////////////////////////////////////////////////////////////////////////////////////////////////
42+
// Float schemes.
43+
////////////////////////////////////////////////////////////////////////////////////////////////
44+
&float::FloatConstantScheme,
45+
&float::ALPScheme,
46+
&float::ALPRDScheme,
47+
&float::FloatDictScheme,
48+
&float::NullDominatedSparseScheme,
49+
&rle::RLE_FLOAT_SCHEME,
50+
#[cfg(feature = "pco")]
51+
&float::PcoScheme,
52+
////////////////////////////////////////////////////////////////////////////////////////////////
53+
// String schemes.
54+
////////////////////////////////////////////////////////////////////////////////////////////////
55+
&string::StringDictScheme,
56+
&string::FSSTScheme,
57+
&string::StringConstantScheme,
58+
&string::NullDominatedSparseScheme,
59+
#[cfg(feature = "zstd")]
60+
&string::ZstdScheme,
61+
#[cfg(all(feature = "zstd", feature = "unstable_encodings"))]
62+
&string::ZstdBuffersScheme,
63+
// Decimal schemes.
64+
&decimal::DecimalScheme,
65+
// Temporal schemes.
66+
&temporal::TemporalScheme,
67+
];
68+
69+
/// Returns the set of scheme IDs excluded by default (behind feature gates or known-expensive).
70+
pub fn default_excluded() -> HashSet<SchemeId> {
71+
#[allow(unused_mut, reason = "depends on enabled feature flags")]
72+
let mut excluded = HashSet::new();
73+
#[cfg(feature = "pco")]
74+
{
75+
excluded.insert(integer::PcoScheme.id());
76+
excluded.insert(float::PcoScheme.id());
77+
}
78+
#[cfg(feature = "zstd")]
79+
excluded.insert(string::ZstdScheme.id());
80+
#[cfg(all(feature = "zstd", feature = "unstable_encodings"))]
81+
excluded.insert(string::ZstdBuffersScheme.id());
82+
excluded
83+
}
1984

2085
/// Builder for creating configured [`BtrBlocksCompressor`] instances.
2186
///
22-
/// Use this builder to configure which compression schemes are allowed for each data type.
23-
/// By default, all schemes are enabled.
87+
/// Use this builder to configure which compression schemes are allowed.
88+
/// By default, all schemes are enabled except those in [`default_excluded`].
2489
///
2590
/// # Examples
2691
///
2792
/// ```rust
28-
/// use vortex_btrblocks::{BtrBlocksCompressorBuilder, IntCode, FloatCode};
93+
/// use vortex_btrblocks::{BtrBlocksCompressorBuilder, Scheme, SchemeExt};
94+
/// use vortex_btrblocks::schemes::integer::IntDictScheme;
2995
///
30-
/// // Default compressor - all schemes allowed
96+
/// // Default compressor - all non-excluded schemes allowed.
3197
/// let compressor = BtrBlocksCompressorBuilder::default().build();
3298
///
33-
/// // Exclude specific schemes
99+
/// // Exclude specific schemes.
34100
/// let compressor = BtrBlocksCompressorBuilder::default()
35-
/// .exclude_int([IntCode::Dict])
101+
/// .exclude([IntDictScheme.id()])
36102
/// .build();
37103
///
38-
/// // Exclude then re-include
104+
/// // Exclude then re-include.
39105
/// let compressor = BtrBlocksCompressorBuilder::default()
40-
/// .exclude_int([IntCode::Dict, IntCode::Rle])
41-
/// .include_int([IntCode::Dict])
106+
/// .exclude([IntDictScheme.id()])
107+
/// .include([IntDictScheme.id()])
42108
/// .build();
43109
/// ```
44110
#[derive(Debug, Clone)]
45111
pub struct BtrBlocksCompressorBuilder {
46-
int_schemes: HashSet<&'static dyn IntegerScheme>,
47-
float_schemes: HashSet<&'static dyn FloatScheme>,
48-
string_schemes: HashSet<&'static dyn StringScheme>,
112+
schemes: HashSet<&'static dyn Scheme>,
49113
}
50114

51115
impl Default for BtrBlocksCompressorBuilder {
52116
fn default() -> Self {
117+
let excluded = default_excluded();
53118
Self {
54-
int_schemes: ALL_INT_SCHEMES
55-
.iter()
56-
.copied()
57-
.filter(|s| s.code() != IntCode::Pco)
58-
.collect(),
59-
float_schemes: ALL_FLOAT_SCHEMES
119+
schemes: ALL_SCHEMES
60120
.iter()
61121
.copied()
62-
.filter(|s| s.code() != FloatCode::Pco)
63-
.collect(),
64-
string_schemes: ALL_STRING_SCHEMES
65-
.iter()
66-
.copied()
67-
.filter(|s| s.code() != StringCode::Zstd && s.code() != StringCode::ZstdBuffers)
122+
.filter(|s| !excluded.contains(&s.id()))
68123
.collect(),
69124
}
70125
}
71126
}
72127

73128
impl BtrBlocksCompressorBuilder {
74-
/// Create a new builder with no encodings enabled.
75-
pub fn empty() -> Self {
76-
Self {
77-
int_schemes: Default::default(),
78-
float_schemes: Default::default(),
79-
string_schemes: Default::default(),
80-
}
81-
}
82-
83-
/// Excludes the specified integer compression schemes.
84-
pub fn exclude_int(mut self, codes: impl IntoIterator<Item = IntCode>) -> Self {
85-
let codes: HashSet<_> = codes.into_iter().collect();
86-
self.int_schemes.retain(|s| !codes.contains(&s.code()));
129+
/// Excludes the specified compression schemes by their [`SchemeId`].
130+
pub fn exclude(mut self, ids: impl IntoIterator<Item = SchemeId>) -> Self {
131+
let ids: HashSet<_> = ids.into_iter().collect();
132+
self.schemes.retain(|s| !ids.contains(&s.id()));
87133
self
88134
}
89135

90-
/// Excludes the specified float compression schemes.
91-
pub fn exclude_float(mut self, codes: impl IntoIterator<Item = FloatCode>) -> Self {
92-
let codes: HashSet<_> = codes.into_iter().collect();
93-
self.float_schemes.retain(|s| !codes.contains(&s.code()));
94-
self
95-
}
96-
97-
/// Excludes the specified string compression schemes.
98-
pub fn exclude_string(mut self, codes: impl IntoIterator<Item = StringCode>) -> Self {
99-
let codes: HashSet<_> = codes.into_iter().collect();
100-
self.string_schemes.retain(|s| !codes.contains(&s.code()));
101-
self
102-
}
103-
104-
/// Includes the specified integer compression schemes.
105-
pub fn include_int(mut self, codes: impl IntoIterator<Item = IntCode>) -> Self {
106-
let codes: HashSet<_> = codes.into_iter().collect();
107-
for scheme in ALL_INT_SCHEMES {
108-
if codes.contains(&scheme.code()) {
109-
self.int_schemes.insert(*scheme);
110-
}
111-
}
112-
self
113-
}
114-
115-
/// Includes the specified float compression schemes.
116-
pub fn include_float(mut self, codes: impl IntoIterator<Item = FloatCode>) -> Self {
117-
let codes: HashSet<_> = codes.into_iter().collect();
118-
for scheme in ALL_FLOAT_SCHEMES {
119-
if codes.contains(&scheme.code()) {
120-
self.float_schemes.insert(*scheme);
136+
/// Includes the specified compression schemes by their [`SchemeId`].
137+
///
138+
/// Only schemes present in [`ALL_SCHEMES`] can be included; any other ID is silently ignored.
139+
pub fn include(mut self, ids: impl IntoIterator<Item = SchemeId>) -> Self {
140+
let ids: HashSet<_> = ids.into_iter().collect();
141+
for scheme in ALL_SCHEMES {
142+
if ids.contains(&scheme.id()) {
143+
self.schemes.insert(*scheme);
121144
}
122145
}
123146
self
124147
}
125148

126-
/// Includes the specified string compression schemes.
127-
pub fn include_string(mut self, codes: impl IntoIterator<Item = StringCode>) -> Self {
128-
let codes: HashSet<_> = codes.into_iter().collect();
129-
for scheme in ALL_STRING_SCHEMES {
130-
if codes.contains(&scheme.code()) {
131-
self.string_schemes.insert(*scheme);
132-
}
133-
}
149+
/// Adds a single scheme to the builder. Schemes not in [`ALL_SCHEMES`] are dropped by `build`.
150+
pub fn with_scheme(mut self, scheme: &'static dyn Scheme) -> Self {
151+
self.schemes.insert(scheme);
134152
self
135153
}
136154

137-
/// Builds the configured `BtrBlocksCompressor`.
155+
/// Builds the configured [`BtrBlocksCompressor`].
156+
///
157+
/// The resulting scheme list preserves the order of [`ALL_SCHEMES`] for deterministic
158+
/// tie-breaking.
138159
pub fn build(self) -> BtrBlocksCompressor {
139-
// Note we should apply the schemes in the same order, in case try conflict.
140-
BtrBlocksCompressor {
141-
int_schemes: self
142-
.int_schemes
143-
.into_iter()
144-
.sorted_by_key(|s| s.code())
145-
.collect_vec(),
146-
float_schemes: self
147-
.float_schemes
148-
.into_iter()
149-
.sorted_by_key(|s| s.code())
150-
.collect_vec(),
151-
string_schemes: self
152-
.string_schemes
153-
.into_iter()
154-
.sorted_by_key(|s| s.code())
155-
.collect_vec(),
156-
}
160+
let schemes = ALL_SCHEMES
161+
.iter()
162+
.copied()
163+
.filter(|s| self.schemes.contains(s))
164+
.collect();
165+
BtrBlocksCompressor(CascadingCompressor::new(schemes))
157166
}
158167
}

0 commit comments

Comments
 (0)