|
3 | 3 |
|
4 | 4 | //! Builder for configuring `BtrBlocksCompressor` instances. |
5 | 5 |
|
6 | | -use itertools::Itertools; |
7 | 6 | use vortex_utils::aliases::hash_set::HashSet; |
8 | 7 |
|
9 | 8 | use crate::BtrBlocksCompressor; |
10 | | -use crate::FloatCode; |
11 | | -use crate::IntCode; |
12 | | -use crate::StringCode; |
13 | | -use crate::compressor::float::ALL_FLOAT_SCHEMES; |
14 | | -use crate::compressor::float::FloatScheme; |
15 | | -use crate::compressor::integer::ALL_INT_SCHEMES; |
16 | | -use crate::compressor::integer::IntegerScheme; |
17 | | -use crate::compressor::string::ALL_STRING_SCHEMES; |
18 | | -use crate::compressor::string::StringScheme; |
| 9 | +use crate::CascadingCompressor; |
| 10 | +use crate::Scheme; |
| 11 | +use crate::SchemeExt; |
| 12 | +use crate::SchemeId; |
| 13 | +use crate::schemes::decimal; |
| 14 | +use crate::schemes::float; |
| 15 | +use crate::schemes::integer; |
| 16 | +use crate::schemes::rle; |
| 17 | +use crate::schemes::string; |
| 18 | +use crate::schemes::temporal; |
| 19 | + |
| 20 | +/// All available compression schemes, grouped by the kind of data they apply to; entries under `#[cfg(feature = ...)]` are present only when that feature is enabled.
| 21 | +///
| 22 | +/// This list is order-sensitive: the builder preserves this order when constructing
| 23 | +/// the final scheme list, so that tie-breaking is deterministic.
| 24 | +pub const ALL_SCHEMES: &[&dyn Scheme] = &[
| 25 | + ////////////////////////////////////////////////////////////////////////////////////////////////
| 26 | + // Integer schemes.
| 27 | + ////////////////////////////////////////////////////////////////////////////////////////////////
| 28 | + &integer::IntConstantScheme,
| 29 | + // NOTE: FoR must precede BitPacking to avoid unnecessary patches.
| 30 | + &integer::FoRScheme,
| 31 | + // NOTE: ZigZag should precede BitPacking because we don't want negative numbers.
| 32 | + &integer::ZigZagScheme,
| 33 | + &integer::BitPackingScheme,
| 34 | + &integer::SparseScheme,
| 35 | + &integer::IntDictScheme,
| 36 | + &integer::RunEndScheme,
| 37 | + &integer::SequenceScheme,
| 38 | + &rle::RLE_INTEGER_SCHEME,
| 39 | + #[cfg(feature = "pco")]
| 40 | + &integer::PcoScheme,
| 41 | + ////////////////////////////////////////////////////////////////////////////////////////////////
| 42 | + // Float schemes.
| 43 | + ////////////////////////////////////////////////////////////////////////////////////////////////
| 44 | + &float::FloatConstantScheme,
| 45 | + &float::ALPScheme,
| 46 | + &float::ALPRDScheme,
| 47 | + &float::FloatDictScheme,
| 48 | + &float::NullDominatedSparseScheme,
| 49 | + &rle::RLE_FLOAT_SCHEME,
| 50 | + #[cfg(feature = "pco")]
| 51 | + &float::PcoScheme,
| 52 | + ////////////////////////////////////////////////////////////////////////////////////////////////
| 53 | + // String schemes.
| 54 | + ////////////////////////////////////////////////////////////////////////////////////////////////
| 55 | + &string::StringDictScheme,
| 56 | + &string::FSSTScheme,
| 57 | + &string::StringConstantScheme,
| 58 | + &string::NullDominatedSparseScheme,
| 59 | + #[cfg(feature = "zstd")]
| 60 | + &string::ZstdScheme,
| 61 | + #[cfg(all(feature = "zstd", feature = "unstable_encodings"))]
| 62 | + &string::ZstdBuffersScheme,
| 63 | + // Decimal schemes.
| 64 | + &decimal::DecimalScheme,
| 65 | + // Temporal schemes.
| 66 | + &temporal::TemporalScheme,
| 67 | +];
| 68 | + |
| 69 | +/// Returns the set of scheme IDs excluded by default (behind feature gates or known-expensive): both `PcoScheme`s with feature `pco`, `ZstdScheme` with feature `zstd`, and `ZstdBuffersScheme` with features `zstd` and `unstable_encodings`. The set is empty when none of these features is enabled.
| 70 | +pub fn default_excluded() -> HashSet<SchemeId> {
| 71 | + #[allow(unused_mut, reason = "depends on enabled feature flags")]
| 72 | + let mut excluded = HashSet::new();
| 73 | + #[cfg(feature = "pco")]
| 74 | + {
| 75 | + excluded.insert(integer::PcoScheme.id());
| 76 | + excluded.insert(float::PcoScheme.id());
| 77 | + }
| 78 | + #[cfg(feature = "zstd")]
| 79 | + excluded.insert(string::ZstdScheme.id());
| 80 | + #[cfg(all(feature = "zstd", feature = "unstable_encodings"))]
| 81 | + excluded.insert(string::ZstdBuffersScheme.id());
| 82 | + excluded
| 83 | +}
19 | 84 |
|
20 | 85 | /// Builder for creating configured [`BtrBlocksCompressor`] instances.
21 | 86 | ///
22 | | -/// Use this builder to configure which compression schemes are allowed for each data type.
23 | | -/// By default, all schemes are enabled.
| 87 | +/// Use this builder to configure which compression schemes are allowed, using `exclude`, `include` and `with_scheme`.
| 88 | +/// By default, all schemes are enabled except those in [`default_excluded`].
24 | 89 | ///
25 | 90 | /// # Examples
26 | 91 | ///
27 | 92 | /// ```rust
28 | | -/// use vortex_btrblocks::{BtrBlocksCompressorBuilder, IntCode, FloatCode};
| 93 | +/// use vortex_btrblocks::{BtrBlocksCompressorBuilder, Scheme, SchemeExt};
| 94 | +/// use vortex_btrblocks::schemes::integer::IntDictScheme;
29 | 95 | ///
30 | | -/// // Default compressor - all schemes allowed
| 96 | +/// // Default compressor - all non-excluded schemes allowed.
31 | 97 | /// let compressor = BtrBlocksCompressorBuilder::default().build();
32 | 98 | ///
33 | | -/// // Exclude specific schemes
| 99 | +/// // Exclude specific schemes.
34 | 100 | /// let compressor = BtrBlocksCompressorBuilder::default()
35 | | -/// .exclude_int([IntCode::Dict])
| 101 | +/// .exclude([IntDictScheme.id()])
36 | 102 | /// .build();
37 | 103 | ///
38 | | -/// // Exclude then re-include
| 104 | +/// // Exclude then re-include: `include` adds the scheme back, so `IntDictScheme` ends up enabled.
39 | 105 | /// let compressor = BtrBlocksCompressorBuilder::default()
40 | | -/// .exclude_int([IntCode::Dict, IntCode::Rle])
41 | | -/// .include_int([IntCode::Dict])
| 106 | +/// .exclude([IntDictScheme.id()])
| 107 | +/// .include([IntDictScheme.id()])
42 | 108 | /// .build();
43 | 109 | /// ```
44 | 110 | #[derive(Debug, Clone)]
45 | 111 | pub struct BtrBlocksCompressorBuilder {
46 | | - int_schemes: HashSet<&'static dyn IntegerScheme>,
47 | | - float_schemes: HashSet<&'static dyn FloatScheme>,
48 | | - string_schemes: HashSet<&'static dyn StringScheme>,
| 112 | + schemes: HashSet<&'static dyn Scheme>,
49 | 113 | }
50 | 114 |
|
51 | 115 | impl Default for BtrBlocksCompressorBuilder {
52 | 116 | fn default() -> Self {
| 117 | + let excluded = default_excluded(); // IDs that start out disabled; computed once, before the filter below.
53 | 118 | Self {
54 | | - int_schemes: ALL_INT_SCHEMES
55 | | - .iter()
56 | | - .copied()
57 | | - .filter(|s| s.code() != IntCode::Pco)
58 | | - .collect(),
59 | | - float_schemes: ALL_FLOAT_SCHEMES
| 119 | + schemes: ALL_SCHEMES
60 | 120 | .iter()
61 | 121 | .copied()
62 | | - .filter(|s| s.code() != FloatCode::Pco)
63 | | - .collect(),
64 | | - string_schemes: ALL_STRING_SCHEMES
65 | | - .iter()
66 | | - .copied()
67 | | - .filter(|s| s.code() != StringCode::Zstd && s.code() != StringCode::ZstdBuffers)
| 122 | + .filter(|s| !excluded.contains(&s.id())) // Keep every scheme whose ID is not in the default-excluded set.
68 | 123 | .collect(),
69 | 124 | }
70 | 125 | }
71 | 126 | }
72 | 127 |
|
73 | 128 | impl BtrBlocksCompressorBuilder {
74 | | - /// Create a new builder with no encodings enabled.
75 | | - pub fn empty() -> Self {
76 | | - Self {
77 | | - int_schemes: Default::default(),
78 | | - float_schemes: Default::default(),
79 | | - string_schemes: Default::default(),
80 | | - }
81 | | - }
82 | | -
83 | | - /// Excludes the specified integer compression schemes.
84 | | - pub fn exclude_int(mut self, codes: impl IntoIterator<Item = IntCode>) -> Self {
85 | | - let codes: HashSet<_> = codes.into_iter().collect();
86 | | - self.int_schemes.retain(|s| !codes.contains(&s.code()));
| 129 | + /// Excludes the specified compression schemes by their [`SchemeId`]. IDs that match no currently enabled scheme are ignored.
| 130 | + pub fn exclude(mut self, ids: impl IntoIterator<Item = SchemeId>) -> Self {
| 131 | + let ids: HashSet<_> = ids.into_iter().collect();
| 132 | + self.schemes.retain(|s| !ids.contains(&s.id()));
87 | 133 | self
88 | 134 | }
89 | 135 |
|
90 | | - /// Excludes the specified float compression schemes.
91 | | - pub fn exclude_float(mut self, codes: impl IntoIterator<Item = FloatCode>) -> Self {
92 | | - let codes: HashSet<_> = codes.into_iter().collect();
93 | | - self.float_schemes.retain(|s| !codes.contains(&s.code()));
94 | | - self
95 | | - }
96 | | -
97 | | - /// Excludes the specified string compression schemes.
98 | | - pub fn exclude_string(mut self, codes: impl IntoIterator<Item = StringCode>) -> Self {
99 | | - let codes: HashSet<_> = codes.into_iter().collect();
100 | | - self.string_schemes.retain(|s| !codes.contains(&s.code()));
101 | | - self
102 | | - }
103 | | -
104 | | - /// Includes the specified integer compression schemes.
105 | | - pub fn include_int(mut self, codes: impl IntoIterator<Item = IntCode>) -> Self {
106 | | - let codes: HashSet<_> = codes.into_iter().collect();
107 | | - for scheme in ALL_INT_SCHEMES {
108 | | - if codes.contains(&scheme.code()) {
109 | | - self.int_schemes.insert(*scheme);
110 | | - }
111 | | - }
112 | | - self
113 | | - }
114 | | -
115 | | - /// Includes the specified float compression schemes.
116 | | - pub fn include_float(mut self, codes: impl IntoIterator<Item = FloatCode>) -> Self {
117 | | - let codes: HashSet<_> = codes.into_iter().collect();
118 | | - for scheme in ALL_FLOAT_SCHEMES {
119 | | - if codes.contains(&scheme.code()) {
120 | | - self.float_schemes.insert(*scheme);
| 136 | + /// Includes the specified compression schemes by their [`SchemeId`].
| 137 | + ///
| 138 | + /// Only schemes present in [`ALL_SCHEMES`] can be included; IDs that match no entry of that list are silently ignored.
| 139 | + pub fn include(mut self, ids: impl IntoIterator<Item = SchemeId>) -> Self {
| 140 | + let ids: HashSet<_> = ids.into_iter().collect();
| 141 | + for scheme in ALL_SCHEMES {
| 142 | + if ids.contains(&scheme.id()) {
| 143 | + self.schemes.insert(*scheme);
121 | 144 | }
122 | 145 | }
123 | 146 | self
124 | 147 | }
125 | 148 |
|
126 | | - /// Includes the specified string compression schemes.
127 | | - pub fn include_string(mut self, codes: impl IntoIterator<Item = StringCode>) -> Self {
128 | | - let codes: HashSet<_> = codes.into_iter().collect();
129 | | - for scheme in ALL_STRING_SCHEMES {
130 | | - if codes.contains(&scheme.code()) {
131 | | - self.string_schemes.insert(*scheme);
132 | | - }
133 | | - }
| 149 | + /// Adds a single scheme to the builder. NOTE(review): `build` only emits entries of `ALL_SCHEMES` that are in the builder's set, so a scheme matching none of them looks like it is dropped at build time - confirm this is intended.
| 150 | + pub fn with_scheme(mut self, scheme: &'static dyn Scheme) -> Self {
| 151 | + self.schemes.insert(scheme);
134 | 152 | self
135 | 153 | }
136 | 154 |
|
137 | | - /// Builds the configured `BtrBlocksCompressor`.
| 155 | + /// Builds the configured [`BtrBlocksCompressor`].
| 156 | + ///
| 157 | + /// The resulting scheme list preserves the order of [`ALL_SCHEMES`] for deterministic
| 158 | + /// tie-breaking. Only entries of [`ALL_SCHEMES`] that are contained in the builder's set are emitted.
138 | 159 | pub fn build(self) -> BtrBlocksCompressor {
139 | | - // Note we should apply the schemes in the same order, in case try conflict.
140 | | - BtrBlocksCompressor {
141 | | - int_schemes: self
142 | | - .int_schemes
143 | | - .into_iter()
144 | | - .sorted_by_key(|s| s.code())
145 | | - .collect_vec(),
146 | | - float_schemes: self
147 | | - .float_schemes
148 | | - .into_iter()
149 | | - .sorted_by_key(|s| s.code())
150 | | - .collect_vec(),
151 | | - string_schemes: self
152 | | - .string_schemes
153 | | - .into_iter()
154 | | - .sorted_by_key(|s| s.code())
155 | | - .collect_vec(),
156 | | - }
| 160 | + let schemes = ALL_SCHEMES
| 161 | + .iter()
| 162 | + .copied()
| 163 | + .filter(|s| self.schemes.contains(s))
| 164 | + .collect();
| 165 | + BtrBlocksCompressor(CascadingCompressor::new(schemes))
157 | 166 | }
158 | 167 | }
0 commit comments