clean up

connortsui20 · connortsui20 · commit 414149eb7f27 · 2026-04-13T16:56:02.000-04:00
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
diff --git a/benchmarks/compress-bench/src/main.rs b/benchmarks/compress-bench/src/main.rs
@@ -70,13 +70,19 @@ struct Args {
     output_path: Option<PathBuf>,
     #[arg(long)]
     tracing: bool,
+    /// Format for the primary stderr log sink. `text` is the default human-readable format;
+    /// `json` emits one JSON object per event, suitable for piping into `jq`.
+    ///
+    /// See [`LogFormat`] for the full details.
+    #[arg(long, value_enum, default_value_t = LogFormat::Text)]
+    log_format: LogFormat,
 }
 
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
     let args = Args::parse();
 
-    setup_logging_and_tracing(args.verbose, args.tracing, LogFormat::Text)?;
+    setup_logging_and_tracing(args.verbose, args.tracing, args.log_format)?;
 
     run_compress(
         args.iterations,
diff --git a/vortex-bench/src/bin/data-gen.rs b/vortex-bench/src/bin/data-gen.rs
@@ -38,8 +38,9 @@ struct Args {
     #[arg(long)]
     tracing: bool,
 
-    /// Format for the primary stderr log sink. `text` is the default human
-    /// readable format; `json` emits newline-delimited JSON suitable for `jq`.
+    /// Format for the primary stderr log sink. `text` is the default
+    /// human-readable format; `json` emits one JSON object per event, suitable
+    /// for piping into `jq`. See [`LogFormat`] for the full details.
     #[arg(long, value_enum, default_value_t = LogFormat::Text)]
     log_format: LogFormat,
 
diff --git a/vortex-btrblocks/Cargo.toml b/vortex-btrblocks/Cargo.toml
@@ -20,7 +20,7 @@ num-traits = { workspace = true }
 pco = { workspace = true, optional = true }
 rand = { workspace = true }
 rustc-hash = { workspace = true }
-tracing = { workspace = true, features = ["std", "attributes"] }
+tracing = { workspace = true }
 vortex-alp = { workspace = true }
 vortex-array = { workspace = true }
 vortex-buffer = { workspace = true }
diff --git a/vortex-btrblocks/src/canonical_compressor.rs b/vortex-btrblocks/src/canonical_compressor.rs
@@ -39,23 +39,9 @@ pub struct BtrBlocksCompressor(
 impl BtrBlocksCompressor {
     /// Compresses an array using BtrBlocks-inspired compression.
     ///
-    /// This call is instrumented with a `vortex_compressor::cascade` span named
-    /// `BtrBlocksCompressor::compress` so that downstream tracing consumers
-    /// (e.g. `tracing-perfetto`) have a distinct entry frame to pivot on, nested
-    /// above the generic [`CascadingCompressor::compress`] span that actually
-    /// runs the pipeline. See the `Observability` section of the
-    /// [`vortex_compressor`] crate docs for the full tracing reference.
-    #[tracing::instrument(
-        target = "vortex_compressor::cascade",
-        name = "BtrBlocksCompressor::compress",
-        level = "trace",
-        skip_all,
-        fields(
-            len = array.len(),
-            nbytes = array.nbytes(),
-            dtype = %array.dtype(),
-        ),
-    )]
+    /// This is a thin delegate to [`CascadingCompressor::compress`], which owns the tracing
+    /// instrumentation. See the `Observability` section of the [`vortex_compressor`] crate
+    /// docs for the full tracing reference.
     pub fn compress(&self, array: &ArrayRef) -> VortexResult<ArrayRef> {
         self.0.compress(array)
     }
diff --git a/vortex-btrblocks/src/lib.rs b/vortex-btrblocks/src/lib.rs
@@ -54,16 +54,12 @@
 //!
 //! # Observability
 //!
-//! [`BtrBlocksCompressor`] participates in the [`vortex_compressor`] tracing target system.
-//! See the [`vortex_compressor` crate docs](vortex_compressor#observability) for the full
-//! reference on targets, spans, and events.
+//! [`BtrBlocksCompressor`] is a thin delegate to [`CascadingCompressor`], which owns all
+//! tracing instrumentation. See the
+//! [`vortex_compressor` crate docs](vortex_compressor#observability) for the full reference
+//! on targets, spans, and events.
 //!
-//! The top-level [`BtrBlocksCompressor::compress`] call adds its own
-//! `vortex_compressor::cascade` span (named `BtrBlocksCompressor::compress`) that nests
-//! above the generic cascading-compressor pipeline, giving downstream trace consumers a
-//! distinct entry frame.
-//!
-//! Quick start — one line per leaf with scheme, estimated ratio, actual ratio, accepted?:
+//! Quick start: one line per leaf with scheme, estimated ratio, actual ratio, accepted?:
 //!
 //! ```text
 //! RUST_LOG=vortex_compressor::encode=debug cargo test -p vortex-btrblocks
diff --git a/vortex-btrblocks/src/schemes/integer.rs b/vortex-btrblocks/src/schemes/integer.rs
@@ -294,10 +294,6 @@ impl Scheme for ZigZagScheme {
 
         let compressed = compressor.compress_child(&encoded.into_array(), &ctx, self.id(), 0)?;
 
-        // NOTE: scheme-level compression results are emitted centrally as the
-        // `scheme.compress_result` event on the `vortex_compressor::encode`
-        // target. See the `Observability` section of the `vortex_compressor` crate docs.
-
         Ok(ZigZag::try_new(compressed)?.into_array())
     }
 }
diff --git a/vortex-compressor/src/compressor.rs b/vortex-compressor/src/compressor.rs
@@ -38,6 +38,7 @@ use crate::builtins::IntDictScheme;
 use crate::ctx::CompressorContext;
 use crate::estimate::CompressionEstimate;
 use crate::estimate::DeferredEstimate;
+use crate::estimate::EstimateFn;
 use crate::estimate::EstimateVerdict;
 use crate::estimate::estimate_compression_ratio_with_sampling;
 use crate::estimate::is_better_ratio;
@@ -72,13 +73,15 @@ const TARGET_CASCADE: &str = "vortex_compressor::cascade";
 /// Emits a structured `scheme.evaluated` trace event on [`TARGET_SELECT`] for one scheme's
 /// initial estimation verdict.
 ///
-/// For `Ratio(r)` the numeric estimate is recorded directly. For `Sample` and `Estimate`
-/// the ratio is not yet known at this point; a follow-up `scheme.evaluated.resolved` event
-/// is emitted by the caller after the deferred computation finishes.
+/// For a terminal [`EstimateVerdict::Ratio`] verdict the numeric estimate is recorded
+/// directly as a typed `f64`, so JSON subscribers get a proper number. For all other variants the
+/// `ratio` field is omitted entirely. The `kind` field distinguishes the variants. For deferred
+/// estimates a follow-up `scheme.evaluated.resolved` event is emitted by the caller once the
+/// deferred computation finishes.
 ///
-/// Defined as a standalone helper (rather than inlined) because the `match` expression that
-/// extracts `kind` and the optional `ratio` field is the only repetition worth factoring out
-/// of [`CascadingCompressor::choose_best_scheme`].
+/// Defined as a standalone helper (rather than inlined) because the `match` expression
+/// that extracts `kind` is the only repetition worth factoring out of
+/// [`CascadingCompressor::choose_best_scheme`].
 fn emit_scheme_evaluated(scheme: &'static dyn Scheme, estimate: &CompressionEstimate) {
     let kind: &'static str = match estimate {
         CompressionEstimate::Verdict(EstimateVerdict::Skip) => "Skip",
@@ -542,7 +545,6 @@ impl CascadingCompressor {
     /// registration order (earlier in the list wins).
     ///
     /// [`expected_compression_ratio`]: Scheme::expected_compression_ratio
-    #[allow(clippy::cognitive_complexity, reason = "tracing sometimes enabled")]
     fn choose_best_scheme(
         &self,
         schemes: &[&'static dyn Scheme],
@@ -571,53 +573,16 @@ impl CascadingCompressor {
                     }
                 }
                 CompressionEstimate::Deferred(DeferredEstimate::Sample) => {
-                    let sample_ratio = estimate_compression_ratio_with_sampling(
-                        scheme,
-                        self,
-                        data.array(),
-                        ctx.clone(),
-                    )?;
-
-                    tracing::trace!(
-                        target: TARGET_SELECT,
-                        scheme = %scheme.id(),
-                        kind = "Sample",
-                        ratio = sample_ratio,
-                        "scheme.evaluated.resolved",
-                    );
-
-                    if is_better_ratio(sample_ratio, &best) {
-                        best = Some((scheme, sample_ratio));
-                    }
+                    self.check_sample_scheme(data, &ctx, &mut best, scheme)?;
                 }
                 CompressionEstimate::Deferred(DeferredEstimate::Callback(estimate_callback)) => {
-                    let verdict = estimate_callback(self, data, ctx.clone())?;
-                    let resolved_kind = match verdict {
-                        EstimateVerdict::Skip => "Skip",
-                        EstimateVerdict::AlwaysUse => "AlwaysUse",
-                        EstimateVerdict::Ratio(_) => "Ratio",
-                    };
-                    if let EstimateVerdict::Ratio(ratio) = verdict {
-                        tracing::trace!(
-                            target: TARGET_SELECT,
-                            scheme = %scheme.id(),
-                            kind = "Estimate",
-                            resolved_kind,
-                            ratio,
-                            "scheme.evaluated.resolved",
-                        );
-                    } else {
-                        tracing::trace!(
-                            target: TARGET_SELECT,
-                            scheme = %scheme.id(),
-                            kind = "Estimate",
-                            resolved_kind,
-                            "scheme.evaluated.resolved",
-                        );
-                    }
-                    if let Some(winner_estimate) =
-                        Self::check_and_update_estimate_verdict(&mut best, scheme, verdict)
-                    {
+                    if let Some(winner_estimate) = self.check_estimate_callback(
+                        data,
+                        &ctx,
+                        &mut best,
+                        scheme,
+                        estimate_callback,
+                    )? {
                         return Ok(Some((scheme, winner_estimate)));
                     }
                 }
@@ -627,6 +592,71 @@ impl CascadingCompressor {
         Ok(best.map(|(scheme, ratio)| (scheme, WinnerEstimate::Ratio(ratio))))
     }
 
+    /// Samples `scheme` to estimate its compression ratio; updates `best` if the ratio is better.
+    fn check_sample_scheme(
+        &self,
+        data: &mut ArrayAndStats,
+        ctx: &CompressorContext,
+        best: &mut Option<(&'static dyn Scheme, f64)>,
+        scheme: &'static dyn Scheme,
+    ) -> VortexResult<()> {
+        let sample_ratio =
+            estimate_compression_ratio_with_sampling(scheme, self, data.array(), ctx.clone())?;
+
+        tracing::trace!(
+            target: TARGET_SELECT,
+            scheme = %scheme.id(),
+            kind = "Sample",
+            ratio = sample_ratio,
+            "scheme.evaluated.resolved",
+        );
+
+        if is_better_ratio(sample_ratio, &*best) {
+            *best = Some((scheme, sample_ratio));
+        }
+
+        Ok(())
+    }
+
+    /// Runs a scheme's deferred estimate callback, traces the verdict, and updates `best`.
+    fn check_estimate_callback(
+        &self,
+        data: &mut ArrayAndStats,
+        ctx: &CompressorContext,
+        best: &mut Option<(&'static dyn Scheme, f64)>,
+        scheme: &'static dyn Scheme,
+        estimate_callback: Box<EstimateFn>,
+    ) -> VortexResult<Option<WinnerEstimate>> {
+        let verdict = estimate_callback(self, data, ctx.clone())?;
+        let resolved_kind = match verdict {
+            EstimateVerdict::Skip => "Skip",
+            EstimateVerdict::AlwaysUse => "AlwaysUse",
+            EstimateVerdict::Ratio(_) => "Ratio",
+        };
+        if let EstimateVerdict::Ratio(ratio) = verdict {
+            tracing::trace!(
+                target: TARGET_SELECT,
+                scheme = %scheme.id(),
+                kind = "Estimate",
+                resolved_kind,
+                ratio,
+                "scheme.evaluated.resolved",
+            );
+        } else {
+            tracing::trace!(
+                target: TARGET_SELECT,
+                scheme = %scheme.id(),
+                kind = "Estimate",
+                resolved_kind,
+                "scheme.evaluated.resolved",
+            );
+        }
+
+        Ok(Self::check_and_update_estimate_verdict(
+            best, scheme, verdict,
+        ))
+    }
+
     /// Updates `best` from a terminal estimate verdict.
     fn check_and_update_estimate_verdict(
         best: &mut Option<(&'static dyn Scheme, f64)>,
diff --git a/vortex-compressor/src/lib.rs b/vortex-compressor/src/lib.rs
@@ -82,16 +82,33 @@
 //!
 //! | Event                       | Target            | Level | Fields                                                                                          |
 //! |-----------------------------|-------------------|-------|-------------------------------------------------------------------------------------------------|
-//! | `scheme.evaluated`          | select            | trace | `scheme`, `kind`, `ratio` (Option)                                                              |
+//! | `scheme.evaluated`          | select            | trace | `scheme`, `kind`, `ratio` (only when `kind = "Ratio"`)                                          |
 //! | `scheme.evaluated.resolved` | select            | trace | `scheme`, `kind`, `resolved_kind`?, `ratio`?                                                    |
-//! | `scheme.winner`             | select            | debug | `scheme`, `estimated_ratio`, `candidate_count`                                                  |
-//! | `scheme.compress_result`    | encode            | debug | `scheme`, `before_nbytes`, `after_nbytes`, `estimated_ratio`, `actual_ratio`, `accepted`        |
+//! | `scheme.winner`             | select            | debug | `scheme`, `candidate_count`, and either `estimated_ratio` or `always_use = true`                |
+//! | `scheme.compress_result`    | encode            | debug | `scheme`, `before_nbytes`, `after_nbytes`, `actual_ratio`, `accepted`, and either `estimated_ratio` or `always_use = true` |
 //! | `sample.collected`          | estimate          | trace | `scheme`, `sample_count`, `sample_size`, `sampled_len`, `source_len`                            |
 //! | `sample.result`             | estimate          | debug | `scheme`, `sampled_before`, `sampled_after`, `sampled_ratio`                                    |
-//! | `short_circuit`             | select / cascade  | debug | `reason` (`cascade_exhausted` \| `no_schemes` \| `empty` \| `all_null` \| `fell_through` \| `larger_output`), scheme?/parent? |
-//!
-//! An `estimated_ratio` of [`f64::INFINITY`] indicates a scheme that returned
-//! [`CompressionEstimate::AlwaysUse`](estimate::CompressionEstimate::AlwaysUse).
+//! | `short_circuit`             | select / cascade  | debug | `reason` plus reason-specific fields (see below)                                                |
+//!
+//! ### `short_circuit` reasons and fields
+//!
+//! The `short_circuit` event reports six distinct reasons, each carrying a different set of
+//! fields. Downstream tooling should branch on `reason` before reading the other fields.
+//!
+//! | `reason`            | Target    | Additional fields                                                                     |
+//! |---------------------|-----------|---------------------------------------------------------------------------------------|
+//! | `cascade_exhausted` | cascade   | `parent`, `child_index`                                                               |
+//! | `no_schemes`        | select    | — (no additional fields)                                                              |
+//! | `empty`             | select    | — (no additional fields)                                                              |
+//! | `all_null`          | select    | — (no additional fields)                                                              |
+//! | `fell_through`      | select    | `candidate_count`                                                                     |
+//! | `larger_output`     | select    | `scheme`, `before_nbytes`, `after_nbytes`, `actual_ratio`, plus either `estimated_ratio` or `always_use = true` |
+//!
+//! The `always_use` boolean (emitted on `scheme.winner`, `scheme.compress_result`, and the
+//! `larger_output` short-circuit) indicates the winner was a scheme that returned
+//! [`EstimateVerdict::AlwaysUse`](estimate::EstimateVerdict::AlwaysUse) and therefore
+//! did not produce a numeric estimate. It is emitted in place of `estimated_ratio` so that
+//! JSON subscribers never see a non-finite number in the `estimated_ratio` slot.
 //!
 //! Field names are considered stable and are meant to be matched directly by downstream
 //! observability tooling. This means `tracing-opentelemetry`, `tracing-perfetto`, and
diff --git a/vortex-compressor/tests/tracing.rs b/vortex-compressor/tests/tracing.rs

Original file line number	Diff line number	Diff line change
`@@ -294,10 +294,6 @@ impl Scheme for ZigZagScheme {`
`294`	`294`
`295`	`295`	`let compressed = compressor.compress_child(&encoded.into_array(), &ctx, self.id(), 0)?;`
`296`	`296`
`297`		`- // NOTE: scheme-level compression results are emitted centrally as the`
`298`		``- // `scheme.compress_result` event on the `vortex_compressor::encode` ``
`299`		``- // target. See the `Observability` section of the `vortex_compressor` crate docs.``
`300`		`-`
`301`	`297`	`Ok(ZigZag::try_new(compressed)?.into_array())`
`302`	`298`	`}`
`303`	`299`	`}`