Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion benchmarks/compress-bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use regex::Regex;
use vortex::utils::aliases::hash_map::HashMap;
use vortex_bench::Engine;
use vortex_bench::Format;
use vortex_bench::LogFormat;
use vortex_bench::Target;
use vortex_bench::compress::CompressMeasurements;
use vortex_bench::compress::CompressOp;
Expand Down Expand Up @@ -69,13 +70,19 @@ struct Args {
output_path: Option<PathBuf>,
#[arg(long)]
tracing: bool,
/// Format for the primary stderr log sink. `text` is the default human-readable format;
/// `json` emits one JSON object per event, suitable for piping into `jq`.
///
/// See [`LogFormat`] for the full details.
#[arg(long, value_enum, default_value_t = LogFormat::Text)]
log_format: LogFormat,
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
let args = Args::parse();

setup_logging_and_tracing(args.verbose, args.tracing)?;
setup_logging_and_tracing(args.verbose, args.tracing, args.log_format)?;

run_compress(
args.iterations,
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/datafusion-bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ use vortex_bench::BenchmarkArg;
use vortex_bench::CompactionStrategy;
use vortex_bench::Engine;
use vortex_bench::Format;
use vortex_bench::LogFormat;
use vortex_bench::Opt;
use vortex_bench::Opts;
use vortex_bench::SESSION;
Expand Down Expand Up @@ -110,7 +111,7 @@ async fn main() -> anyhow::Result<()> {
let opts = Opts::from(args.options);

set_join_set_tracer(get_static_tracer())?;
setup_logging_and_tracing(args.verbose, args.tracing)?;
setup_logging_and_tracing(args.verbose, args.tracing, LogFormat::Text)?;

let benchmark = create_benchmark(args.benchmark, &opts)?;

Expand Down
3 changes: 2 additions & 1 deletion benchmarks/duckdb-bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use vortex_bench::BenchmarkArg;
use vortex_bench::CompactionStrategy;
use vortex_bench::Engine;
use vortex_bench::Format;
use vortex_bench::LogFormat;
use vortex_bench::Opt;
use vortex_bench::Opts;
use vortex_bench::conversions::convert_parquet_directory_to_vortex;
Expand Down Expand Up @@ -87,7 +88,7 @@ fn main() -> anyhow::Result<()> {
let args = Args::parse();
let opts = Opts::from(args.options);

setup_logging_and_tracing(args.verbose, args.tracing)?;
setup_logging_and_tracing(args.verbose, args.tracing, LogFormat::Text)?;

let benchmark = create_benchmark(args.benchmark, &opts)?;

Expand Down
3 changes: 2 additions & 1 deletion benchmarks/lance-bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use vortex_bench::Benchmark;
use vortex_bench::BenchmarkArg;
use vortex_bench::Engine;
use vortex_bench::Format;
use vortex_bench::LogFormat;
use vortex_bench::Opt;
use vortex_bench::Opts;
use vortex_bench::create_benchmark;
Expand Down Expand Up @@ -74,7 +75,7 @@ async fn main() -> anyhow::Result<()> {
let args = Args::parse();
let opts = Opts::from(args.options);

setup_logging_and_tracing(args.verbose, args.tracing)?;
setup_logging_and_tracing(args.verbose, args.tracing, LogFormat::Text)?;

let benchmark = create_benchmark(args.benchmark, &opts)?;

Expand Down
3 changes: 2 additions & 1 deletion benchmarks/random-access-bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use rand_distr::Distribution;
use rand_distr::Exp;
use vortex_bench::Engine;
use vortex_bench::Format;
use vortex_bench::LogFormat;
use vortex_bench::Target;
use vortex_bench::create_output_writer;
use vortex_bench::datasets::feature_vectors::FeatureVectorsData;
Expand Down Expand Up @@ -190,7 +191,7 @@ struct Args {
async fn main() -> Result<()> {
let args = Args::parse();

setup_logging_and_tracing(args.verbose, args.tracing)?;
setup_logging_and_tracing(args.verbose, args.tracing, LogFormat::Text)?;

let datasets: Vec<Box<dyn BenchDataset>> = args
.datasets
Expand Down
1 change: 1 addition & 0 deletions vortex-bench/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ tracing = { workspace = true }
tracing-perfetto = { workspace = true }
tracing-subscriber = { workspace = true, features = [
"env-filter",
"json",
"tracing-log",
] }
url = { workspace = true }
Expand Down
9 changes: 8 additions & 1 deletion vortex-bench/src/bin/data-gen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use vortex_bench::Benchmark;
use vortex_bench::BenchmarkArg;
use vortex_bench::CompactionStrategy;
use vortex_bench::Format;
use vortex_bench::LogFormat;
use vortex_bench::Opt;
use vortex_bench::Opts;
use vortex_bench::conversions::convert_parquet_directory_to_vortex;
Expand All @@ -37,6 +38,12 @@ struct Args {
#[arg(long)]
tracing: bool,

/// Format for the primary stderr log sink. `text` is the default
/// human-readable format; `json` emits one JSON object per event, suitable
/// for piping into `jq`. See [`LogFormat`] for the full details.
#[arg(long, value_enum, default_value_t = LogFormat::Text)]
log_format: LogFormat,

#[arg(long, value_delimiter = ',', value_parser = value_parser!(Format))]
formats: Vec<Format>,

Expand All @@ -49,7 +56,7 @@ async fn main() -> anyhow::Result<()> {
let args = Args::parse();
let opts = Opts::from(args.options);

setup_logging_and_tracing(args.verbose, args.tracing)?;
setup_logging_and_tracing(args.verbose, args.tracing, args.log_format)?;

let benchmark = create_benchmark(args.benchmark, &opts)?;

Expand Down
7 changes: 6 additions & 1 deletion vortex-bench/src/conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,11 @@ pub async fn convert_parquet_directory_to_vortex(
let parquet_file_path = parquet_path.join(format!("{filename}.parquet"));
let output_path = vortex_dir.join(format!("{filename}.{}", format.ext()));

let span = tracing::info_span!(
"compress_file",
file = %filename,
strategy = ?compaction,
);
tokio::spawn(
async move {
idempotent_async(output_path.as_path(), move |vtx_file| async move {
Expand All @@ -192,7 +197,7 @@ pub async fn convert_parquet_directory_to_vortex(
.await
.expect("Failed to write Vortex file")
}
.in_current_span(),
.instrument(span),
)
})
.buffer_unordered(concurrency)
Expand Down
90 changes: 72 additions & 18 deletions vortex-bench/src/utils/logging.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,88 @@
use std::fs::File;
use std::io::IsTerminal;

use clap::ValueEnum;
use tracing::level_filters::LevelFilter;
use tracing_perfetto::PerfettoLayer;
use tracing_subscriber::EnvFilter;
use tracing_subscriber::Layer;
use tracing_subscriber::prelude::*;

/// Initialize logging/tracing for a benchmark
pub fn setup_logging_and_tracing(verbose: bool, tracing: bool) -> anyhow::Result<()> {
/// Format for the primary stderr log sink.
///
/// Selects between a human-oriented text formatter and a newline-delimited JSON
/// formatter. The two are mutually exclusive — pick whichever matches how you'll
/// consume the output. The choice is orthogonal to
/// [`setup_logging_and_tracing`]'s `tracing` flag, which controls a separate
/// Perfetto trace file and is unaffected by the format selected here.
///
/// See the crate-level documentation of `vortex-compressor` for the full
/// inventory of fields emitted under each tracing target.
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, ValueEnum)]
pub enum LogFormat {
/// Human-readable single-line records, with level, file, and line prefixes.
/// ANSI coloring is enabled automatically when stderr is a terminal. This
/// is the default and matches the historical behavior of this crate.
#[default]
Text,
/// Newline-delimited JSON — one complete JSON object per event, written to
/// stderr. Each record includes the event fields as typed values plus the
/// full span stack under the `spans` key, making it suitable for piping
/// into `jq` or ingestion into a log aggregator.
///
/// Not to be confused with the Perfetto trace format emitted when the
/// `tracing` flag is set — that is a single-document Chrome Trace Event
/// file designed to be loaded into the Perfetto UI, and cannot be
/// meaningfully filtered line-by-line.
Json,
}

/// Initialize logging/tracing for a benchmark.
///
/// - `verbose`: when `RUST_LOG` is unset, raises the default filter from `INFO`
/// to `TRACE`. Has no effect when `RUST_LOG` is set (the env var wins).
/// - `perfetto`: when `true`, additionally attaches a
/// [`tracing_perfetto::PerfettoLayer`] that writes span begin/end events to
/// `trace.json` in the current directory. Intended to be loaded into the
/// Perfetto UI for flamegraph visualization.
/// - `format`: controls the primary stderr sink's formatting. See [`LogFormat`].
pub fn setup_logging_and_tracing(
verbose: bool,
perfetto: bool,
format: LogFormat,
) -> anyhow::Result<()> {
let filter = default_env_filter(verbose);

let fmt_layer = tracing_subscriber::fmt::layer()
.with_writer(std::io::stderr)
.with_level(true)
.with_file(true)
.with_line_number(true)
.with_ansi(std::io::stderr().is_terminal());
let perfetto_layer = perfetto
.then(|| {
Ok::<_, anyhow::Error>(
PerfettoLayer::new(File::create("trace.json")?).with_debug_annotations(true),
)
})
.transpose()?;

// `fmt::layer()` and `fmt::layer().json()` produce different concrete
// types, so we erase each to a `dyn Layer` via `.boxed()` and keep the
// registry chain uniform.
let fmt_layer: Box<dyn Layer<_> + Send + Sync> = match format {
LogFormat::Text => tracing_subscriber::fmt::layer()
.with_writer(std::io::stderr)
.with_level(true)
.with_file(true)
.with_line_number(true)
.with_ansi(std::io::stderr().is_terminal())
.boxed(),
LogFormat::Json => tracing_subscriber::fmt::layer()
.json()
.with_writer(std::io::stderr)
.with_current_span(true)
.with_span_list(true)
.boxed(),
};

tracing_subscriber::registry()
.with(filter)
.with(
tracing
.then(|| {
Ok::<_, anyhow::Error>(
PerfettoLayer::new(File::create("trace.json")?)
.with_debug_annotations(true),
)
})
.transpose()?,
)
.with(perfetto_layer)
.with(fmt_layer)
.init();

Expand Down
4 changes: 4 additions & 0 deletions vortex-btrblocks/src/canonical_compressor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ pub struct BtrBlocksCompressor(

impl BtrBlocksCompressor {
/// Compresses an array using BtrBlocks-inspired compression.
///
/// This is a thin delegate to [`CascadingCompressor::compress`], which owns the tracing
/// instrumentation. See the `Observability` section of the [`vortex_compressor`] crate
/// docs for the full tracing reference.
pub fn compress(&self, array: &ArrayRef) -> VortexResult<ArrayRef> {
self.0.compress(array)
}
Expand Down
13 changes: 13 additions & 0 deletions vortex-btrblocks/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,19 @@
//! .build();
//! ```
//!
//! # Observability
//!
//! [`BtrBlocksCompressor`] is a thin delegate to [`CascadingCompressor`], which owns all
//! tracing instrumentation. See the
//! [`vortex_compressor` crate docs](vortex_compressor#observability) for the full reference
//! on targets, spans, and events.
//!
//! Quick start: one line per leaf with scheme, estimated ratio, actual ratio, accepted?:
//!
//! ```text
//! RUST_LOG=vortex_compressor::encode=debug cargo test -p vortex-btrblocks
//! ```
//!
//! [BtrBlocks]: https://www.cs.cit.tum.de/fileadmin/w00cfj/dis/papers/btrblocks.pdf

mod builder;
Expand Down
2 changes: 0 additions & 2 deletions vortex-btrblocks/src/schemes/integer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,6 @@ impl Scheme for ZigZagScheme {

let compressed = compressor.compress_child(&encoded.into_array(), &ctx, self.id(), 0)?;

tracing::debug!("zigzag output: {}", compressed.encoding_id());

Ok(ZigZag::try_new(compressed)?.into_array())
}
}
Expand Down
3 changes: 2 additions & 1 deletion vortex-compressor/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ num-traits = { workspace = true }
parking_lot = { workspace = true }
rand = { workspace = true }
rustc-hash = { workspace = true }
tracing = { workspace = true }
tracing = { workspace = true, features = ["std", "attributes"] }
vortex-array = { workspace = true }
vortex-buffer = { workspace = true }
vortex-error = { workspace = true }
Expand All @@ -29,6 +29,7 @@ vortex-utils = { workspace = true }
[dev-dependencies]
divan = { workspace = true }
rstest = { workspace = true }
tracing-subscriber = { workspace = true, features = ["env-filter"] }
vortex-array = { workspace = true, features = ["_test-harness"] }

[lints]
Expand Down
Loading
Loading