From 2bf0f3c931efe492e761ae2d8de06a977c9ca5ee Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Thu, 26 Mar 2026 13:00:53 -0400 Subject: [PATCH 1/3] Phase 1b Signed-off-by: Nicholas Gates --- vortex-array/public-api.lock | 18 ++- vortex-array/src/array/mod.rs | 184 +++++++++++++++++++++++++++---- vortex-array/src/stats/array.rs | 7 ++ vortex-array/src/vtable/dyn_.rs | 20 +++- vortex-array/src/vtable/mod.rs | 101 +++-------------- vortex-array/src/vtable/typed.rs | 84 ++++++++++---- 6 files changed, 279 insertions(+), 135 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index dbeeba43d13..6af9fb87d60 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -20414,6 +20414,8 @@ pub fn vortex_array::stats::StatsSetRef<'_>::inherit_from(&self, stats: vortex_a pub fn vortex_array::stats::StatsSetRef<'_>::set_iter(&self, iter: vortex_array::stats::StatsSetIntoIter) +pub fn vortex_array::stats::StatsSetRef<'_>::to_array_stats(&self) -> vortex_array::stats::ArrayStats + pub fn vortex_array::stats::StatsSetRef<'_>::to_owned(&self) -> vortex_array::stats::StatsSet pub fn vortex_array::stats::StatsSetRef<'_>::with_iter core::ops::function::FnOnce(&mut dyn core::iter::traits::iterator::Iterator)>) -> R, R>(&self, f: F) -> R @@ -20686,11 +20688,25 @@ pub struct vortex_array::vtable::Array impl vortex_array::vtable::Array +pub fn vortex_array::vtable::Array::array_stats(&self) -> &vortex_array::stats::ArrayStats + +pub fn vortex_array::vtable::Array::dtype(&self) -> &vortex_array::dtype::DType + +pub fn vortex_array::vtable::Array::encoding_id(&self) -> vortex_array::vtable::ArrayId + pub fn vortex_array::vtable::Array::inner(&self) -> &::Array pub fn vortex_array::vtable::Array::into_inner(self) -> ::Array -pub fn vortex_array::vtable::Array::new(vtable: V, array: ::Array) -> Self +pub fn vortex_array::vtable::Array::is_empty(&self) -> bool + +pub fn vortex_array::vtable::Array::len(&self) -> usize + +pub fn vortex_array::vtable::Array::new(vtable: V, dtype: vortex_array::dtype::DType, len: usize, array: ::Array, stats: vortex_array::stats::ArrayStats) -> Self + +pub fn vortex_array::vtable::Array::statistics(&self) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::vtable::Array::to_array_ref(&self) -> vortex_array::ArrayRef pub fn vortex_array::vtable::Array::typed_vtable(&self) -> &V diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index 97333781e65..7945701c4fc 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -403,9 +403,8 @@ mod private { /// DynArray implementation for [`Array`]. /// -/// Identity methods (as_any, to_array, vtable, encoding_id) use the `Array` wrapper. -/// All other methods delegate to the inner `V::Array`'s DynArray impl (accessed through -/// its `Deref` provided by the `vtable!` macro). +/// This is self-contained: identity methods use `Array`'s own fields (dtype, len, stats), +/// while data-access methods delegate to VTable methods on the inner `V::Array`. impl DynArray for Array { fn as_any(&self) -> &dyn Any { self @@ -420,11 +419,11 @@ impl DynArray for Array { } fn len(&self) -> usize { - V::len(&self.array) + self.len } fn dtype(&self) -> &DType { - V::dtype(&self.array) + &self.dtype } fn vtable(&self) -> &dyn DynVTable { @@ -436,56 +435,169 @@ impl DynArray for Array { } fn slice(&self, range: Range) -> VortexResult { - // Delegate to inner's DynArray impl (through V::Array's Deref to dyn DynArray). - DynArray::slice(&*self.array, range) + let start = range.start; + let stop = range.end; + + if start == 0 && stop == self.len { + return Ok(self.to_array()); + } + + vortex_ensure!( + start <= self.len, + "OutOfBounds: start {start} > length {}", + self.len + ); + vortex_ensure!( + stop <= self.len, + "OutOfBounds: stop {stop} > length {}", + self.len + ); + + vortex_ensure!(start <= stop, "start ({start}) must be <= stop ({stop})"); + + if start == stop { + return Ok(Canonical::empty(&self.dtype).into_array()); + } + + let sliced = SliceArray::try_new(self.to_array(), range)? + .into_array() + .optimize()?; + + // Propagate some stats from the original array to the sliced array. + if !sliced.is::() { + self.statistics().with_iter(|iter| { + sliced.statistics().inherit(iter.filter(|(stat, value)| { + matches!( + stat, + Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted + ) && value.as_ref().as_exact().is_some_and(|v| { + Scalar::try_new(DType::Bool(Nullability::NonNullable), Some(v.clone())) + .vortex_expect("A stat that was expected to be a boolean stat was not") + .as_bool() + .value() + .unwrap_or_default() + }) + })); + }); + } + + Ok(sliced) } fn filter(&self, mask: Mask) -> VortexResult { - DynArray::filter(&*self.array, mask) + FilterArray::try_new(self.to_array(), mask)? + .into_array() + .optimize() } fn take(&self, indices: ArrayRef) -> VortexResult { - DynArray::take(&*self.array, indices) + DictArray::try_new(indices, self.to_array())? + .into_array() + .optimize() } fn scalar_at(&self, index: usize) -> VortexResult { - DynArray::scalar_at(&*self.array, index) + vortex_ensure!(index < self.len, OutOfBounds: index, 0, self.len); + if self.is_invalid(index)? { + return Ok(Scalar::null(self.dtype.clone())); + } + let scalar = >::scalar_at(&self.array, index)?; + vortex_ensure!(&self.dtype == scalar.dtype(), "Scalar dtype mismatch"); + Ok(scalar) } fn is_valid(&self, index: usize) -> VortexResult { - DynArray::is_valid(&*self.array, index) + vortex_ensure!(index < self.len, OutOfBounds: index, 0, self.len); + match self.validity()? { + Validity::NonNullable | Validity::AllValid => Ok(true), + Validity::AllInvalid => Ok(false), + Validity::Array(a) => a + .scalar_at(index)? + .as_bool() + .value() + .ok_or_else(|| vortex_err!("validity value at index {} is null", index)), + } } fn is_invalid(&self, index: usize) -> VortexResult { - DynArray::is_invalid(&*self.array, index) + Ok(!self.is_valid(index)?) } fn all_valid(&self) -> VortexResult { - DynArray::all_valid(&*self.array) + match self.validity()? { + Validity::NonNullable | Validity::AllValid => Ok(true), + Validity::AllInvalid => Ok(false), + Validity::Array(a) => Ok(a.statistics().compute_min::().unwrap_or(false)), + } } fn all_invalid(&self) -> VortexResult { - DynArray::all_invalid(&*self.array) + match self.validity()? { + Validity::NonNullable | Validity::AllValid => Ok(false), + Validity::AllInvalid => Ok(true), + Validity::Array(a) => Ok(!a.statistics().compute_max::().unwrap_or(true)), + } } fn valid_count(&self) -> VortexResult { - DynArray::valid_count(&*self.array) + if let Some(Precision::Exact(invalid_count)) = + self.statistics().get_as::(Stat::NullCount) + { + return Ok(self.len - invalid_count); + } + + let count = match self.validity()? { + Validity::NonNullable | Validity::AllValid => self.len, + Validity::AllInvalid => 0, + Validity::Array(a) => { + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + let array_sum = sum(&a, &mut ctx)?; + array_sum + .as_primitive() + .as_::() + .ok_or_else(|| vortex_err!("sum of validity array is null"))? + } + }; + vortex_ensure!(count <= self.len, "Valid count exceeds array length"); + + self.statistics() + .set(Stat::NullCount, Precision::exact(self.len - count)); + + Ok(count) } fn invalid_count(&self) -> VortexResult { - DynArray::invalid_count(&*self.array) + Ok(self.len - self.valid_count()?) } fn validity(&self) -> VortexResult { - DynArray::validity(&*self.array) + if self.dtype.is_nullable() { + let validity = >::validity(&self.array)?; + if let Validity::Array(array) = &validity { + vortex_ensure!(array.len() == self.len, "Validity array length mismatch"); + vortex_ensure!( + matches!(array.dtype(), DType::Bool(Nullability::NonNullable)), + "Validity array is not non-nullable boolean: {}", + self.typed_vtable().id(), + ); + } + Ok(validity) + } else { + Ok(Validity::NonNullable) + } } fn validity_mask(&self) -> VortexResult { - DynArray::validity_mask(&*self.array) + match self.validity()? { + Validity::NonNullable | Validity::AllValid => Ok(Mask::new_true(self.len)), + Validity::AllInvalid => Ok(Mask::new_false(self.len)), + Validity::Array(a) => Ok(a.to_bool().to_mask()), + } } fn to_canonical(&self) -> VortexResult { - DynArray::to_canonical(&*self.array) + self.to_array() + .execute(&mut LEGACY_SESSION.create_execution_ctx()) } fn append_to_builder( @@ -493,23 +605,47 @@ impl DynArray for Array { builder: &mut dyn ArrayBuilder, ctx: &mut ExecutionCtx, ) -> VortexResult<()> { - DynArray::append_to_builder(&*self.array, builder, ctx) + if builder.dtype() != &self.dtype { + vortex_panic!( + "Builder dtype mismatch: expected {}, got {}", + self.dtype, + builder.dtype(), + ); + } + let len = builder.len(); + + V::append_to_builder(&self.array, builder, ctx)?; + + assert_eq!( + len + self.len, + builder.len(), + "Builder length mismatch after writing array for encoding {}", + self.typed_vtable().id(), + ); + Ok(()) } fn statistics(&self) -> StatsSetRef<'_> { - V::stats(&self.array) + self.stats.to_ref(self) } fn with_children(&self, children: Vec) -> VortexResult { let mut inner = self.array.clone(); V::with_children(&mut inner, children)?; - Ok(Array::new(self.typed_vtable().clone(), inner).into_array()) + Ok(Array::new( + self.typed_vtable().clone(), + self.dtype.clone(), + self.len, + inner, + self.stats.clone(), + ) + .into_array()) } } impl ArrayHash for Array { fn array_hash(&self, state: &mut H, precision: hash::Precision) { - self.encoding_id().hash(state); + self.typed_vtable().id().hash(state); V::array_hash(&self.array, state, precision); } } @@ -604,7 +740,7 @@ impl ReduceNode for Array { } fn node_dtype(&self) -> VortexResult { - Ok(V::dtype(&self.array).clone()) + Ok(self.dtype.clone()) } fn scalar_fn(&self) -> Option<&ScalarFnRef> { diff --git a/vortex-array/src/stats/array.rs b/vortex-array/src/stats/array.rs index 863d6a94531..bdc8b33cf7c 100644 --- a/vortex-array/src/stats/array.rs +++ b/vortex-array/src/stats/array.rs @@ -132,6 +132,13 @@ impl StatsSetRef<'_> { self.array_stats.inner.read().clone() } + /// Returns a clone of the underlying [`ArrayStats`]. + /// + /// Since [`ArrayStats`] uses `Arc` internally, this is a cheap reference-count increment. + pub fn to_array_stats(&self) -> ArrayStats { + self.array_stats.clone() + } + pub fn with_iter< F: for<'a> FnOnce(&mut dyn Iterator)>) -> R, R, diff --git a/vortex-array/src/vtable/dyn_.rs b/vortex-array/src/vtable/dyn_.rs index 0d9bfc2e699..be45ef3c70f 100644 --- a/vortex-array/src/vtable/dyn_.rs +++ b/vortex-array/src/vtable/dyn_.rs @@ -21,6 +21,7 @@ use crate::buffer::BufferHandle; use crate::dtype::DType; use crate::executor::ExecutionCtx; use crate::serde::ArrayChildren; +use crate::stats::ArrayStats; use crate::vtable::Array; use crate::vtable::VTable; @@ -92,10 +93,21 @@ impl DynVTable for V { ) -> VortexResult { let metadata = V::deserialize(metadata, dtype, len, buffers, session)?; let inner = V::build(dtype, len, &metadata, buffers, children)?; - // Wrap in Array for safe downcasting (new path). - let array = Array::new(self.clone(), inner); - assert_eq!(array.len(), len, "Array length mismatch after building"); - assert_eq!(array.dtype(), dtype, "Array dtype mismatch after building"); + // Validate the inner array's properties before wrapping. + assert_eq!(V::len(&inner), len, "Array length mismatch after building"); + assert_eq!( + V::dtype(&inner), + dtype, + "Array dtype mismatch after building" + ); + // Wrap in Array for safe downcasting. + let array = Array::new( + self.clone(), + dtype.clone(), + len, + inner, + ArrayStats::default(), + ); Ok(array.into_array()) } diff --git a/vortex-array/src/vtable/mod.rs b/vortex-array/src/vtable/mod.rs index 6e8f1235800..6a13f8fc372 100644 --- a/vortex-array/src/vtable/mod.rs +++ b/vortex-array/src/vtable/mod.rs @@ -109,9 +109,6 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { fn child_name(array: &Self::Array, idx: usize) -> String; /// Exports metadata for an array. - /// - /// * If the array does not contain metadata, it should return - /// [`crate::metadata::EmptyMetadata`]. fn metadata(array: &Self::Array) -> VortexResult; /// Serialize metadata into a byte buffer for IPC or file storage. @@ -119,10 +116,6 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { fn serialize(metadata: Self::Metadata) -> VortexResult>>; /// Deserialize array metadata from a byte buffer. - /// - /// To reduce the serialized form, arrays do not store their own DType and length. Instead, - /// this is passed down from the parent array during deserialization. These properties are - /// exposed here for use during deserialization. fn deserialize( bytes: &[u8], _dtype: &DType, @@ -132,9 +125,6 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { ) -> VortexResult; /// Writes the array into a canonical builder. - /// - /// ## Post-conditions - /// - The length of the builder is incremented by the length of the input array. fn append_to_builder( array: &Self::Array, builder: &mut dyn ArrayBuilder, @@ -146,37 +136,6 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { } /// Build an array from components. - /// - /// This is called on the file and IPC deserialization pathways, to reconstruct the array from - /// type-erased components. - /// - /// Encoding implementers should take note that all validation necessary to ensure the encoding - /// is safe to read should happen inside of this method. - /// - /// # Safety and correctness - /// - /// This method should *never* panic, it must always return an error or else it returns a - /// valid `Array` that meets all the encoding's preconditions. - /// - /// For example, the `build` implementation for a dictionary encoding should ensure that all - /// codes lie in the valid range. For a UTF-8 array, it should check the bytes to ensure they - /// are all valid string data bytes. Any corrupt files or malformed data buffers should be - /// caught here, before returning the deserialized array. - /// - /// # Validation - /// - /// Validation is mainly meant to ensure that all internal pointers in the encoding reference - /// valid ranges of data, and that all data conforms to its DType constraints. These ensure - /// that no array operations will panic at runtime, or yield undefined behavior when unsafe - /// operations like `get_unchecked` use indices in the array buffer. - /// - /// Examples of the kinds of validation that should be part of the `build` step: - /// - /// * Checking that any offsets buffers point to valid offsets in some other child array - /// * Checking that any buffers for data or validity have the appropriate size for the - /// encoding - /// * Running UTF-8 validation for any buffers that are expected to hold flat UTF-8 data - // TODO(ngates): take the parts by ownership, since most arrays need them anyway fn build( dtype: &DType, len: usize, @@ -185,35 +144,13 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { children: &dyn ArrayChildren, ) -> VortexResult; - /// Replaces the children in `array` with `children`. The count must be the same and types - /// of children must be expected. + /// Replaces the children in `array` with `children`. fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()>; - /// Execute this array by returning an [`ExecutionResult`] that tells the scheduler what to - /// do next. - /// - /// Instead of recursively executing children, implementations should return - /// [`ExecutionResult::execute_child`] to request that the scheduler execute a child first, - /// or [`ExecutionResult::done`] when the encoding can produce a result directly. - /// - /// Array execution is designed such that repeated execution of an array will eventually - /// converge to a canonical representation. Implementations of this function should therefore - /// ensure they make progress towards that goal. - /// - /// The returned array (in `Done`) must be logically equivalent to the input array. In other - /// words, the recursively canonicalized forms of both arrays must be equal. - /// - /// Debug builds will panic if the returned array is of the wrong type, wrong length, or - /// incorrectly contains null values. + /// Execute this array by returning an [`ExecutionResult`]. fn execute(array: Arc, ctx: &mut ExecutionCtx) -> VortexResult; /// Attempt to execute the parent of this array. - /// - /// This function allows arrays to plug in specialized execution logic for their parent. For - /// example, strings compressed as FSST arrays can implement a custom equality comparison when - /// the comparing against a scalar string. - /// - /// Returns `Ok(None)` if no specialized execution is possible. fn execute_parent( array: &Self::Array, parent: &ArrayRef, @@ -224,20 +161,13 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { Ok(None) } - /// Attempt to reduce the array to a more simple representation. - /// - /// Returns `Ok(None)` if no reduction is possible. + /// Attempt to reduce the array to a simpler representation. fn reduce(array: &Self::Array) -> VortexResult> { _ = array; Ok(None) } /// Attempt to perform a reduction of the parent of this array. - /// - /// This function allows arrays to plug in reduction rules to their parents, for example - /// run-end arrays can pull-down scalar functions and apply them only over their values. - /// - /// Returns `Ok(None)` if no reduction is possible. fn reduce_parent( array: &Self::Array, parent: &ArrayRef, @@ -248,14 +178,13 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { } } +/// Alias for migration — downstream code can start using `ArrayVTable`. +pub use VTable as ArrayVTable; + /// Placeholder type used to indicate when a particular vtable is not supported by the encoding. pub struct NotSupported; /// Returns the validity as a child array if it produces one. -/// -/// - `NonNullable` and `AllValid` produce no child (returns `None`) -/// - `AllInvalid` produces a `ConstantArray` of `false` values -/// - `Array` returns the validity array #[inline] pub fn validity_to_child(validity: &Validity, len: usize) -> Option { match validity { @@ -281,8 +210,6 @@ pub fn patches_nchildren(patches: &Patches) -> usize { } /// Returns the child at the given index within a patches component. -/// -/// Index 0 = patch_indices, 1 = patch_values, 2 = patch_chunk_offsets (if present). #[inline] pub fn patches_child(patches: &Patches, idx: usize) -> ArrayRef { match idx { @@ -308,10 +235,10 @@ pub fn patches_child_name(idx: usize) -> &'static str { } } -/// Alias for migration — downstream code can start using `ArrayVTable` while `VTable` remains -/// the canonical name until Phase 1b renames it. -pub use VTable as ArrayVTable; - +/// vtable! macro — generates IntoArray, From, Deref, AsRef for inner array types. +/// +/// During the migration, IntoArray creates [`Array`] (the new typed wrapper) while +/// Deref/AsRef go through [`ArrayAdapter`] for backward-compatible DynArray access. #[macro_export] macro_rules! vtable { ($V:ident) => { @@ -337,8 +264,12 @@ macro_rules! vtable { impl $crate::IntoArray for [<$Base Array>] { fn into_array(self) -> $crate::ArrayRef { - // We can unsafe transmute ourselves to an ArrayAdapter. - std::sync::Arc::new(unsafe { std::mem::transmute::<[<$Base Array>], $crate::ArrayAdapter::<$VT>>(self) }) + use $crate::vtable::VTable; + let vtable = $VT::vtable(&self).clone(); + let dtype = $VT::dtype(&self).clone(); + let len = $VT::len(&self); + let stats = $VT::stats(&self).to_array_stats(); + std::sync::Arc::new($crate::vtable::Array::new(vtable, dtype, len, self, stats)) } } diff --git a/vortex-array/src/vtable/typed.rs b/vortex-array/src/vtable/typed.rs index 856e1cf1638..793963d2a0c 100644 --- a/vortex-array/src/vtable/typed.rs +++ b/vortex-array/src/vtable/typed.rs @@ -1,15 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Typed array wrapper [`Array`] that pairs a [`VTable`] instance with the inner array data. -//! -//! This is the new counterpart to [`ArrayAdapter`](crate::ArrayAdapter), introduced as part of -//! the vtable migration. Unlike ArrayAdapter (which is `#[repr(transparent)]` over `V::Array`), -//! `Array` stores the vtable instance alongside the inner data, enabling safe downcasting -//! through standard `Arc::downcast` instead of unsafe transmutes. -//! -//! During the migration, both `Array` and `ArrayAdapter` coexist behind `ArrayRef`. The -//! [`Matcher`](crate::matcher::Matcher) implementation tries both types when downcasting. +//! Typed array wrapper [`Array`] that pairs a [`VTable`] with common fields and inner data. use std::fmt::Debug; use std::fmt::Formatter; @@ -18,36 +10,76 @@ use std::sync::Arc; use crate::ArrayRef; use crate::IntoArray; +use crate::dtype::DType; +use crate::stats::ArrayStats; +use crate::stats::StatsSetRef; +use crate::vtable::ArrayId; use crate::vtable::VTable; /// A typed array, parameterized by a concrete [`VTable`]. /// -/// This struct holds the vtable instance alongside the encoding-specific data (`V::Array`). -/// It implements [`Deref`] to `V::Array`, so encoding-specific methods are callable directly -/// on `&Array`. +/// This struct holds the vtable instance, common fields (dtype, len, stats), and the +/// encoding-specific data (`V::Array`). It implements [`Deref`] to `V::Array`, so +/// encoding-specific methods are callable directly on `&Array`. /// -/// Common array properties (dtype, len, stats) are still accessed through `V` methods on -/// the inner array during the transition period. +/// Construct via encoding-specific constructors and type-erase with +/// [`into_array()`](IntoArray::into_array). pub struct Array { vtable: V, + pub(crate) dtype: DType, + pub(crate) len: usize, pub(crate) array: V::Array, + pub(crate) stats: ArrayStats, } +#[allow(clippy::same_name_method)] impl Array { - /// Create a new typed array wrapping a vtable and inner array data. - pub fn new(vtable: V, array: V::Array) -> Self { - Self { vtable, array } + /// Create a new typed array. + pub fn new(vtable: V, dtype: DType, len: usize, array: V::Array, stats: ArrayStats) -> Self { + Self { + vtable, + dtype, + len, + array, + stats, + } } /// Returns a reference to the vtable. - /// - /// Note: this is intentionally named differently from `DynArray::vtable()` to avoid - /// clippy::same_name_method. Use this for typed vtable access, or `DynArray::vtable()` - /// for the dynamic vtable reference. pub fn typed_vtable(&self) -> &V { &self.vtable } + /// Returns the dtype of this array. + pub fn dtype(&self) -> &DType { + &self.dtype + } + + /// Returns the length of this array. + pub fn len(&self) -> usize { + self.len + } + + /// Returns whether this array is empty. + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Returns the encoding ID of this array. + pub fn encoding_id(&self) -> ArrayId { + self.vtable.id() + } + + /// Returns the statistics for this array. + pub fn statistics(&self) -> StatsSetRef<'_> { + self.stats.to_ref(self) + } + + /// Returns a reference to the underlying [`ArrayStats`]. + pub fn array_stats(&self) -> &ArrayStats { + &self.stats + } + /// Returns a reference to the inner encoding-specific array data. pub fn inner(&self) -> &V::Array { &self.array @@ -57,6 +89,11 @@ impl Array { pub fn into_inner(self) -> V::Array { self.array } + + /// Returns a cloned [`ArrayRef`] for this array. + pub fn to_array_ref(&self) -> ArrayRef { + Arc::new(self.clone()) + } } impl Deref for Array { @@ -71,7 +108,10 @@ impl Clone for Array { fn clone(&self) -> Self { Self { vtable: self.vtable.clone(), + dtype: self.dtype.clone(), + len: self.len, array: self.array.clone(), + stats: self.stats.clone(), } } } @@ -80,6 +120,8 @@ impl Debug for Array { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("Array") .field("encoding", &self.vtable.id()) + .field("dtype", &self.dtype) + .field("len", &self.len) .field("inner", &self.array) .finish() } From f477c962fb41d154e544e55eb428e24f0a5982b6 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Thu, 26 Mar 2026 13:24:34 -0400 Subject: [PATCH 2/3] Phase 1b Signed-off-by: Nicholas Gates --- vortex-array/src/array/mod.rs | 17 ++++++++++------- vortex-array/src/vtable/dyn_.rs | 17 ++++++++++------- vortex-array/src/vtable/mod.rs | 7 +++++-- vortex-array/src/vtable/typed.rs | 14 ++++++++++++-- vortex-python/src/arrays/native.rs | 11 +++++++---- 5 files changed, 44 insertions(+), 22 deletions(-) diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index 7945701c4fc..94cd6f1d628 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -632,13 +632,16 @@ impl DynArray for Array { fn with_children(&self, children: Vec) -> VortexResult { let mut inner = self.array.clone(); V::with_children(&mut inner, children)?; - Ok(Array::new( - self.typed_vtable().clone(), - self.dtype.clone(), - self.len, - inner, - self.stats.clone(), - ) + // SAFETY: with_children preserves dtype and len. + Ok(unsafe { + Array::new_unchecked( + self.typed_vtable().clone(), + self.dtype.clone(), + self.len, + inner, + self.stats.clone(), + ) + } .into_array()) } } diff --git a/vortex-array/src/vtable/dyn_.rs b/vortex-array/src/vtable/dyn_.rs index be45ef3c70f..c0b2954fbe1 100644 --- a/vortex-array/src/vtable/dyn_.rs +++ b/vortex-array/src/vtable/dyn_.rs @@ -101,13 +101,16 @@ impl DynVTable for V { "Array dtype mismatch after building" ); // Wrap in Array for safe downcasting. - let array = Array::new( - self.clone(), - dtype.clone(), - len, - inner, - ArrayStats::default(), - ); + // SAFETY: We just validated that V::len(&inner) == len and V::dtype(&inner) == dtype. + let array = unsafe { + Array::new_unchecked( + self.clone(), + dtype.clone(), + len, + inner, + ArrayStats::default(), + ) + }; Ok(array.into_array()) } diff --git a/vortex-array/src/vtable/mod.rs b/vortex-array/src/vtable/mod.rs index 6a13f8fc372..dd6b5347932 100644 --- a/vortex-array/src/vtable/mod.rs +++ b/vortex-array/src/vtable/mod.rs @@ -238,7 +238,7 @@ pub fn patches_child_name(idx: usize) -> &'static str { /// vtable! macro — generates IntoArray, From, Deref, AsRef for inner array types. /// /// During the migration, IntoArray creates [`Array`] (the new typed wrapper) while -/// Deref/AsRef go through [`ArrayAdapter`] for backward-compatible DynArray access. +/// Deref/AsRef go through AlsoArrayAdapter for backward-compatible DynArray access. #[macro_export] macro_rules! vtable { ($V:ident) => { @@ -269,7 +269,10 @@ macro_rules! vtable { let dtype = $VT::dtype(&self).clone(); let len = $VT::len(&self); let stats = $VT::stats(&self).to_array_stats(); - std::sync::Arc::new($crate::vtable::Array::new(vtable, dtype, len, self, stats)) + // SAFETY: dtype and len are extracted from `self` via VTable methods. + std::sync::Arc::new(unsafe { + $crate::vtable::Array::new_unchecked(vtable, dtype, len, self, stats) + }) } } diff --git a/vortex-array/src/vtable/typed.rs b/vortex-array/src/vtable/typed.rs index 793963d2a0c..8bfc23aa835 100644 --- a/vortex-array/src/vtable/typed.rs +++ b/vortex-array/src/vtable/typed.rs @@ -34,8 +34,18 @@ pub struct Array { #[allow(clippy::same_name_method)] impl Array { - /// Create a new typed array. - pub fn new(vtable: V, dtype: DType, len: usize, array: V::Array, stats: ArrayStats) -> Self { + /// Create a new typed array without validating that the inner array's dtype/len match. + /// + /// # Safety + /// + /// The caller must ensure that `V::dtype(&array) == &dtype` and `V::len(&array) == len`. + pub unsafe fn new_unchecked( + vtable: V, + dtype: DType, + len: usize, + array: V::Array, + stats: ArrayStats, + ) -> Self { Self { vtable, dtype, diff --git a/vortex-python/src/arrays/native.rs b/vortex-python/src/arrays/native.rs index e7a1d86c382..34e7cae3594 100644 --- a/vortex-python/src/arrays/native.rs +++ b/vortex-python/src/arrays/native.rs @@ -21,6 +21,7 @@ use vortex::array::arrays::Primitive; use vortex::array::arrays::Struct; use vortex::array::arrays::VarBin; use vortex::array::arrays::VarBinView; +use vortex::array::vtable::Array; use vortex::array::vtable::VTable; use vortex::encodings::alp::ALP; use vortex::encodings::alp::ALPRD; @@ -251,10 +252,12 @@ pub trait AsArrayRef { impl AsArrayRef<::Array> for PyRef<'_, V> { fn as_array_ref(&self) -> &::Array { - self.as_super() - .inner() - .as_any() - .downcast_ref::>() + let any = self.as_super().inner().as_any(); + // Try new Array path first, then fall back to legacy ArrayAdapter. + if let Some(typed) = any.downcast_ref::>() { + return typed.inner(); + } + any.downcast_ref::>() .vortex_expect("Failed to downcast array") .as_inner() } From ed6138c25d98b10930a816d4d16d803c3f593455 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Thu, 26 Mar 2026 14:14:57 -0400 Subject: [PATCH 3/3] Phase 1b Signed-off-by: Nicholas Gates --- vortex-array/public-api.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 6af9fb87d60..88bdca80481 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -20702,7 +20702,7 @@ pub fn vortex_array::vtable::Array::is_empty(&self) -> bool pub fn vortex_array::vtable::Array::len(&self) -> usize -pub fn vortex_array::vtable::Array::new(vtable: V, dtype: vortex_array::dtype::DType, len: usize, array: ::Array, stats: vortex_array::stats::ArrayStats) -> Self +pub unsafe fn vortex_array::vtable::Array::new_unchecked(vtable: V, dtype: vortex_array::dtype::DType, len: usize, array: ::Array, stats: vortex_array::stats::ArrayStats) -> Self pub fn vortex_array::vtable::Array::statistics(&self) -> vortex_array::stats::StatsSetRef<'_>