@@ -26,17 +26,20 @@ use arrow::array::{
2626use arrow:: buffer:: { Buffer , MutableBuffer , NullBuffer , ScalarBuffer } ;
2727use arrow:: datatypes:: DataType ;
2828
29- /// Optimized version of the StringBuilder in Arrow that:
30- /// 1. Precalculating the expected length of the result, avoiding reallocations.
31- /// 2. Avoids creating / incrementally creating a `NullBufferBuilder`
32- pub struct StringArrayBuilder {
29+ /// Builder used by `concat`/`concat_ws` to assemble a [`StringArray`] one row
30+ /// at a time from multiple input columns.
31+ ///
32+ /// Each row is written via repeated `write` calls, followed by a single
33+ /// `append_offset` call to commit the row. The output null buffer is supplied
34+ /// by the caller at `finish` time.
35+ pub ( crate ) struct ConcatStringBuilder {
/// NOTE(review): presumably the per-row offsets into `value_buffer`; the
/// visible start of `with_capacity` derives a capacity from
/// `item_capacity + 1` with a checked add -- confirm against the full impl.
3336 offsets_buffer : MutableBuffer ,
/// NOTE(review): presumably the bytes appended by `write` for every row,
/// stored back to back -- confirm against the full impl.
3437 value_buffer : MutableBuffer ,
3538 /// If true, a safety check is required during the `finish` call
3639 tainted : bool ,
3740}
3841
39- impl StringArrayBuilder {
42+ impl ConcatStringBuilder {
4043 pub fn with_capacity ( item_capacity : usize , data_capacity : usize ) -> Self {
4144 let capacity = item_capacity
4245 . checked_add ( 1 )
@@ -151,21 +154,21 @@ impl StringArrayBuilder {
151154 }
152155}
153156
154- /// Optimized version of Arrow's [`StringViewBuilder`]. Rather than adding NULLs
155- /// on a row-by-row basis, the caller should provide nulls when calling
156- /// [`finish`](Self::finish). This allows callers to compute nulls more
157- /// efficiently (e.g., via bulk bitmap operations).
157+ /// Builder used by `concat`/`concat_ws` to assemble a [`StringViewArray`] one
158+ /// row at a time from multiple input columns.
158159///
159- /// [`StringViewBuilder`]: arrow::array::StringViewBuilder
160- pub struct StringViewArrayBuilder {
160+ /// Each row is written via repeated `write` calls, followed by a single
161+ /// `append_offset` call to commit the row as a single string view. The output
162+ /// null buffer is supplied by the caller at `finish` time.
163+ pub ( crate ) struct ConcatStringViewBuilder {
/// Views collected so far; `with_capacity` pre-sizes this vector to
/// `item_capacity`. NOTE(review): presumably one `u128` view per committed
/// row -- confirm against `append_offset`.
161164 views : Vec < u128 > ,
/// NOTE(review): how `data` and `block` divide the string bytes is not
/// visible in this hunk -- confirm against `write` / `append_offset`.
162165 data : Vec < u8 > ,
163166 block : Vec < u8 > ,
164167 /// If true, a safety check is required during the `append_offset` call
165168 tainted : bool ,
166169}
167170
168- impl StringViewArrayBuilder {
171+ impl ConcatStringViewBuilder {
169172 pub fn with_capacity ( item_capacity : usize , data_capacity : usize ) -> Self {
170173 Self {
171174 views : Vec :: with_capacity ( item_capacity) ,
@@ -286,14 +289,17 @@ impl StringViewArrayBuilder {
286289 }
287290}
288291
289- pub struct LargeStringArrayBuilder {
292+ /// Builder used by `concat`/`concat_ws` to assemble a [`LargeStringArray`] one
293+ /// row at a time from multiple input columns. See [`ConcatStringBuilder`] for
294+ /// details on the row-composition contract.
295+ pub ( crate ) struct ConcatLargeStringBuilder {
/// NOTE(review): presumably the per-row offsets into `value_buffer`; the
/// visible start of `with_capacity` derives a capacity from
/// `item_capacity + 1` with a checked add -- confirm against the full impl.
290296 offsets_buffer : MutableBuffer ,
/// NOTE(review): presumably the bytes appended by `write` for every row,
/// stored back to back -- confirm against the full impl.
291297 value_buffer : MutableBuffer ,
292298 /// If true, a safety check is required during the `finish` call
293299 tainted : bool ,
294300}
295301
296- impl LargeStringArrayBuilder {
302+ impl ConcatLargeStringBuilder {
297303 pub fn with_capacity ( item_capacity : usize , data_capacity : usize ) -> Self {
298304 let capacity = item_capacity
299305 . checked_add ( 1 )
@@ -426,7 +432,7 @@ impl LargeStringArrayBuilder {
426432/// LLVM is apparently overly eager to inline this function into some hot loops,
427433/// which bloats them and regresses performance, so we disable inlining for now.
428434#[ inline( never) ]
429- pub fn append_view (
435+ pub ( crate ) fn append_view (
430436 views_buffer : & mut Vec < u128 > ,
431437 original_view : & u128 ,
432438 substr : & str ,
@@ -447,7 +453,7 @@ pub fn append_view(
447453}
448454
449455#[ derive( Debug ) ]
450- pub enum ColumnarValueRef < ' a > {
456+ pub ( crate ) enum ColumnarValueRef < ' a > {
451457 Scalar ( & ' a [ u8 ] ) ,
452458 NullableArray ( & ' a StringArray ) ,
453459 NonNullableArray ( & ' a StringArray ) ,
@@ -497,13 +503,13 @@ mod tests {
497503
498504 #[ test]
499505 #[ should_panic( expected = "capacity integer overflow" ) ]
500- fn test_overflow_string_array_builder ( ) {
501- let _builder = StringArrayBuilder :: with_capacity ( usize:: MAX , usize:: MAX ) ;
506+ fn test_overflow_concat_string_builder ( ) {
// Asking for `usize::MAX` items and bytes must panic with the message
// named in `should_panic` above instead of building a builder.
507+ let _builder = ConcatStringBuilder :: with_capacity ( usize:: MAX , usize:: MAX ) ;
502508 }
503509
504510 #[ test]
505511 #[ should_panic( expected = "capacity integer overflow" ) ]
506- fn test_overflow_large_string_array_builder ( ) {
507- let _builder = LargeStringArrayBuilder :: with_capacity ( usize:: MAX , usize:: MAX ) ;
512+ fn test_overflow_concat_large_string_builder ( ) {
// Asking for `usize::MAX` items and bytes must panic with the message
// named in `should_panic` above instead of building a builder.
513+ let _builder = ConcatLargeStringBuilder :: with_capacity ( usize:: MAX , usize:: MAX ) ;
508514 }
509515}
0 commit comments