From cc6513fe1a7870692b2c683f15bd92a0d8e6c628 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Wed, 6 May 2026 08:23:34 -0400 Subject: [PATCH 1/6] Add tests for ordered dictionaries --- cpp/src/arrow/array/array_dict_test.cc | 77 ++++++++++++++++++++++++++ r/tests/testthat/test-Array.R | 21 +++++++ 2 files changed, 98 insertions(+) diff --git a/cpp/src/arrow/array/array_dict_test.cc b/cpp/src/arrow/array/array_dict_test.cc index 22d6d1fc5ae9..3bb46bdd573a 100644 --- a/cpp/src/arrow/array/array_dict_test.cc +++ b/cpp/src/arrow/array/array_dict_test.cc @@ -1761,4 +1761,81 @@ TEST(TestDictionaryUnifier, TableZeroColumns) { AssertTablesEqual(*table, *unified); } +// GH-49689: Ordered dictionary tests + +TEST(TestDictionaryBuilderOrdered, TypePreservesOrderedFlag) { + auto ordered_type = dictionary(int8(), utf8(), /*ordered=*/true); + std::unique_ptr boxed_builder; + ASSERT_OK(MakeBuilder(default_memory_pool(), ordered_type, &boxed_builder)); + + auto builder_type = checked_cast(*boxed_builder->type()); + ASSERT_TRUE(builder_type.ordered()); +} + +TEST(TestDictionaryBuilderOrdered, UnorderedTypeStaysUnordered) { + auto unordered_type = dictionary(int8(), utf8(), /*ordered=*/false); + std::unique_ptr boxed_builder; + ASSERT_OK(MakeBuilder(default_memory_pool(), unordered_type, &boxed_builder)); + + auto builder_type = checked_cast(*boxed_builder->type()); + ASSERT_FALSE(builder_type.ordered()); +} + +TEST(TestDictionaryBuilderOrdered, FinishPreservesOrderedFlag) { + auto ordered_type = dictionary(int8(), utf8(), /*ordered=*/true); + std::unique_ptr boxed_builder; + ASSERT_OK(MakeBuilder(default_memory_pool(), ordered_type, &boxed_builder)); + auto& builder = checked_cast&>(*boxed_builder); + + ASSERT_OK(builder.Append("a")); + ASSERT_OK(builder.Append("b")); + ASSERT_OK(builder.Append("a")); + + std::shared_ptr result; + ASSERT_OK(builder.Finish(&result)); + + auto result_type = checked_cast(*result->type()); + ASSERT_TRUE(result_type.ordered()); + + auto ex_dict = ArrayFromJSON(utf8(), R"(["a", "b"])"); + auto ex_indices = ArrayFromJSON(int8(), "[0, 1, 0]"); + DictionaryArray expected(ordered_type, ex_indices, ex_dict); + AssertArraysEqual(expected, *result); +} + +TEST(TestDictionaryBuilderOrdered, ListOfOrderedDictionary) { + auto ordered_dict_type = dictionary(int8(), utf8(), /*ordered=*/true); + auto list_type = list(field("item", ordered_dict_type)); + + std::unique_ptr boxed_builder; + ASSERT_OK(MakeBuilder(default_memory_pool(), list_type, &boxed_builder)); + auto& list_builder = checked_cast(*boxed_builder); + auto& dict_builder = + checked_cast&>(*list_builder.value_builder()); + + ASSERT_OK(list_builder.Append()); + ASSERT_OK(dict_builder.Append("a")); + ASSERT_OK(dict_builder.Append("b")); + ASSERT_OK(list_builder.Append()); + ASSERT_OK(dict_builder.Append("a")); + + std::shared_ptr result; + ASSERT_OK(list_builder.Finish(&result)); + + auto result_list_type = checked_cast(*result->type()); + auto result_dict_type = + checked_cast(*result_list_type.value_type()); + ASSERT_TRUE(result_dict_type.ordered()); +} + +TEST(TestDictionaryBuilderOrdered, MakeDictionaryBuilderPreservesOrdered) { + auto ordered_type = dictionary(int8(), utf8(), /*ordered=*/true); + std::unique_ptr builder; + ASSERT_OK(MakeDictionaryBuilder(default_memory_pool(), ordered_type, + /*dictionary=*/nullptr, &builder)); + + auto builder_type = checked_cast(*builder->type()); + ASSERT_TRUE(builder_type.ordered()); +} + } // namespace arrow diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 8520160d1255..9fcc3e6b868d 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -643,6 +643,13 @@ test_that("arrow_array() handles vector -> list arrays (ARROW-7662)", { expect_array_roundtrip(list(factor(c("b", "a"), levels = c("a", "b"))), list_of(dictionary(int8(), utf8()))) expect_array_roundtrip(list(factor(NA, levels = c("a", "b"))), list_of(dictionary(int8(), utf8()))) + # ordered factor (GH-49689) + expect_array_roundtrip( + list(ordered(c("b", "a"), levels = c("a", "b"))), + list_of(dictionary(int8(), utf8(), ordered = TRUE)) + ) + expect_array_roundtrip(list(ordered(NA, levels = c("a", "b"))), list_of(dictionary(int8(), utf8(), ordered = TRUE))) + # struct expect_array_roundtrip( list(tibble::tibble(a = integer(0), b = integer(0), c = character(0), d = logical(0))), @@ -742,6 +749,13 @@ test_that("arrow_array() handles vector -> large list arrays", { as = large_list_of(dictionary(int8(), utf8())) ) + # ordered factor (GH-49689) + expect_array_roundtrip( + list(ordered(c("b", "a"), levels = c("a", "b"))), + large_list_of(dictionary(int8(), utf8(), ordered = TRUE)), + as = large_list_of(dictionary(int8(), utf8(), ordered = TRUE)) + ) + # struct expect_array_roundtrip( list(tibble::tibble(a = integer(0), b = integer(0), c = character(0), d = logical(0))), @@ -809,6 +823,13 @@ test_that("arrow_array() handles vector -> fixed size list arrays", { as = fixed_size_list_of(dictionary(int8(), utf8()), 2L) ) + # ordered factor (GH-49689) + expect_array_roundtrip( + list(ordered(c("b", "a"), levels = c("a", "b"))), + fixed_size_list_of(dictionary(int8(), utf8(), ordered = TRUE), 2L), + as = fixed_size_list_of(dictionary(int8(), utf8(), ordered = TRUE), 2L) + ) + # struct expect_array_roundtrip( list(tibble::tibble(a = 1L, b = 1L, c = "", d = TRUE)), From 2205a435574a2ac47fb018ea837f88252638b08b Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Wed, 6 May 2026 08:45:03 -0400 Subject: [PATCH 2/6] Fix failing tests to fail properly --- cpp/src/arrow/array/array_dict_test.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/array/array_dict_test.cc b/cpp/src/arrow/array/array_dict_test.cc index 3bb46bdd573a..6914468c2f93 100644 --- a/cpp/src/arrow/array/array_dict_test.cc +++ b/cpp/src/arrow/array/array_dict_test.cc @@ -1768,7 +1768,7 @@ TEST(TestDictionaryBuilderOrdered, TypePreservesOrderedFlag) { std::unique_ptr boxed_builder; ASSERT_OK(MakeBuilder(default_memory_pool(), ordered_type, &boxed_builder)); - auto builder_type = checked_cast(*boxed_builder->type()); + const auto& builder_type = checked_cast(*boxed_builder->type()); ASSERT_TRUE(builder_type.ordered()); } @@ -1777,7 +1777,7 @@ TEST(TestDictionaryBuilderOrdered, UnorderedTypeStaysUnordered) { std::unique_ptr boxed_builder; ASSERT_OK(MakeBuilder(default_memory_pool(), unordered_type, &boxed_builder)); - auto builder_type = checked_cast(*boxed_builder->type()); + const auto& builder_type = checked_cast(*boxed_builder->type()); ASSERT_FALSE(builder_type.ordered()); } @@ -1794,7 +1794,7 @@ TEST(TestDictionaryBuilderOrdered, FinishPreservesOrderedFlag) { std::shared_ptr result; ASSERT_OK(builder.Finish(&result)); - auto result_type = checked_cast(*result->type()); + const auto& result_type = checked_cast(*result->type()); ASSERT_TRUE(result_type.ordered()); auto ex_dict = ArrayFromJSON(utf8(), R"(["a", "b"])"); @@ -1822,8 +1822,8 @@ TEST(TestDictionaryBuilderOrdered, ListOfOrderedDictionary) { std::shared_ptr result; ASSERT_OK(list_builder.Finish(&result)); - auto result_list_type = checked_cast(*result->type()); - auto result_dict_type = + const auto& result_list_type = checked_cast(*result->type()); + const auto& result_dict_type = checked_cast(*result_list_type.value_type()); ASSERT_TRUE(result_dict_type.ordered()); } @@ -1834,7 +1834,7 @@ TEST(TestDictionaryBuilderOrdered, MakeDictionaryBuilderPreservesOrdered) { ASSERT_OK(MakeDictionaryBuilder(default_memory_pool(), ordered_type, /*dictionary=*/nullptr, &builder)); - auto builder_type = checked_cast(*builder->type()); + const auto& builder_type = checked_cast(*builder->type()); ASSERT_TRUE(builder_type.ordered()); } From f25c976ea79cc16e105978d6f8557708cf000d52 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Wed, 6 May 2026 09:41:00 -0400 Subject: [PATCH 3/6] Plumb in changes needed for ordered dictionaries --- cpp/src/arrow/array/builder_dict.h | 33 +++++++++++++++++++++--------- cpp/src/arrow/builder.cc | 27 +++++++++++++++++------- r/src/r_to_arrow.cpp | 8 -------- 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/cpp/src/arrow/array/builder_dict.h b/cpp/src/arrow/array/builder_dict.h index 116c82049eea..ef67ffdccb33 100644 --- a/cpp/src/arrow/array/builder_dict.h +++ b/cpp/src/arrow/array/builder_dict.h @@ -160,7 +160,8 @@ class DictionaryBuilderBase : public ArrayBuilder { delta_offset_(0), byte_width_(-1), indices_builder_(start_int_size, pool, alignment), - value_type_(value_type) {} + value_type_(value_type), + ordered_(false) {} template explicit DictionaryBuilderBase( @@ -173,7 +174,8 @@ class DictionaryBuilderBase : public ArrayBuilder { delta_offset_(0), byte_width_(-1), indices_builder_(pool, alignment), - value_type_(value_type) {} + value_type_(value_type), + ordered_(false) {} template explicit DictionaryBuilderBase( @@ -187,7 +189,8 @@ class DictionaryBuilderBase : public ArrayBuilder { delta_offset_(0), byte_width_(-1), indices_builder_(index_type, pool, alignment), - value_type_(value_type) {} + value_type_(value_type), + ordered_(false) {} template DictionaryBuilderBase(uint8_t start_int_size, @@ -202,7 +205,8 @@ class DictionaryBuilderBase : public ArrayBuilder { delta_offset_(0), byte_width_(static_cast(*value_type).byte_width()), indices_builder_(start_int_size, pool, alignment), - value_type_(value_type) {} + value_type_(value_type), + ordered_(false) {} template explicit DictionaryBuilderBase( @@ -214,7 +218,8 @@ class DictionaryBuilderBase : public ArrayBuilder { delta_offset_(0), byte_width_(static_cast(*value_type).byte_width()), indices_builder_(pool, alignment), - value_type_(value_type) {} + value_type_(value_type), + ordered_(false) {} template explicit DictionaryBuilderBase( @@ -227,7 +232,8 @@ class DictionaryBuilderBase : public ArrayBuilder { delta_offset_(0), byte_width_(static_cast(*value_type).byte_width()), indices_builder_(index_type, pool, alignment), - value_type_(value_type) {} + value_type_(value_type), + ordered_(false) {} template explicit DictionaryBuilderBase( @@ -243,7 +249,8 @@ class DictionaryBuilderBase : public ArrayBuilder { delta_offset_(0), byte_width_(-1), indices_builder_(pool, alignment), - value_type_(dictionary->type()) {} + value_type_(dictionary->type()), + ordered_(false) {} ~DictionaryBuilderBase() override = default; @@ -490,9 +497,11 @@ class DictionaryBuilderBase : public ArrayBuilder { Status Finish(std::shared_ptr* out) { return FinishTyped(out); } std::shared_ptr type() const override { - return ::arrow::dictionary(indices_builder_.type(), value_type_); + return ::arrow::dictionary(indices_builder_.type(), value_type_, ordered_); } + void set_ordered(bool ordered) { ordered_ = ordered; } + protected: template Status AppendArraySliceImpl(const typename TypeTraits::ArrayType& dict, @@ -561,6 +570,7 @@ class DictionaryBuilderBase : public ArrayBuilder { BuilderType indices_builder_; std::shared_ptr value_type_; + bool ordered_; }; template @@ -647,7 +657,7 @@ class DictionaryBuilderBase : public ArrayBuilder { Status FinishInternal(std::shared_ptr* out) override { ARROW_RETURN_NOT_OK(indices_builder_.FinishInternal(out)); - (*out)->type = dictionary((*out)->type, null()); + (*out)->type = dictionary((*out)->type, null(), ordered_); (*out)->dictionary = NullArray(0).data(); return Status::OK(); } @@ -659,11 +669,14 @@ class DictionaryBuilderBase : public ArrayBuilder { Status Finish(std::shared_ptr* out) { return FinishTyped(out); } std::shared_ptr type() const override { - return ::arrow::dictionary(indices_builder_.type(), null()); + return ::arrow::dictionary(indices_builder_.type(), null(), ordered_); } + void set_ordered(bool ordered) { ordered_ = ordered; } + protected: BuilderType indices_builder_; + bool ordered_ = false; }; } // namespace internal diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index 1a6a718c15e1..b7704a2a59a3 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -168,17 +168,24 @@ struct DictionaryBuilderCase { template Status CreateFor() { using AdaptiveBuilderType = DictionaryBuilder; + using ExactBuilderType = + internal::DictionaryBuilderBase; if (dictionary != nullptr) { - out->reset(new AdaptiveBuilderType(dictionary, pool)); + auto builder = new AdaptiveBuilderType(dictionary, pool); + builder->set_ordered(ordered); + out->reset(builder); } else if (exact_index_type) { if (!is_integer(index_type->id())) { return Status::TypeError("MakeBuilder: invalid index type ", *index_type); } - out->reset(new internal::DictionaryBuilderBase( - index_type, value_type, pool)); + auto builder = new ExactBuilderType(index_type, value_type, pool); + builder->set_ordered(ordered); + out->reset(builder); } else { auto start_int_size = index_type->byte_width(); - out->reset(new AdaptiveBuilderType(start_int_size, value_type, pool)); + auto builder = new AdaptiveBuilderType(start_int_size, value_type, pool); + builder->set_ordered(ordered); + out->reset(builder); } return Status::OK(); } @@ -190,6 +197,7 @@ struct DictionaryBuilderCase { const std::shared_ptr& value_type; const std::shared_ptr& dictionary; bool exact_index_type; + bool ordered; std::unique_ptr* out; }; @@ -206,6 +214,7 @@ struct MakeBuilderImpl { dict_type.value_type(), /*dictionary=*/nullptr, exact_index_type, + dict_type.ordered(), &out}; return visitor.Make(); } @@ -332,9 +341,13 @@ Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr& const std::shared_ptr& dictionary, std::unique_ptr* out) { const auto& dict_type = static_cast(*type); - DictionaryBuilderCase visitor = { - pool, dict_type.index_type(), dict_type.value_type(), - dictionary, /*exact_index_type=*/false, out}; + DictionaryBuilderCase visitor = {pool, + dict_type.index_type(), + dict_type.value_type(), + dictionary, + /*exact_index_type=*/false, + dict_type.ordered(), + out}; return visitor.Make(); } diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 45d68043af5a..e521b0e8db32 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -996,14 +996,6 @@ class RDictionaryConverter> Result> ToChunkedArray() override { ARROW_ASSIGN_OR_RAISE(auto result, this->builder_->Finish()); - auto result_type = checked_cast(result->type().get()); - if (this->dict_type_->ordered() && !result_type->ordered()) { - // TODO: we should not have to do that, there is probably something wrong - // in the DictionaryBuilder code - result->data()->type = - arrow::dictionary(result_type->index_type(), result_type->value_type(), true); - } - return std::make_shared( std::make_shared(result->data())); } From 4adb92cba37ef08cfcd45c8e192080a8bfc90a21 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Wed, 6 May 2026 11:19:23 -0400 Subject: [PATCH 4/6] Fix dangling reference --- cpp/src/arrow/builder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index b7704a2a59a3..04a8860eb833 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -195,7 +195,7 @@ struct DictionaryBuilderCase { MemoryPool* pool; const std::shared_ptr& index_type; const std::shared_ptr& value_type; - const std::shared_ptr& dictionary; + std::shared_ptr dictionary; bool exact_index_type; bool ordered; std::unique_ptr* out; From f3eb5a05c65fe81b138512ab40532dedd702e924 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Wed, 6 May 2026 15:05:42 -0400 Subject: [PATCH 5/6] fix tests --- cpp/src/arrow/array/array_dict_test.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/array/array_dict_test.cc b/cpp/src/arrow/array/array_dict_test.cc index 6914468c2f93..2001c6318367 100644 --- a/cpp/src/arrow/array/array_dict_test.cc +++ b/cpp/src/arrow/array/array_dict_test.cc @@ -1768,8 +1768,8 @@ TEST(TestDictionaryBuilderOrdered, TypePreservesOrderedFlag) { std::unique_ptr boxed_builder; ASSERT_OK(MakeBuilder(default_memory_pool(), ordered_type, &boxed_builder)); - const auto& builder_type = checked_cast(*boxed_builder->type()); - ASSERT_TRUE(builder_type.ordered()); + auto builder_type = boxed_builder->type(); + ASSERT_TRUE(checked_cast(*builder_type).ordered()); } TEST(TestDictionaryBuilderOrdered, UnorderedTypeStaysUnordered) { @@ -1777,8 +1777,8 @@ TEST(TestDictionaryBuilderOrdered, UnorderedTypeStaysUnordered) { std::unique_ptr boxed_builder; ASSERT_OK(MakeBuilder(default_memory_pool(), unordered_type, &boxed_builder)); - const auto& builder_type = checked_cast(*boxed_builder->type()); - ASSERT_FALSE(builder_type.ordered()); + auto builder_type = boxed_builder->type(); + ASSERT_FALSE(checked_cast(*builder_type).ordered()); } TEST(TestDictionaryBuilderOrdered, FinishPreservesOrderedFlag) { @@ -1834,8 +1834,8 @@ TEST(TestDictionaryBuilderOrdered, MakeDictionaryBuilderPreservesOrdered) { ASSERT_OK(MakeDictionaryBuilder(default_memory_pool(), ordered_type, /*dictionary=*/nullptr, &builder)); - const auto& builder_type = checked_cast(*builder->type()); - ASSERT_TRUE(builder_type.ordered()); + auto builder_type = builder->type(); + ASSERT_TRUE(checked_cast(*builder_type).ordered()); } } // namespace arrow From 66d7c5327367f797c75d7e3cec59a91401fdbbec Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Thu, 28 May 2026 12:51:12 +0100 Subject: [PATCH 6/6] Changes from PR feedback --- cpp/src/arrow/array/array_dict_test.cc | 117 +++++++++++++------------ cpp/src/arrow/array/builder_dict.h | 81 ++++++++++------- cpp/src/arrow/builder.cc | 15 ++-- 3 files changed, 116 insertions(+), 97 deletions(-) diff --git a/cpp/src/arrow/array/array_dict_test.cc b/cpp/src/arrow/array/array_dict_test.cc index 2001c6318367..5b1af6f8cb3f 100644 --- a/cpp/src/arrow/array/array_dict_test.cc +++ b/cpp/src/arrow/array/array_dict_test.cc @@ -1764,78 +1764,81 @@ TEST(TestDictionaryUnifier, TableZeroColumns) { // GH-49689: Ordered dictionary tests TEST(TestDictionaryBuilderOrdered, TypePreservesOrderedFlag) { - auto ordered_type = dictionary(int8(), utf8(), /*ordered=*/true); - std::unique_ptr boxed_builder; - ASSERT_OK(MakeBuilder(default_memory_pool(), ordered_type, &boxed_builder)); - - auto builder_type = boxed_builder->type(); - ASSERT_TRUE(checked_cast(*builder_type).ordered()); -} - -TEST(TestDictionaryBuilderOrdered, UnorderedTypeStaysUnordered) { - auto unordered_type = dictionary(int8(), utf8(), /*ordered=*/false); - std::unique_ptr boxed_builder; - ASSERT_OK(MakeBuilder(default_memory_pool(), unordered_type, &boxed_builder)); - - auto builder_type = boxed_builder->type(); - ASSERT_FALSE(checked_cast(*builder_type).ordered()); + for (bool ordered : {true, false}) { + ARROW_SCOPED_TRACE("ordered = ", ordered); + auto dict_type = dictionary(int8(), utf8(), ordered); + std::unique_ptr boxed_builder; + ASSERT_OK(MakeBuilder(default_memory_pool(), dict_type, &boxed_builder)); + + auto builder_type = boxed_builder->type(); + ASSERT_EQ(checked_cast(*builder_type).ordered(), ordered); + } } TEST(TestDictionaryBuilderOrdered, FinishPreservesOrderedFlag) { - auto ordered_type = dictionary(int8(), utf8(), /*ordered=*/true); - std::unique_ptr boxed_builder; - ASSERT_OK(MakeBuilder(default_memory_pool(), ordered_type, &boxed_builder)); - auto& builder = checked_cast&>(*boxed_builder); + for (bool ordered : {true, false}) { + ARROW_SCOPED_TRACE("ordered = ", ordered); + auto dict_type = dictionary(int8(), utf8(), ordered); + std::unique_ptr boxed_builder; + ASSERT_OK(MakeBuilder(default_memory_pool(), dict_type, &boxed_builder)); + auto& builder = checked_cast&>(*boxed_builder); - ASSERT_OK(builder.Append("a")); - ASSERT_OK(builder.Append("b")); - ASSERT_OK(builder.Append("a")); + ASSERT_OK(builder.Append("a")); + ASSERT_OK(builder.Append("b")); + ASSERT_OK(builder.Append("a")); - std::shared_ptr result; - ASSERT_OK(builder.Finish(&result)); + std::shared_ptr result; + ASSERT_OK(builder.Finish(&result)); - const auto& result_type = checked_cast(*result->type()); - ASSERT_TRUE(result_type.ordered()); + const auto& result_type = checked_cast(*result->type()); + ASSERT_EQ(result_type.ordered(), ordered); - auto ex_dict = ArrayFromJSON(utf8(), R"(["a", "b"])"); - auto ex_indices = ArrayFromJSON(int8(), "[0, 1, 0]"); - DictionaryArray expected(ordered_type, ex_indices, ex_dict); - AssertArraysEqual(expected, *result); + auto ex_dict = ArrayFromJSON(utf8(), R"(["a", "b"])"); + auto ex_indices = ArrayFromJSON(int8(), "[0, 1, 0]"); + DictionaryArray expected(dict_type, ex_indices, ex_dict); + AssertArraysEqual(expected, *result); + } } TEST(TestDictionaryBuilderOrdered, ListOfOrderedDictionary) { - auto ordered_dict_type = dictionary(int8(), utf8(), /*ordered=*/true); - auto list_type = list(field("item", ordered_dict_type)); - - std::unique_ptr boxed_builder; - ASSERT_OK(MakeBuilder(default_memory_pool(), list_type, &boxed_builder)); - auto& list_builder = checked_cast(*boxed_builder); - auto& dict_builder = - checked_cast&>(*list_builder.value_builder()); - - ASSERT_OK(list_builder.Append()); - ASSERT_OK(dict_builder.Append("a")); - ASSERT_OK(dict_builder.Append("b")); - ASSERT_OK(list_builder.Append()); - ASSERT_OK(dict_builder.Append("a")); + for (bool ordered : {true, false}) { + ARROW_SCOPED_TRACE("ordered = ", ordered); + auto dict_type = dictionary(int8(), utf8(), ordered); + auto list_type = list(field("item", dict_type)); + + std::unique_ptr boxed_builder; + ASSERT_OK(MakeBuilder(default_memory_pool(), list_type, &boxed_builder)); + auto& list_builder = checked_cast(*boxed_builder); + auto& dict_builder = + checked_cast&>(*list_builder.value_builder()); + + ASSERT_OK(list_builder.Append()); + ASSERT_OK(dict_builder.Append("a")); + ASSERT_OK(dict_builder.Append("b")); + ASSERT_OK(list_builder.Append()); + ASSERT_OK(dict_builder.Append("a")); - std::shared_ptr result; - ASSERT_OK(list_builder.Finish(&result)); + std::shared_ptr result; + ASSERT_OK(list_builder.Finish(&result)); - const auto& result_list_type = checked_cast(*result->type()); - const auto& result_dict_type = - checked_cast(*result_list_type.value_type()); - ASSERT_TRUE(result_dict_type.ordered()); + const auto& result_list_type = checked_cast(*result->type()); + const auto& result_dict_type = + checked_cast(*result_list_type.value_type()); + ASSERT_EQ(result_dict_type.ordered(), ordered); + } } TEST(TestDictionaryBuilderOrdered, MakeDictionaryBuilderPreservesOrdered) { - auto ordered_type = dictionary(int8(), utf8(), /*ordered=*/true); - std::unique_ptr builder; - ASSERT_OK(MakeDictionaryBuilder(default_memory_pool(), ordered_type, - /*dictionary=*/nullptr, &builder)); - - auto builder_type = builder->type(); - ASSERT_TRUE(checked_cast(*builder_type).ordered()); + for (bool ordered : {true, false}) { + ARROW_SCOPED_TRACE("ordered = ", ordered); + auto dict_type = dictionary(int8(), utf8(), ordered); + std::unique_ptr builder; + ASSERT_OK(MakeDictionaryBuilder(default_memory_pool(), dict_type, + /*dictionary=*/nullptr, &builder)); + + auto builder_type = builder->type(); + ASSERT_EQ(checked_cast(*builder_type).ordered(), ordered); + } } } // namespace arrow diff --git a/cpp/src/arrow/array/builder_dict.h b/cpp/src/arrow/array/builder_dict.h index ef67ffdccb33..269cdeee646d 100644 --- a/cpp/src/arrow/array/builder_dict.h +++ b/cpp/src/arrow/array/builder_dict.h @@ -154,28 +154,28 @@ class DictionaryBuilderBase : public ArrayBuilder { const std::shared_ptr&> value_type, MemoryPool* pool = default_memory_pool(), - int64_t alignment = kDefaultBufferAlignment) + int64_t alignment = kDefaultBufferAlignment, bool ordered = false) : ArrayBuilder(pool, alignment), memo_table_(new internal::DictionaryMemoTable(pool, value_type)), delta_offset_(0), byte_width_(-1), indices_builder_(start_int_size, pool, alignment), value_type_(value_type), - ordered_(false) {} + ordered_(ordered) {} template explicit DictionaryBuilderBase( enable_if_t::value, const std::shared_ptr&> value_type, MemoryPool* pool = default_memory_pool(), - int64_t alignment = kDefaultBufferAlignment) + int64_t alignment = kDefaultBufferAlignment, bool ordered = false) : ArrayBuilder(pool, alignment), memo_table_(new internal::DictionaryMemoTable(pool, value_type)), delta_offset_(0), byte_width_(-1), indices_builder_(pool, alignment), value_type_(value_type), - ordered_(false) {} + ordered_(ordered) {} template explicit DictionaryBuilderBase( @@ -183,14 +183,14 @@ class DictionaryBuilderBase : public ArrayBuilder { enable_if_t::value, const std::shared_ptr&> value_type, MemoryPool* pool = default_memory_pool(), - int64_t alignment = kDefaultBufferAlignment) + int64_t alignment = kDefaultBufferAlignment, bool ordered = false) : ArrayBuilder(pool, alignment), memo_table_(new internal::DictionaryMemoTable(pool, value_type)), delta_offset_(0), byte_width_(-1), indices_builder_(index_type, pool, alignment), value_type_(value_type), - ordered_(false) {} + ordered_(ordered) {} template DictionaryBuilderBase(uint8_t start_int_size, @@ -199,41 +199,41 @@ class DictionaryBuilderBase : public ArrayBuilder { const std::shared_ptr&> value_type, MemoryPool* pool = default_memory_pool(), - int64_t alignment = kDefaultBufferAlignment) + int64_t alignment = kDefaultBufferAlignment, bool ordered = false) : ArrayBuilder(pool, alignment), memo_table_(new internal::DictionaryMemoTable(pool, value_type)), delta_offset_(0), byte_width_(static_cast(*value_type).byte_width()), indices_builder_(start_int_size, pool, alignment), value_type_(value_type), - ordered_(false) {} + ordered_(ordered) {} template explicit DictionaryBuilderBase( enable_if_fixed_size_binary&> value_type, MemoryPool* pool = default_memory_pool(), - int64_t alignment = kDefaultBufferAlignment) + int64_t alignment = kDefaultBufferAlignment, bool ordered = false) : ArrayBuilder(pool, alignment), memo_table_(new internal::DictionaryMemoTable(pool, value_type)), delta_offset_(0), byte_width_(static_cast(*value_type).byte_width()), indices_builder_(pool, alignment), value_type_(value_type), - ordered_(false) {} + ordered_(ordered) {} template explicit DictionaryBuilderBase( const std::shared_ptr& index_type, enable_if_fixed_size_binary&> value_type, MemoryPool* pool = default_memory_pool(), - int64_t alignment = kDefaultBufferAlignment) + int64_t alignment = kDefaultBufferAlignment, bool ordered = false) : ArrayBuilder(pool, alignment), memo_table_(new internal::DictionaryMemoTable(pool, value_type)), delta_offset_(0), byte_width_(static_cast(*value_type).byte_width()), indices_builder_(index_type, pool, alignment), value_type_(value_type), - ordered_(false) {} + ordered_(ordered) {} template explicit DictionaryBuilderBase( @@ -243,14 +243,15 @@ class DictionaryBuilderBase : public ArrayBuilder { // This constructor doesn't check for errors. Use InsertMemoValues instead. explicit DictionaryBuilderBase(const std::shared_ptr& dictionary, MemoryPool* pool = default_memory_pool(), - int64_t alignment = kDefaultBufferAlignment) + int64_t alignment = kDefaultBufferAlignment, + bool ordered = false) : ArrayBuilder(pool, alignment), memo_table_(new internal::DictionaryMemoTable(pool, dictionary)), delta_offset_(0), byte_width_(-1), indices_builder_(pool, alignment), value_type_(dictionary->type()), - ordered_(false) {} + ordered_(ordered) {} ~DictionaryBuilderBase() override = default; @@ -500,8 +501,6 @@ class DictionaryBuilderBase : public ArrayBuilder { return ::arrow::dictionary(indices_builder_.type(), value_type_, ordered_); } - void set_ordered(bool ordered) { ordered_ = ordered; } - protected: template Status AppendArraySliceImpl(const typename TypeTraits::ArrayType& dict, @@ -570,7 +569,7 @@ class DictionaryBuilderBase : public ArrayBuilder { BuilderType indices_builder_; std::shared_ptr value_type_; - bool ordered_; + bool ordered_ = false; }; template @@ -581,31 +580,53 @@ class DictionaryBuilderBase : public ArrayBuilder { enable_if_t::value, uint8_t> start_int_size, const std::shared_ptr& value_type, - MemoryPool* pool = default_memory_pool()) - : ArrayBuilder(pool), indices_builder_(start_int_size, pool) {} + MemoryPool* pool = default_memory_pool(), + int64_t alignment = kDefaultBufferAlignment, bool ordered = false) + : ArrayBuilder(pool, alignment), + indices_builder_(start_int_size, pool, alignment), + ordered_(ordered) {} explicit DictionaryBuilderBase(const std::shared_ptr& value_type, - MemoryPool* pool = default_memory_pool()) - : ArrayBuilder(pool), indices_builder_(pool) {} + MemoryPool* pool = default_memory_pool(), + int64_t alignment = kDefaultBufferAlignment, + bool ordered = false) + : ArrayBuilder(pool, alignment), + indices_builder_(pool, alignment), + ordered_(ordered) {} explicit DictionaryBuilderBase(const std::shared_ptr& index_type, const std::shared_ptr& value_type, - MemoryPool* pool = default_memory_pool()) - : ArrayBuilder(pool), indices_builder_(index_type, pool) {} + MemoryPool* pool = default_memory_pool(), + int64_t alignment = kDefaultBufferAlignment, + bool ordered = false) + : ArrayBuilder(pool, alignment), + indices_builder_(index_type, pool, alignment), + ordered_(ordered) {} template explicit DictionaryBuilderBase( enable_if_t::value, uint8_t> start_int_size, - MemoryPool* pool = default_memory_pool()) - : ArrayBuilder(pool), indices_builder_(start_int_size, pool) {} + MemoryPool* pool = default_memory_pool(), + int64_t alignment = kDefaultBufferAlignment, bool ordered = false) + : ArrayBuilder(pool, alignment), + indices_builder_(start_int_size, pool, alignment), + ordered_(ordered) {} - explicit DictionaryBuilderBase(MemoryPool* pool = default_memory_pool()) - : ArrayBuilder(pool), indices_builder_(pool) {} + explicit DictionaryBuilderBase(MemoryPool* pool = default_memory_pool(), + int64_t alignment = kDefaultBufferAlignment, + bool ordered = false) + : ArrayBuilder(pool, alignment), + indices_builder_(pool, alignment), + ordered_(ordered) {} explicit DictionaryBuilderBase(const std::shared_ptr& dictionary, - MemoryPool* pool = default_memory_pool()) - : ArrayBuilder(pool), indices_builder_(pool) {} + MemoryPool* pool = default_memory_pool(), + int64_t alignment = kDefaultBufferAlignment, + bool ordered = false) + : ArrayBuilder(pool, alignment), + indices_builder_(pool, alignment), + ordered_(ordered) {} /// \brief Append a scalar null value Status AppendNull() final { @@ -672,8 +693,6 @@ class DictionaryBuilderBase : public ArrayBuilder { return ::arrow::dictionary(indices_builder_.type(), null(), ordered_); } - void set_ordered(bool ordered) { ordered_ = ordered; } - protected: BuilderType indices_builder_; bool ordered_ = false; diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index 04a8860eb833..2190f1ce04a9 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -171,21 +171,18 @@ struct DictionaryBuilderCase { using ExactBuilderType = internal::DictionaryBuilderBase; if (dictionary != nullptr) { - auto builder = new AdaptiveBuilderType(dictionary, pool); - builder->set_ordered(ordered); - out->reset(builder); + out->reset( + new AdaptiveBuilderType(dictionary, pool, kDefaultBufferAlignment, ordered)); } else if (exact_index_type) { if (!is_integer(index_type->id())) { return Status::TypeError("MakeBuilder: invalid index type ", *index_type); } - auto builder = new ExactBuilderType(index_type, value_type, pool); - builder->set_ordered(ordered); - out->reset(builder); + out->reset(new ExactBuilderType(index_type, value_type, pool, + kDefaultBufferAlignment, ordered)); } else { auto start_int_size = index_type->byte_width(); - auto builder = new AdaptiveBuilderType(start_int_size, value_type, pool); - builder->set_ordered(ordered); - out->reset(builder); + out->reset(new AdaptiveBuilderType(start_int_size, value_type, pool, + kDefaultBufferAlignment, ordered)); } return Status::OK(); }