Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions datafusion/optimizer/src/single_distinct_to_groupby.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,11 @@ impl OptimizerRule for SingleDistinctToGroupBy {
func,
params:
AggregateFunctionParams {
mut args, distinct, ..
mut args,
distinct,
filter,
order_by,
null_treatment,
},
}) => {
if distinct {
Expand All @@ -204,9 +208,9 @@ impl OptimizerRule for SingleDistinctToGroupBy {
func,
vec![col(SINGLE_DISTINCT_ALIAS)],
false, // intentional to remove distinct here
None,
vec![],
None,
filter,
order_by,
null_treatment,
)))
// if the aggregate function is not distinct, we need to rewrite it like two phase aggregation
} else {
Expand All @@ -217,9 +221,9 @@ impl OptimizerRule for SingleDistinctToGroupBy {
Arc::clone(&func),
args,
false,
None,
vec![],
None,
filter,
order_by,
null_treatment,
))
.alias(&alias_str),
);
Expand Down
53 changes: 53 additions & 0 deletions datafusion/sqllogictest/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,59 @@ select array_sort(c1), array_sort(c2) from (
statement ok
drop table array_agg_distinct_list_table;

# Regression test for issue #19735: array_agg with both DISTINCT and IGNORE NULLS.
# The input holds 1 and 2 twice each plus two NULLs, so the aggregate must return
# each non-NULL value exactly once. The array is sorted so the expected output
# does not depend on aggregation order.
query ?
SELECT
    array_sort(ARRAY_AGG(DISTINCT x IGNORE NULLS)) AS result
FROM (
    VALUES (1), (2), (NULL), (2), (NULL), (1)
) AS t(x);
----
[1, 2]

# A non-DISTINCT aggregate using IGNORE NULLS next to a DISTINCT aggregate.
# x has three distinct non-NULL values (1, 2, 3); y contains a single NULL that
# IGNORE NULLS must drop, leaving the five values 10..50.
# NOTE(review): meant to cover the two-phase aggregation rewrite in
# SingleDistinctToGroupBy -- confirm via EXPLAIN that the rule fires for this query.
query I?
SELECT
    COUNT(DISTINCT x) AS distinct_count,
    array_sort(ARRAY_AGG(y IGNORE NULLS)) AS y_agg
FROM (
    VALUES
        (1, 10),
        (1, 20),
        (2, 30),
        (3, NULL),
        (3, 40),
        (NULL, 50)
) AS t(x, y)
----
3 [10, 20, 30, 40, 50]

# A FILTER clause on a non-DISTINCT aggregate next to a DISTINCT aggregate.
# x has three distinct values (1, 2, 3); only y values above 15 are summed:
# 20 + 30 + 25 = 75 (10 and 5 are filtered out).
# NOTE(review): meant to cover the two-phase aggregation rewrite -- confirm via
# EXPLAIN that the rule fires when a FILTER clause is present.
query II
SELECT
    COUNT(DISTINCT x) AS distinct_count,
    SUM(y) FILTER (WHERE y > 15) AS filtered_sum
FROM (
    VALUES
        (1, 10),
        (1, 20),
        (2, 5),
        (2, 30),
        (3, 25)
) AS t(x, y)
----
3 75

# An ORDER BY inside a non-DISTINCT aggregate next to a DISTINCT aggregate.
# x has two distinct values (1, 2); the four y values must come back in
# descending order regardless of their input order.
# NOTE(review): meant to cover the two-phase aggregation rewrite -- confirm via
# EXPLAIN that the rule fires when an aggregate ORDER BY is present.
query I?
SELECT
    COUNT(DISTINCT x) AS distinct_count,
    ARRAY_AGG(y ORDER BY y DESC) AS ordered_agg
FROM (
    VALUES
        (1, 10),
        (1, 30),
        (2, 20),
        (2, 40)
) AS t(x, y)
----
2 [40, 30, 20, 10]

# A LIMIT clause inside array_agg's argument list is expected to be rejected
# with a "not implemented" error rather than executed.
statement error This feature is not implemented: Calling array_agg: LIMIT not supported in function arguments: 1
SELECT array_agg(c13 LIMIT 1) FROM aggregate_test_100

Expand Down