diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs index 05edd230daccb..00c8fab228117 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -184,7 +184,11 @@ impl OptimizerRule for SingleDistinctToGroupBy { func, params: AggregateFunctionParams { - mut args, distinct, .. + mut args, + distinct, + filter, + order_by, + null_treatment, }, }) => { if distinct { @@ -204,9 +208,9 @@ impl OptimizerRule for SingleDistinctToGroupBy { func, vec![col(SINGLE_DISTINCT_ALIAS)], false, // intentional to remove distinct here - None, - vec![], - None, + filter, + order_by, + null_treatment, ))) // if the aggregate function is not distinct, we need to rewrite it like two phase aggregation } else { @@ -217,9 +221,9 @@ impl OptimizerRule for SingleDistinctToGroupBy { Arc::clone(&func), args, false, - None, - vec![], - None, + filter, + order_by, + null_treatment, )) .alias(&alias_str), ); diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 3c962a0f87f36..cff451644d16f 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -379,6 +379,59 @@ select array_sort(c1), array_sort(c2) from ( statement ok drop table array_agg_distinct_list_table; +# Test array_agg with DISTINCT and IGNORE NULLS (regression test for issue #19735) +query ? +SELECT array_sort(ARRAY_AGG(DISTINCT x IGNORE NULLS)) as result +FROM (VALUES (1), (2), (NULL), (2), (NULL), (1)) AS t(x); +---- +[1, 2] + +# Test that non-DISTINCT aggregates also preserve IGNORE NULLS when mixed with DISTINCT +# This tests the two-phase aggregation rewrite in SingleDistinctToGroupBy +query I? +SELECT + COUNT(DISTINCT x) as distinct_count, + array_sort(ARRAY_AGG(y IGNORE NULLS)) as y_agg +FROM (VALUES + (1, 10), + (1, 20), + (2, 30), + (3, NULL), + (3, 40), + (NULL, 50) +) AS t(x, y) +---- +3 [10, 20, 30, 40, 50] + +# Test that FILTER clause is preserved in two-phase aggregation rewrite +query II +SELECT + COUNT(DISTINCT x) as distinct_count, + SUM(y) FILTER (WHERE y > 15) as filtered_sum +FROM (VALUES + (1, 10), + (1, 20), + (2, 5), + (2, 30), + (3, 25) +) AS t(x, y) +---- +3 75 + +# Test that ORDER BY is preserved in two-phase aggregation rewrite +query I? +SELECT + COUNT(DISTINCT x) as distinct_count, + ARRAY_AGG(y ORDER BY y DESC) as ordered_agg +FROM (VALUES + (1, 10), + (1, 30), + (2, 20), + (2, 40) +) AS t(x, y) +---- +2 [40, 30, 20, 10] + statement error This feature is not implemented: Calling array_agg: LIMIT not supported in function arguments: 1 SELECT array_agg(c13 LIMIT 1) FROM aggregate_test_100