Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
168ed58
chor: enable arrays_overlap
kazuyukitanimura Apr 3, 2026
b617476
fix: workaround arrays_overlap
kazuyukitanimura Apr 3, 2026
6a68c52
fix: workaround arrays_overlap
kazuyukitanimura Apr 3, 2026
d297ec4
fix: workaround arrays_overlap
kazuyukitanimura Apr 3, 2026
0bd466f
Merge remote-tracking branch 'upstream/main' into enable-arrays_overlop
kazuyukitanimura Apr 8, 2026
8108dd1
fix: workaround arrays_overlap
kazuyukitanimura Apr 8, 2026
bcf8687
test
kazuyukitanimura Apr 9, 2026
d93db81
fix: workaround arrays_overlap
kazuyukitanimura Apr 9, 2026
c2a757f
test
kazuyukitanimura Apr 9, 2026
a76adf3
fix: workaround arrays_overlap
kazuyukitanimura Apr 9, 2026
8469711
fix: workaround arrays_overlap
kazuyukitanimura Apr 9, 2026
bfafd62
fix: workaround arrays_overlap
kazuyukitanimura Apr 9, 2026
4760834
Revert "fix: workaround arrays_overlap"
kazuyukitanimura Apr 9, 2026
b0b1410
Merge remote-tracking branch 'upstream/main' into enable-arrays_overlop
kazuyukitanimura Apr 9, 2026
f06db49
fix: workaround arrays_overlap
kazuyukitanimura Apr 10, 2026
32e356b
fix: workaround arrays_overlap
kazuyukitanimura Apr 10, 2026
b9ee1b1
Merge remote-tracking branch 'upstream/main' into enable-arrays_overlop
kazuyukitanimura Apr 18, 2026
8069876
Reapply "fix: workaround arrays_overlap"
kazuyukitanimura Apr 18, 2026
dd0c097
Revert "Reapply "fix: workaround arrays_overlap""
kazuyukitanimura Apr 18, 2026
ae3b53a
fix: workaround arrays_overlap
kazuyukitanimura Apr 18, 2026
44c1559
fix: workaround arrays_overlap
kazuyukitanimura Apr 18, 2026
715693b
fix: workaround arrays_overlap
kazuyukitanimura Apr 18, 2026
b7a136f
fix: workaround arrays_overlap
kazuyukitanimura Apr 18, 2026
80a9108
fix: workaround arrays_overlap
kazuyukitanimura Apr 18, 2026
088161e
fix: workaround arrays_overlap
kazuyukitanimura Apr 18, 2026
3b65df7
fix: workaround arrays_overlap
kazuyukitanimura Apr 18, 2026
75b6db7
fix: workaround arrays_overlap
kazuyukitanimura Apr 18, 2026
fe321a0
fix: workaround arrays_overlap
kazuyukitanimura Apr 18, 2026
103c1c3
fix: workaround arrays_overlap
kazuyukitanimura Apr 18, 2026
7c98e84
fix: workaround arrays_overlap
kazuyukitanimura Apr 18, 2026
c3d0250
fix: workaround arrays_overlap
kazuyukitanimura Apr 18, 2026
f0f3698
fix: workaround arrays_overlap
kazuyukitanimura Apr 18, 2026
882c96f
fix: workaround arrays_overlap
kazuyukitanimura Apr 18, 2026
77413cf
Merge remote-tracking branch 'upstream/main' into enable-arrays_overlop
kazuyukitanimura Apr 20, 2026
77f65dc
fix: arrays_overlap
kazuyukitanimura Apr 20, 2026
bbb3dd3
fix: arrays_overlap
kazuyukitanimura Apr 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions docs/source/user-guide/latest/compatibility.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,6 @@ the [Comet Supported Expressions Guide](expressions.md) for more information on
[#3346](https://github.com/apache/datafusion-comet/issues/3346)
- **ArrayRemove**: Returns null when the element to remove is null, instead of removing null elements from the array.
[#3173](https://github.com/apache/datafusion-comet/issues/3173)
- **ArraysOverlap**: Inconsistent behavior when arrays contain NULL values.
[#3645](https://github.com/apache/datafusion-comet/issues/3645),
[#2036](https://github.com/apache/datafusion-comet/issues/2036)
- **ArrayUnion**: Sorts input arrays before performing the union, while Spark preserves the order of the first array
and appends unique elements from the second.
[#3644](https://github.com/apache/datafusion-comet/issues/3644)
Expand Down
2 changes: 1 addition & 1 deletion docs/source/user-guide/latest/expressions.md
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ Comet supports using the following aggregate functions within window contexts wi
| ArrayRemove | No | Returns null when element is null instead of removing null elements ([#3173](https://github.com/apache/datafusion-comet/issues/3173)) |
| ArrayRepeat | No | |
| ArrayUnion | No | Behaves differently from Spark. Comet sorts the input arrays before performing the union, while Spark preserves the order of the first array and appends unique elements from the second. |
| ArraysOverlap | No | |
| ArraysOverlap | Yes | |
| CreateArray | Yes | |
| ElementAt | Yes | Input must be an array. Map inputs are not supported. |
| Flatten | Yes | |
Expand Down
8 changes: 0 additions & 8 deletions spark/src/main/scala/org/apache/comet/serde/arrays.scala
Original file line number Diff line number Diff line change
Expand Up @@ -246,14 +246,6 @@ object CometArrayMin extends CometExpressionSerde[ArrayMin] {
}

object CometArraysOverlap extends CometExpressionSerde[ArraysOverlap] {

override def getSupportLevel(expr: ArraysOverlap): SupportLevel =
Incompatible(
Some(
"Inconsistent behavior with NULL values" +
" (https://github.com/apache/datafusion-comet/issues/3645)" +
" (https://github.com/apache/datafusion-comet/issues/2036)"))

override def convert(
expr: ArraysOverlap,
inputs: Seq[Attribute],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ CREATE TABLE test_arrays_overlap(a array<int>, b array<int>) USING parquet
statement
INSERT INTO test_arrays_overlap VALUES (array(1, 2, 3), array(3, 4, 5)), (array(1, 2), array(3, 4)), (array(), array(1)), (NULL, array(1)), (array(1, NULL), array(NULL, 2))

query ignore(https://github.com/apache/datafusion-comet/issues/3645)
query
SELECT arrays_overlap(a, b) FROM test_arrays_overlap

-- column + literal
query ignore(https://github.com/apache/datafusion-comet/issues/3645)
query
SELECT arrays_overlap(a, array(3, 4, 5)) FROM test_arrays_overlap

-- literal + column
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import scala.util.Random

import org.apache.hadoop.fs.Path
import org.apache.spark.sql.CometTestBase
import org.apache.spark.sql.catalyst.expressions.{ArrayAppend, ArrayDistinct, ArrayExcept, ArrayInsert, ArrayIntersect, ArrayJoin, ArrayRepeat, ArraysOverlap, ArrayUnion}
import org.apache.spark.sql.catalyst.expressions.{ArrayAppend, ArrayDistinct, ArrayExcept, ArrayInsert, ArrayIntersect, ArrayJoin, ArrayRepeat, ArrayUnion}
import org.apache.spark.sql.catalyst.expressions.{ArrayContains, ArrayRemove}
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
import org.apache.spark.sql.functions._
Expand Down Expand Up @@ -545,27 +545,59 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
}

test("arrays_overlap") {
withSQLConf(CometConf.getExprAllowIncompatConfigKey(classOf[ArraysOverlap]) -> "true") {
Seq(true, false).foreach { dictionaryEnabled =>
withTempDir { dir =>
withTempView("t1") {
val path = new Path(dir.toURI.toString, "test.parquet")
makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled, 10000)
spark.read.parquet(path.toString).createOrReplaceTempView("t1")
checkSparkAnswerAndOperator(sql(
"SELECT arrays_overlap(array(_2, _3, _4), array(_3, _4)) from t1 where _2 is not null"))
checkSparkAnswerAndOperator(sql(
"SELECT arrays_overlap(array('a', null, cast(_1 as string)), array('b', cast(_1 as string), cast(_2 as string))) from t1 where _1 is not null"))
checkSparkAnswerAndOperator(sql(
"SELECT arrays_overlap(array('a', null), array('b', null)) from t1 where _1 is not null"))
checkSparkAnswerAndOperator(spark.sql(
"SELECT arrays_overlap((CASE WHEN _2 =_3 THEN array(_6, _7) END), array(_6, _7)) FROM t1"));
}
Seq(true, false).foreach { dictionaryEnabled =>
withTempDir { dir =>
withTempView("t1") {
val path = new Path(dir.toURI.toString, "test.parquet")
makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled, 10000)
spark.read.parquet(path.toString).createOrReplaceTempView("t1")
checkSparkAnswerAndOperator(sql(
"SELECT arrays_overlap(array(_2, _3, _4), array(_3, _4)) from t1 where _2 is not null"))
checkSparkAnswerAndOperator(sql(
"SELECT arrays_overlap(array('a', null, cast(_1 as string)), array('b', cast(_1 as string), cast(_2 as string))) from t1 where _1 is not null"))
checkSparkAnswerAndOperator(sql(
"SELECT arrays_overlap(array('a', null), array('b', null)) from t1 where _1 is not null"))
checkSparkAnswerAndOperator(spark.sql(
"SELECT arrays_overlap((CASE WHEN _2 =_3 THEN array(_6, _7) END), array(_6, _7)) FROM t1"));
}
}
}
}

// Runs arrays_overlap over literal arrays that cover the overlap / no-overlap / NULL-element /
// empty-array combinations. No expected value is hard-coded here: each query is handed to
// checkSparkAnswerAndOperator, which presumably compares Comet's result with Spark's and
// checks that the plan ran natively - TODO confirm against CometTestBase.
test("arrays_overlap - null handling behavior verification") {
// NOTE(review): the next two lines look like review-UI text captured with the diff, not Scala.
Comment thread
kazuyukitanimura marked this conversation as resolved.
withTempDir { dir =>
withTempView("t1") {
// t1 is only a row source: every query below uses literal arrays and `limit 1`.
val path = new Path(dir.toURI.toString, "test.parquet")
makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = false, 100)
spark.read.parquet(path.toString).createOrReplaceTempView("t1")

// Test case 1: Common element exists - should return true
checkSparkAnswerAndOperator(
sql("SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5)) from t1 limit 1"))

// Test case 2: No common elements, no nulls - should return false
checkSparkAnswerAndOperator(
sql("SELECT arrays_overlap(array(1, 2), array(3, 4)) from t1 limit 1"))

// Test case 3: No common elements, but null exists - Spark returns null (three-valued logic)
checkSparkAnswerAndOperator(
sql("SELECT arrays_overlap(array(1, null, 3), array(4, 5)) from t1 limit 1"))

// Test case 4: Common element exists even with null - should return true
checkSparkAnswerAndOperator(
sql("SELECT arrays_overlap(array(1, null, 3), array(1, 4)) from t1 limit 1"))

// Test case 5: Both arrays have null but no common non-null elements - same situation as
// test case 3 (no match, null present), so Spark is expected to return null
checkSparkAnswerAndOperator(
sql("SELECT arrays_overlap(array(1, null), array(2, null)) from t1 limit 1"))

// Test case 6: First array is empty, second is not - presumably false, since nothing can
// match and no null is present (verify against Spark's arrays_overlap documentation)
checkSparkAnswerAndOperator(
sql("SELECT arrays_overlap(array(), array(1, 2)) from t1 limit 1"))
}
}
}

test("array_compact") {
// TODO fix for Spark 4.0.0
assume(!isSpark40Plus)
Expand Down
Loading