@@ -763,37 +763,38 @@ mod tests {
763763 Ok ( ( ) )
764764 }
765765
766- /// Regression test: a Substrait join expression containing both `equal` and
767- /// `is_not_distinct_from` (as Spark can produce) must preserve the
768- /// null-safe semantics of `IS NOT DISTINCT FROM` by demoting it to the
769- /// join filter when mixed with regular equality keys.
770- ///
771- /// The plan is loaded from a JSON-encoded Substrait protobuf to exercise
772- /// the full consumer path (`from_substrait_plan` → `from_join_rel`).
766+ /// Regression test: a Substrait join mixing `equal` and `is_not_distinct_from`
767+ /// must keep `equal` as the join key and demote `IS NOT DISTINCT FROM` to the join filter (matching the SQL
768+ /// planner behavior tested in `join_is_not_distinct_from.slt:179-205`).
773769 #[ tokio:: test]
774770 async fn test_mixed_join_equal_and_indistinct_from_substrait_plan ( ) -> Result < ( ) > {
771+ let plan_str =
772+ test_plan_to_string ( "mixed_join_equal_and_indistinct.json" ) . await ?;
773+ // Snapshot check: `left.id = right.id` is the equijoin key, while `left.val IS NOT DISTINCT FROM right.val` is demoted to the join filter.
774+ assert_snapshot ! (
775+ plan_str,
776+ @r#"
777+ Projection: left.id, left.val, left.comment, right.id AS id0, right.val AS val0, right.comment AS comment0
778+ Inner Join: left.id = right.id Filter: left.val IS NOT DISTINCT FROM right.val
779+ SubqueryAlias: left
780+ Values: (Utf8("1"), Utf8("a"), Utf8("c1")), (Utf8("2"), Utf8("b"), Utf8("c2")), (Utf8("3"), Utf8(NULL), Utf8("c3")), (Utf8("4"), Utf8(NULL), Utf8("c4")), (Utf8("5"), Utf8("e"), Utf8("c5"))...
781+ SubqueryAlias: right
782+ Values: (Utf8("1"), Utf8("a"), Utf8("c1")), (Utf8("2"), Utf8("b"), Utf8("c2")), (Utf8("3"), Utf8(NULL), Utf8("c3")), (Utf8("4"), Utf8(NULL), Utf8("c4")), (Utf8("5"), Utf8("e"), Utf8("c5"))...
783+ "#
784+ ) ;
785+
786+ // Also execute the plan: the rows whose `val` is NULL (ids 3 and 4) must still match, so the join is expected to return 6 rows in total.
775787 let path = "tests/testdata/test_plans/mixed_join_equal_and_indistinct.json" ;
776788 let proto = serde_json:: from_reader :: < _ , Plan > ( BufReader :: new (
777789 File :: open ( path) . expect ( "file not found" ) ,
778790 ) )
779791 . expect ( "failed to parse json" ) ;
780-
781792 let ctx = SessionContext :: new ( ) ;
782793 let plan = from_substrait_plan ( & ctx. state ( ) , & proto) . await ?;
783-
784- // Execute and count rows.
785- // Both tables have 6 identical rows; rows 3 and 4 have val=NULL.
786- // With correct handling, IS NOT DISTINCT FROM is demoted to the join
787- // filter, so NULL=NULL matches and all 6 rows appear in the output.
788794 let df = ctx. execute_logical_plan ( plan) . await ?;
789795 let results = df. collect ( ) . await ?;
790796 let total_rows: usize = results. iter ( ) . map ( |b| b. num_rows ( ) ) . sum ( ) ;
791-
792- assert_eq ! (
793- total_rows, 6 ,
794- "Expected 6 rows (including NULL=NULL matches via IS NOT DISTINCT FROM), \
795- got {total_rows}. Mixed equal/is_not_distinct_from lost null-safe semantics."
796- ) ;
797+ assert_eq ! ( total_rows, 6 ) ;
797798
798799 Ok ( ( ) )
799800 }
0 commit comments