Skip to content

Commit 3fe689a

Browse files
KyleAMathews, claude, and autofix-ci[bot]
authored
fix: deduplicate and filter null join keys in lazy join subset queries (#1448)
* fix: deduplicate and filter null join keys in lazy join snapshot requests

  When a lazy join collected foreign keys for subset queries, duplicate IDs and null values were passed through to the ANY() SQL param, producing bloated queries. Filter nulls, deduplicate via Set, and skip the request entirely when no valid keys remain.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* chore: add changeset for lazy join key dedup fix

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* ci: apply automated fixes

* test: strengthen all-null-keys test to assert no loadSubset calls

  Assert capturedOptions is empty rather than checking for absence of inArray expressions. This catches regressions where the fallback path issues an expensive full collection load with no where clause.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
1 parent 2680868 commit 3fe689a

File tree

3 files changed

+224
-2
lines changed

3 files changed

+224
-2
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@tanstack/db': patch
3+
---
4+
5+
Deduplicate and filter null join keys in lazy join subset queries. Previously, when multiple rows referenced the same foreign key or had null foreign keys, the full unfiltered array was passed to `inArray()`, producing bloated `ANY()` SQL params with repeated IDs and NULLs. The snapshot request is now also skipped entirely when no non-null join keys remain.

packages/db/src/query/compiler/joins.ts

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -302,8 +302,20 @@ function processJoin(
302302
return
303303
}
304304

305-
// Request filtered snapshot from lazy collection for matching join keys
306-
const joinKeys = data.getInner().map(([[joinKey]]) => joinKey)
305+
// Deduplicate and filter null keys before requesting snapshot
306+
const joinKeys = [
307+
...new Set(
308+
data
309+
.getInner()
310+
.map(([[joinKey]]) => joinKey)
311+
.filter((key) => key != null),
312+
),
313+
]
314+
315+
if (joinKeys.length === 0) {
316+
return
317+
}
318+
307319
const lazyJoinRef = new PropRef(followRefResult.path)
308320
const loaded = lazySourceSubscription.requestSnapshot({
309321
where: inArray(lazyJoinRef, joinKeys),

packages/db/tests/query/live-query-collection.test.ts

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import {
1818
} from '../utils.js'
1919
import { createDeferred } from '../../src/deferred'
2020
import { BTreeIndex } from '../../src/indexes/btree-index'
21+
import { Func, Value } from '../../src/query/ir.js'
2122
import type { ChangeMessage, LoadSubsetOptions } from '../../src/types.js'
2223

2324
// Sample user type for tests
@@ -2586,6 +2587,210 @@ describe(`createLiveQueryCollection`, () => {
25862587
})
25872588
})
25882589

2590+
describe(`lazy join key deduplication`, () => {
2591+
it(`should deduplicate join keys and filter nulls when requesting snapshot for lazy joins`, async () => {
2592+
type Task = {
2593+
id: number
2594+
name: string
2595+
project_id: number | null
2596+
}
2597+
2598+
type Project = {
2599+
id: number
2600+
name: string
2601+
}
2602+
2603+
// Main collection with duplicate foreign keys and null foreign keys
2604+
const taskCollection = createCollection<Task>({
2605+
id: `tasks-dedup`,
2606+
getKey: (task) => task.id,
2607+
sync: {
2608+
sync: ({ begin, write, commit, markReady }) => {
2609+
begin()
2610+
// Multiple tasks pointing to the same project (duplicates)
2611+
write({
2612+
type: `insert`,
2613+
value: { id: 1, name: `Task 1`, project_id: 10 },
2614+
})
2615+
write({
2616+
type: `insert`,
2617+
value: { id: 2, name: `Task 2`, project_id: 10 },
2618+
})
2619+
write({
2620+
type: `insert`,
2621+
value: { id: 3, name: `Task 3`, project_id: 10 },
2622+
})
2623+
write({
2624+
type: `insert`,
2625+
value: { id: 4, name: `Task 4`, project_id: 20 },
2626+
})
2627+
write({
2628+
type: `insert`,
2629+
value: { id: 5, name: `Task 5`, project_id: 20 },
2630+
})
2631+
// Tasks with null foreign key
2632+
write({
2633+
type: `insert`,
2634+
value: { id: 6, name: `Task 6`, project_id: null },
2635+
})
2636+
write({
2637+
type: `insert`,
2638+
value: { id: 7, name: `Task 7`, project_id: null },
2639+
})
2640+
commit()
2641+
markReady()
2642+
},
2643+
},
2644+
})
2645+
2646+
// Lazy joined collection that tracks loadSubset calls
2647+
const capturedOptions: Array<LoadSubsetOptions> = []
2648+
2649+
const projectCollection = createCollection<Project>({
2650+
id: `projects-dedup`,
2651+
getKey: (project) => project.id,
2652+
syncMode: `on-demand`,
2653+
sync: {
2654+
sync: ({ begin, write, commit, markReady }) => {
2655+
begin()
2656+
write({ type: `insert`, value: { id: 10, name: `Project A` } })
2657+
write({ type: `insert`, value: { id: 20, name: `Project B` } })
2658+
commit()
2659+
markReady()
2660+
return {
2661+
loadSubset: (options: LoadSubsetOptions) => {
2662+
capturedOptions.push(options)
2663+
return true
2664+
},
2665+
}
2666+
},
2667+
},
2668+
})
2669+
2670+
const liveQuery = createLiveQueryCollection((q) =>
2671+
q
2672+
.from({ task: taskCollection })
2673+
.leftJoin({ project: projectCollection }, ({ task, project }) =>
2674+
eq(task.project_id, project.id),
2675+
),
2676+
)
2677+
2678+
await liveQuery.preload()
2679+
await flushPromises()
2680+
2681+
// Find the inArray expression in loadSubset calls
2682+
// It may be wrapped in `and` since requestSnapshot combines expressions
2683+
const findInArrayExpr = (
2684+
expr: LoadSubsetOptions[`where`],
2685+
): Func | undefined => {
2686+
if (!(expr instanceof Func)) return undefined
2687+
if (expr.name === `in`) return expr
2688+
if (expr.name === `and` || expr.name === `or`) {
2689+
for (const arg of expr.args) {
2690+
const found = findInArrayExpr(arg)
2691+
if (found) return found
2692+
}
2693+
}
2694+
return undefined
2695+
}
2696+
2697+
const inExpr = capturedOptions
2698+
.map((opt) => findInArrayExpr(opt.where))
2699+
.find((expr) => expr !== undefined)
2700+
2701+
expect(inExpr).toBeDefined()
2702+
2703+
// The second arg of inArray is the array of values
2704+
const arrayArg = inExpr!.args[1]
2705+
expect(arrayArg).toBeInstanceOf(Value)
2706+
const valuesArg = arrayArg as Value<Array<number>>
2707+
const values = valuesArg.value.slice().sort()
2708+
2709+
// Should contain only the 2 unique project IDs -- no nulls, no duplicates
2710+
expect(values).toEqual([10, 20])
2711+
})
2712+
2713+
it(`should skip loadSubset when all join keys are null`, async () => {
2714+
type Task = {
2715+
id: number
2716+
name: string
2717+
project_id: number | null
2718+
}
2719+
2720+
type Project = {
2721+
id: number
2722+
name: string
2723+
}
2724+
2725+
const taskCollection = createCollection<Task>({
2726+
id: `tasks-all-null`,
2727+
getKey: (task) => task.id,
2728+
sync: {
2729+
sync: ({ begin, write, commit, markReady }) => {
2730+
begin()
2731+
write({
2732+
type: `insert`,
2733+
value: { id: 1, name: `Task 1`, project_id: null },
2734+
})
2735+
write({
2736+
type: `insert`,
2737+
value: { id: 2, name: `Task 2`, project_id: null },
2738+
})
2739+
write({
2740+
type: `insert`,
2741+
value: { id: 3, name: `Task 3`, project_id: null },
2742+
})
2743+
commit()
2744+
markReady()
2745+
},
2746+
},
2747+
})
2748+
2749+
const capturedOptions: Array<LoadSubsetOptions> = []
2750+
2751+
const projectCollection = createCollection<Project>({
2752+
id: `projects-all-null`,
2753+
getKey: (project) => project.id,
2754+
syncMode: `on-demand`,
2755+
sync: {
2756+
sync: ({ begin, write, commit, markReady }) => {
2757+
begin()
2758+
write({ type: `insert`, value: { id: 10, name: `Project A` } })
2759+
commit()
2760+
markReady()
2761+
return {
2762+
loadSubset: (options: LoadSubsetOptions) => {
2763+
capturedOptions.push(options)
2764+
return true
2765+
},
2766+
}
2767+
},
2768+
},
2769+
})
2770+
2771+
const liveQuery = createLiveQueryCollection((q) =>
2772+
q
2773+
.from({ task: taskCollection })
2774+
.leftJoin({ project: projectCollection }, ({ task, project }) =>
2775+
eq(task.project_id, project.id),
2776+
),
2777+
)
2778+
2779+
await liveQuery.preload()
2780+
await flushPromises()
2781+
2782+
// No loadSubset call should have been made for the lazy join
2783+
// since all keys were null and filtered out
2784+
expect(capturedOptions).toHaveLength(0)
2785+
2786+
// All tasks should still appear in results with null project
2787+
expect(liveQuery.toArray).toHaveLength(3)
2788+
for (const row of liveQuery.toArray) {
2789+
expect(row.project).toBeUndefined()
2790+
}
2791+
})
2792+
})
2793+
25892794
describe(`chained live query collections without custom getKey`, () => {
25902795
it(`should return all items when a live query collection without getKey is used as a source`, async () => {
25912796
// Create a live query collection with the default (internal) getKey

0 commit comments

Comments
 (0)