Skip to content

Commit 74e6afd

Browse files
phernandez and claude committed
fix: create search_vector_chunks in test fixtures for Postgres compatibility
Embedding status tests were creating search_vector_chunks inline using SQLite-only DDL (AUTOINCREMENT). Added Postgres DDL constants to models/search.py and wired them into the test fixture so both backends create the table at setup time — matching what the Alembic migration does in production.

Also changed the stub search_vector_embeddings table to use chunk_id (the Postgres column name) instead of rowid, and added inter-test cleanup to prevent ordering-dependent failures.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent aa635b8 commit 74e6afd

3 files changed

Lines changed: 72 additions & 65 deletions

File tree

src/basic_memory/models/search.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -93,6 +93,27 @@
9393
);
9494
""")
9595

96+
# Postgres semantic chunk metadata table.
97+
# Matches the Alembic migration (h1b2c3d4e5f6) schema.
98+
# Used by tests to create the table without running full migrations.
99+
CREATE_POSTGRES_SEARCH_VECTOR_CHUNKS_TABLE = DDL("""
100+
CREATE TABLE IF NOT EXISTS search_vector_chunks (
101+
id BIGSERIAL PRIMARY KEY,
102+
entity_id INTEGER NOT NULL,
103+
project_id INTEGER NOT NULL,
104+
chunk_key TEXT NOT NULL,
105+
chunk_text TEXT NOT NULL,
106+
source_hash TEXT NOT NULL,
107+
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
108+
UNIQUE (project_id, entity_id, chunk_key)
109+
)
110+
""")
111+
112+
CREATE_POSTGRES_SEARCH_VECTOR_CHUNKS_INDEX = DDL("""
113+
CREATE INDEX IF NOT EXISTS idx_search_vector_chunks_project_entity
114+
ON search_vector_chunks (project_id, entity_id)
115+
""")
116+
96117
# Local semantic chunk metadata table for SQLite.
97118
# Embedding vectors live in sqlite-vec virtual table keyed by this table rowid.
98119
CREATE_SQLITE_SEARCH_VECTOR_CHUNKS = DDL("""

tests/conftest.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -188,7 +188,12 @@ async def engine_factory(
188188
189189
Uses parameterized db_backend fixture to run tests against both backends.
190190
"""
191-
from basic_memory.models.search import CREATE_SEARCH_INDEX
191+
from basic_memory.models.search import (
192+
CREATE_SEARCH_INDEX,
193+
CREATE_SQLITE_SEARCH_VECTOR_CHUNKS,
194+
CREATE_SQLITE_SEARCH_VECTOR_CHUNKS_PROJECT_ENTITY,
195+
CREATE_SQLITE_SEARCH_VECTOR_CHUNKS_UNIQUE,
196+
)
192197

193198
if db_backend == "postgres":
194199
# Postgres mode using testcontainers
@@ -221,6 +226,8 @@ async def engine_factory(
221226
CREATE_POSTGRES_SEARCH_INDEX_FTS,
222227
CREATE_POSTGRES_SEARCH_INDEX_METADATA,
223228
CREATE_POSTGRES_SEARCH_INDEX_PERMALINK,
229+
CREATE_POSTGRES_SEARCH_VECTOR_CHUNKS_TABLE,
230+
CREATE_POSTGRES_SEARCH_VECTOR_CHUNKS_INDEX,
224231
)
225232

226233
# Drop and recreate all tables for test isolation
@@ -235,6 +242,8 @@ async def engine_factory(
235242
await conn.execute(CREATE_POSTGRES_SEARCH_INDEX_FTS)
236243
await conn.execute(CREATE_POSTGRES_SEARCH_INDEX_METADATA)
237244
await conn.execute(CREATE_POSTGRES_SEARCH_INDEX_PERMALINK)
245+
await conn.execute(CREATE_POSTGRES_SEARCH_VECTOR_CHUNKS_TABLE)
246+
await conn.execute(CREATE_POSTGRES_SEARCH_VECTOR_CHUNKS_INDEX)
238247

239248
# Mark migrations as already applied for this test-created schema.
240249
#
@@ -269,6 +278,9 @@ async def engine_factory(
269278
async with engine.begin() as conn:
270279
await conn.run_sync(Base.metadata.create_all)
271280
await conn.execute(CREATE_SEARCH_INDEX)
281+
await conn.execute(CREATE_SQLITE_SEARCH_VECTOR_CHUNKS)
282+
await conn.execute(CREATE_SQLITE_SEARCH_VECTOR_CHUNKS_PROJECT_ENTITY)
283+
await conn.execute(CREATE_SQLITE_SEARCH_VECTOR_CHUNKS_UNIQUE)
272284

273285
# Yield after setup is complete
274286
yield engine, session_maker

tests/services/test_project_service_embedding_status.py

Lines changed: 38 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,19 @@
11
"""Tests for ProjectService.get_embedding_status()."""
22

3+
import os
34
from unittest.mock import patch
45

56
import pytest
7+
from sqlalchemy import text
68

79
from basic_memory.schemas.project_info import EmbeddingStatus
810
from basic_memory.services.project_service import ProjectService
911

1012

13+
def _is_postgres() -> bool:
14+
return os.environ.get("BASIC_MEMORY_TEST_POSTGRES", "").lower() in ("1", "true", "yes")
15+
16+
1117
@pytest.mark.asyncio
1218
async def test_embedding_status_semantic_disabled(project_service: ProjectService, test_project):
1319
"""When semantic search is disabled, return minimal status with zero counts."""
@@ -32,6 +38,11 @@ async def test_embedding_status_vector_tables_missing(
3238
project_service: ProjectService, test_graph, test_project
3339
):
3440
"""When vector tables don't exist, recommend reindex."""
41+
# Drop the chunks table created by the fixture to simulate missing vector tables
42+
# On Postgres, CASCADE also drops dependent objects (e.g. views, foreign keys); the table's own indexes are dropped either way. SQLite's DROP TABLE does not accept CASCADE.
43+
drop_sql = "DROP TABLE IF EXISTS search_vector_chunks CASCADE" if _is_postgres() else "DROP TABLE IF EXISTS search_vector_chunks"
44+
await project_service.repository.execute_query(text(drop_sql), {})
45+
3546
with patch.object(
3647
type(project_service),
3748
"config_manager",
@@ -41,40 +52,20 @@ async def test_embedding_status_vector_tables_missing(
4152
):
4253
status = await project_service.get_embedding_status(test_project.id)
4354

44-
# Vector tables are not created by the standard test fixtures
45-
# If they don't exist, status should flag it
4655
assert status.semantic_search_enabled is True
4756
assert status.embedding_provider == "fastembed"
4857
assert status.embedding_model == "bge-small-en-v1.5"
49-
50-
if not status.vector_tables_exist:
51-
assert status.reindex_recommended is True
52-
assert "Vector tables not initialized" in (status.reindex_reason or "")
58+
assert status.vector_tables_exist is False
59+
assert status.reindex_recommended is True
60+
assert "Vector tables not initialized" in (status.reindex_reason or "")
5361

5462

5563
@pytest.mark.asyncio
5664
async def test_embedding_status_entities_without_chunks(
5765
project_service: ProjectService, test_graph, test_project
5866
):
5967
"""When entities have search_index rows but no chunks, recommend reindex."""
60-
# Create vector tables (empty) so the table-existence check passes
61-
from sqlalchemy import text
62-
63-
await project_service.repository.execute_query(
64-
text(
65-
"CREATE TABLE IF NOT EXISTS search_vector_chunks ("
66-
" id INTEGER PRIMARY KEY AUTOINCREMENT,"
67-
" entity_id INTEGER NOT NULL,"
68-
" project_id INTEGER NOT NULL,"
69-
" chunk_key TEXT NOT NULL,"
70-
" chunk_text TEXT NOT NULL,"
71-
" source_hash TEXT NOT NULL,"
72-
" updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP"
73-
")"
74-
),
75-
{},
76-
)
77-
68+
# search_vector_chunks table is created by the test fixture (empty)
7869
with patch.object(
7970
type(project_service),
8071
"config_manager",
@@ -98,24 +89,6 @@ async def test_embedding_status_orphaned_chunks(
9889
project_service: ProjectService, test_graph, test_project
9990
):
10091
"""When chunks exist without matching embeddings, recommend reindex."""
101-
from sqlalchemy import text
102-
103-
# Create vector tables
104-
await project_service.repository.execute_query(
105-
text(
106-
"CREATE TABLE IF NOT EXISTS search_vector_chunks ("
107-
" id INTEGER PRIMARY KEY AUTOINCREMENT,"
108-
" entity_id INTEGER NOT NULL,"
109-
" project_id INTEGER NOT NULL,"
110-
" chunk_key TEXT NOT NULL,"
111-
" chunk_text TEXT NOT NULL,"
112-
" source_hash TEXT NOT NULL,"
113-
" updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP"
114-
")"
115-
),
116-
{},
117-
)
118-
11992
# Insert a chunk row (no matching embedding = orphan)
12093
# Get a real entity_id from the test graph
12194
entity_result = await project_service.repository.execute_query(
@@ -133,12 +106,14 @@ async def test_embedding_status_orphaned_chunks(
133106
{"entity_id": entity_id, "project_id": test_project.id},
134107
)
135108

136-
# Create a minimal search_vector_embeddings table (not sqlite-vec virtual table)
137-
# so the LEFT JOIN works and finds the orphan
109+
# Create a minimal search_vector_embeddings stub (not a real vector table)
110+
# so the LEFT JOIN works and finds the orphan.
111+
# Uses chunk_id as PK — Postgres queries join on chunk_id,
112+
# SQLite queries join on rowid which aliases INTEGER PRIMARY KEY.
138113
await project_service.repository.execute_query(
139114
text(
140115
"CREATE TABLE IF NOT EXISTS search_vector_embeddings ("
141-
" rowid INTEGER PRIMARY KEY"
116+
" chunk_id INTEGER PRIMARY KEY"
142117
")"
143118
),
144119
{},
@@ -153,6 +128,11 @@ async def test_embedding_status_orphaned_chunks(
153128
):
154129
status = await project_service.get_embedding_status(test_project.id)
155130

131+
# Clean up stub table to avoid polluting subsequent tests
132+
await project_service.repository.execute_query(
133+
text("DROP TABLE IF EXISTS search_vector_embeddings"), {}
134+
)
135+
156136
assert status.vector_tables_exist is True
157137
assert status.total_chunks == 1
158138
assert status.orphaned_chunks == 1
@@ -165,33 +145,22 @@ async def test_embedding_status_healthy(
165145
project_service: ProjectService, test_graph, test_project
166146
):
167147
"""When all entities have embeddings, no reindex recommended."""
168-
from sqlalchemy import text
169-
170-
# Create vector chunks table
148+
# Clear any leftover data from prior tests
171149
await project_service.repository.execute_query(
172-
text(
173-
"CREATE TABLE IF NOT EXISTS search_vector_chunks ("
174-
" id INTEGER PRIMARY KEY AUTOINCREMENT,"
175-
" entity_id INTEGER NOT NULL,"
176-
" project_id INTEGER NOT NULL,"
177-
" chunk_key TEXT NOT NULL,"
178-
" chunk_text TEXT NOT NULL,"
179-
" source_hash TEXT NOT NULL,"
180-
" updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP"
181-
")"
182-
),
183-
{},
150+
text("DELETE FROM search_vector_chunks"), {}
184151
)
185152

186153
# Drop any existing virtual table (may have been created by search_service init)
187-
# and recreate as a simple regular table for testing the join logic
154+
# and recreate as a simple regular table for testing the join logic.
155+
# Uses chunk_id as PK — Postgres queries join on chunk_id,
156+
# SQLite queries join on rowid which aliases INTEGER PRIMARY KEY.
188157
await project_service.repository.execute_query(
189158
text("DROP TABLE IF EXISTS search_vector_embeddings"), {}
190159
)
191160
await project_service.repository.execute_query(
192161
text(
193162
"CREATE TABLE search_vector_embeddings ("
194-
" rowid INTEGER PRIMARY KEY"
163+
" chunk_id INTEGER PRIMARY KEY"
195164
")"
196165
),
197166
{},
@@ -222,8 +191,8 @@ async def test_embedding_status_healthy(
222191
},
223192
)
224193
await project_service.repository.execute_query(
225-
text("INSERT INTO search_vector_embeddings (rowid) VALUES (:rowid)"),
226-
{"rowid": chunk_id},
194+
text("INSERT INTO search_vector_embeddings (chunk_id) VALUES (:chunk_id)"),
195+
{"chunk_id": chunk_id},
227196
)
228197
chunk_id += 1
229198

@@ -236,6 +205,11 @@ async def test_embedding_status_healthy(
236205
):
237206
status = await project_service.get_embedding_status(test_project.id)
238207

208+
# Clean up stub table to avoid polluting subsequent tests
209+
await project_service.repository.execute_query(
210+
text("DROP TABLE IF EXISTS search_vector_embeddings"), {}
211+
)
212+
239213
assert status.vector_tables_exist is True
240214
assert status.total_chunks > 0
241215
assert status.total_embeddings == status.total_chunks

0 commit comments

Comments
 (0)