@@ -997,25 +997,37 @@ async def get_embedding_status(self, project_id: int) -> EmbeddingStatus:
997997 )
998998
999999 # --- Count queries (tables exist) ---
1000+ # Filter by entity existence to exclude stale rows from deleted entities
1001+ # that remain in derived search tables (search_index, search_vector_chunks)
1002+ entity_exists = "AND entity_id IN (SELECT id FROM entity WHERE project_id = :project_id)"
1003+ # Same filter for aliased chunks table (used in JOIN queries below)
1004+ chunk_entity_exists = (
1005+ "AND c.entity_id IN (SELECT id FROM entity WHERE project_id = :project_id)"
1006+ )
1007+
10001008 si_result = await self .repository .execute_query (
10011009 text (
1002- "SELECT COUNT(DISTINCT entity_id) FROM search_index WHERE project_id = :project_id"
1010+ "SELECT COUNT(DISTINCT entity_id) FROM search_index "
1011+ f"WHERE project_id = :project_id { entity_exists } "
10031012 ),
10041013 {"project_id" : project_id },
10051014 )
10061015 total_indexed_entities = si_result .scalar () or 0
10071016
10081017 try :
10091018 chunks_result = await self .repository .execute_query (
1010- text ("SELECT COUNT(*) FROM search_vector_chunks WHERE project_id = :project_id" ),
1019+ text (
1020+ "SELECT COUNT(*) FROM search_vector_chunks "
1021+ f"WHERE project_id = :project_id { entity_exists } "
1022+ ),
10111023 {"project_id" : project_id },
10121024 )
10131025 total_chunks = chunks_result .scalar () or 0
10141026
10151027 entities_with_chunks_result = await self .repository .execute_query (
10161028 text (
10171029 "SELECT COUNT(DISTINCT entity_id) FROM search_vector_chunks "
1018- "WHERE project_id = :project_id"
1030+ f "WHERE project_id = :project_id { entity_exists } "
10191031 ),
10201032 {"project_id" : project_id },
10211033 )
@@ -1026,13 +1038,13 @@ async def get_embedding_status(self, project_id: int) -> EmbeddingStatus:
10261038 embeddings_sql = text (
10271039 "SELECT COUNT(*) FROM search_vector_chunks c "
10281040 "JOIN search_vector_embeddings e ON e.chunk_id = c.id "
1029- "WHERE c.project_id = :project_id"
1041+ f "WHERE c.project_id = :project_id { chunk_entity_exists } "
10301042 )
10311043 else :
10321044 embeddings_sql = text (
10331045 "SELECT COUNT(*) FROM search_vector_chunks c "
10341046 "JOIN search_vector_embeddings e ON e.rowid = c.id "
1035- "WHERE c.project_id = :project_id"
1047+ f "WHERE c.project_id = :project_id { chunk_entity_exists } "
10361048 )
10371049
10381050 embeddings_result = await self .repository .execute_query (
@@ -1045,13 +1057,13 @@ async def get_embedding_status(self, project_id: int) -> EmbeddingStatus:
10451057 orphan_sql = text (
10461058 "SELECT COUNT(*) FROM search_vector_chunks c "
10471059 "LEFT JOIN search_vector_embeddings e ON e.chunk_id = c.id "
1048- "WHERE c.project_id = :project_id AND e.chunk_id IS NULL"
1060+ f "WHERE c.project_id = :project_id AND e.chunk_id IS NULL { chunk_entity_exists } "
10491061 )
10501062 else :
10511063 orphan_sql = text (
10521064 "SELECT COUNT(*) FROM search_vector_chunks c "
10531065 "LEFT JOIN search_vector_embeddings e ON e.rowid = c.id "
1054- "WHERE c.project_id = :project_id AND e.rowid IS NULL"
1066+ f "WHERE c.project_id = :project_id AND e.rowid IS NULL { chunk_entity_exists } "
10551067 )
10561068
10571069 orphan_result = await self .repository .execute_query (
0 commit comments