|
16 | 16 | from basic_memory.repository.project_repository import ProjectRepository |
17 | 17 | from basic_memory.schemas import ( |
18 | 18 | ActivityMetrics, |
| 19 | + EmbeddingStatus, |
19 | 20 | ProjectInfoResponse, |
20 | 21 | ProjectStatistics, |
21 | 22 | SystemStatus, |
@@ -597,6 +598,9 @@ async def get_project_info(self, project_name: Optional[str] = None) -> ProjectI |
597 | 598 | # Get activity metrics for the specified project |
598 | 599 | activity = await self.get_activity_metrics(db_project.id) |
599 | 600 |
|
| 601 | + # Get embedding status for the specified project |
| 602 | + embedding_status = await self.get_embedding_status(db_project.id) |
| 603 | + |
600 | 604 | # Get system status |
601 | 605 | system = self.get_system_status() |
602 | 606 |
|
@@ -650,6 +654,7 @@ async def get_project_info(self, project_name: Optional[str] = None) -> ProjectI |
650 | 654 | statistics=statistics, |
651 | 655 | activity=activity, |
652 | 656 | system=system, |
| 657 | + embedding_status=embedding_status, |
653 | 658 | ) |
654 | 659 |
|
655 | 660 | async def get_statistics(self, project_id: int) -> ProjectStatistics: |
@@ -918,6 +923,163 @@ async def get_activity_metrics(self, project_id: int) -> ActivityMetrics: |
918 | 923 | monthly_growth=monthly_growth, |
919 | 924 | ) |
920 | 925 |
|
async def get_embedding_status(self, project_id: int) -> EmbeddingStatus:
    """Get embedding/vector index status for the specified project.

    Reports the semantic-search configuration, per-project counts taken from
    the search index and vector tables, and whether a reindex is recommended.

    Args:
        project_id: ID of the project whose rows are counted.

    Returns:
        An ``EmbeddingStatus``. When semantic search is disabled only
        ``semantic_search_enabled=False`` is set. When the vector tables are
        missing, the configuration and indexed-entity count are reported with
        ``vector_tables_exist=False`` and a reindex recommendation. Otherwise
        all counts are populated and the recommendation is derived from them.
    """
    config = self.config_manager.config

    # When semantic search is disabled, return minimal status
    if not config.semantic_search_enabled:
        return EmbeddingStatus(semantic_search_enabled=False)

    is_postgres = config.database_backend == DatabaseBackend.POSTGRES
    project_params = {"project_id": project_id}

    async def count(sql: str, params: dict) -> int:
        """Run a single-value COUNT query; a missing/NULL result counts as 0."""
        result = await self.repository.execute_query(text(sql), params)
        return result.scalar() or 0

    # Fields shared by every "semantic search enabled" response.
    settings = {
        "semantic_search_enabled": True,
        "embedding_provider": config.semantic_embedding_provider,
        "embedding_model": config.semantic_embedding_model,
        "embedding_dimensions": config.semantic_embedding_dimensions,
    }

    # --- Check vector table existence ---
    # Both tables are required: the embedding and orphan counts below JOIN
    # search_vector_embeddings, so checking only search_vector_chunks would let
    # a partially initialized schema raise instead of being reported.
    required_tables = ("search_vector_chunks", "search_vector_embeddings")
    if is_postgres:
        # DISTINCT so the same table name in several schemas is counted once.
        table_check_sql = (
            "SELECT COUNT(DISTINCT table_name) FROM information_schema.tables "
            "WHERE table_name IN ('search_vector_chunks', 'search_vector_embeddings')"
        )
    else:
        table_check_sql = (
            "SELECT COUNT(DISTINCT name) FROM sqlite_master "
            "WHERE type = 'table' "
            "AND name IN ('search_vector_chunks', 'search_vector_embeddings')"
        )
    vector_tables_exist = await count(table_check_sql, {}) >= len(required_tables)

    # Distinct entities in the search index; needed on both paths below.
    total_indexed_entities = await count(
        "SELECT COUNT(DISTINCT entity_id) FROM search_index "
        "WHERE project_id = :project_id",
        project_params,
    )

    if not vector_tables_exist:
        return EmbeddingStatus(
            **settings,
            total_indexed_entities=total_indexed_entities,
            vector_tables_exist=False,
            reindex_recommended=True,
            reindex_reason="Vector tables not initialized — run: bm reindex --embeddings",
        )

    # --- Count queries (tables exist) ---
    total_chunks = await count(
        "SELECT COUNT(*) FROM search_vector_chunks WHERE project_id = :project_id",
        project_params,
    )
    total_entities_with_chunks = await count(
        "SELECT COUNT(DISTINCT entity_id) FROM search_vector_chunks "
        "WHERE project_id = :project_id",
        project_params,
    )

    # The embeddings table is keyed by chunk_id on Postgres and by rowid on
    # SQLite. This is a fixed internal identifier, never user input, so it is
    # safe to interpolate into the SQL text.
    join_column = "chunk_id" if is_postgres else "rowid"

    total_embeddings = await count(
        "SELECT COUNT(*) FROM search_vector_chunks c "
        f"JOIN search_vector_embeddings e ON e.{join_column} = c.id "
        "WHERE c.project_id = :project_id",
        project_params,
    )

    # Orphaned chunks (chunks without embeddings — indicates interrupted indexing)
    orphaned_chunks = await count(
        "SELECT COUNT(*) FROM search_vector_chunks c "
        f"LEFT JOIN search_vector_embeddings e ON e.{join_column} = c.id "
        f"WHERE c.project_id = :project_id AND e.{join_column} IS NULL",
        project_params,
    )

    # --- Reindex recommendation logic (priority order) ---
    reindex_reason = None
    if total_indexed_entities > 0 and total_chunks == 0:
        reindex_reason = "Embeddings have never been built — run: bm reindex --embeddings"
    elif orphaned_chunks > 0:
        reindex_reason = (
            f"{orphaned_chunks} orphaned chunks found (interrupted indexing) "
            "— run: bm reindex --embeddings"
        )
    elif total_indexed_entities > total_entities_with_chunks:
        missing = total_indexed_entities - total_entities_with_chunks
        reindex_reason = (
            f"{missing} entities missing embeddings — run: bm reindex --embeddings"
        )

    return EmbeddingStatus(
        **settings,
        total_indexed_entities=total_indexed_entities,
        total_entities_with_chunks=total_entities_with_chunks,
        total_chunks=total_chunks,
        total_embeddings=total_embeddings,
        orphaned_chunks=orphaned_chunks,
        vector_tables_exist=True,
        # A reason is set exactly when one of the conditions above matched.
        reindex_recommended=reindex_reason is not None,
        reindex_reason=reindex_reason,
    )
| 1082 | + |
921 | 1083 | def get_system_status(self) -> SystemStatus: |
922 | 1084 | """Get system status information.""" |
923 | 1085 | import basic_memory |
|
0 commit comments