@@ -120,23 +120,82 @@ async def init_search_index(self):
120120 logger .error (f"Error initializing search index: { e } " )
121121 raise e
122122
123- def _prepare_search_term (self , term : str , is_prefix : bool = True ) -> str :
124- """Prepare a search term for FTS5 query .
125-
123+ def _prepare_boolean_query (self , query : str ) -> str :
124+ """Prepare a Boolean query by quoting individual terms while preserving operators .
125+
126126 Args:
127- term: The search term to prepare
127+ query: A Boolean query like "tier1-test AND unicode" or "(hello OR world) NOT test"
128+
129+ Returns:
130+ A properly formatted Boolean query with quoted terms that need quoting
131+ """
132+ import re
133+
134+ # Define Boolean operators and their boundaries
135+ boolean_pattern = r'\b(AND|OR|NOT)\b'
136+
137+ # Split the query by Boolean operators, keeping the operators
138+ parts = re .split (f'({ boolean_pattern } )' , query )
139+
140+ processed_parts = []
141+ for part in parts :
142+ part = part .strip ()
143+ if not part :
144+ continue
145+
146+ # If it's a Boolean operator, keep it as is
147+ if re .match (boolean_pattern , part ):
148+ processed_parts .append (part )
149+ else :
150+ # This is a search term (may include parentheses)
151+ # Handle parentheses separately
152+ if part .startswith ('(' ) and part .endswith (')' ):
153+ # Extract the term inside parentheses
154+ inner_term = part [1 :- 1 ].strip ()
155+ # Recursively process the inner term if it contains Boolean operators
156+ if any (op in f" { inner_term } " for op in [" AND " , " OR " , " NOT " ]):
157+ processed_inner = self ._prepare_boolean_query (inner_term )
158+ processed_parts .append (f"({ processed_inner } )" )
159+ else :
160+ # Single term in parentheses - for Boolean queries, don't add prefix wildcards
161+ prepared_term = self ._prepare_single_term (inner_term , is_prefix = False )
162+ processed_parts .append (f"({ prepared_term } )" )
163+ elif part .startswith ('(' ):
164+ # Opening parenthesis with term - for Boolean queries, don't add prefix wildcards
165+ paren_match = re .match (r'\((.+)' , part )
166+ if paren_match :
167+ inner_term = paren_match .group (1 ).strip ()
168+ prepared_term = self ._prepare_single_term (inner_term , is_prefix = False )
169+ processed_parts .append (f"({ prepared_term } " )
170+ elif part .endswith (')' ):
171+ # Closing parenthesis with term - for Boolean queries, don't add prefix wildcards
172+ paren_match = re .match (r'(.+)\)' , part )
173+ if paren_match :
174+ inner_term = paren_match .group (1 ).strip ()
175+ prepared_term = self ._prepare_single_term (inner_term , is_prefix = False )
176+ processed_parts .append (f"{ prepared_term } )" )
177+ else :
178+ # Regular term - for Boolean queries, don't add prefix wildcards
179+ prepared_term = self ._prepare_single_term (part , is_prefix = False )
180+ processed_parts .append (prepared_term )
181+
182+ return " " .join (processed_parts )
183+
184+ def _prepare_single_term (self , term : str , is_prefix : bool = True ) -> str :
185+ """Prepare a single search term (no Boolean operators).
186+
187+ Args:
188+ term: A single search term
128189 is_prefix: Whether to add prefix search capability (* suffix)
129-
130- For FTS5:
131- - Boolean operators (AND, OR, NOT) are preserved for complex queries
132- - Terms with FTS5 special characters are quoted to prevent syntax errors
133- - Simple terms get prefix wildcards for better matching
190+
191+ Returns:
192+ A properly formatted single term
134193 """
135- # Check for explicit boolean operators - if present, return the term as is
136- boolean_operators = [" AND " , " OR " , " NOT " ]
137- if any (op in f" { term } " for op in boolean_operators ):
194+ if not term or not term .strip ():
138195 return term
139-
196+
197+ term = term .strip ()
198+
140199 # Check if term is already a proper wildcard pattern (alphanumeric + *)
141200 # e.g., "hello*", "test*world" - these should be left alone
142201 if "*" in term and all (c .isalnum () or c in "*_-" for c in term ):
@@ -218,6 +277,26 @@ def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
218277
219278 return term
220279
280+ def _prepare_search_term (self , term : str , is_prefix : bool = True ) -> str :
281+ """Prepare a search term for FTS5 query.
282+
283+ Args:
284+ term: The search term to prepare
285+ is_prefix: Whether to add prefix search capability (* suffix)
286+
287+ For FTS5:
288+ - Boolean operators (AND, OR, NOT) are preserved for complex queries
289+ - Terms with FTS5 special characters are quoted to prevent syntax errors
290+ - Simple terms get prefix wildcards for better matching
291+ """
292+ # Check for explicit boolean operators - if present, process as Boolean query
293+ boolean_operators = [" AND " , " OR " , " NOT " ]
294+ if any (op in f" { term } " for op in boolean_operators ):
295+ return self ._prepare_boolean_query (term )
296+
297+ # For non-Boolean queries, use the single term preparation logic
298+ return self ._prepare_single_term (term , is_prefix )
299+
221300 async def search (
222301 self ,
223302 search_text : Optional [str ] = None ,
@@ -242,19 +321,10 @@ async def search(
242321 # For wildcard searches, don't add any text conditions - return all results
243322 pass
244323 else :
245- # Check for explicit boolean operators - only detect them in proper boolean contexts
246- has_boolean = any (op in f" { search_text } " for op in [" AND " , " OR " , " NOT " ])
247-
248- if has_boolean :
249- # If boolean operators are present, use the raw query
250- # No need to prepare it, FTS5 will understand the operators
251- params ["text" ] = search_text
252- conditions .append ("(title MATCH :text OR content_stems MATCH :text)" )
253- else :
254- # Standard search with term preparation
255- processed_text = self ._prepare_search_term (search_text .strip ())
256- params ["text" ] = processed_text
257- conditions .append ("(title MATCH :text OR content_stems MATCH :text)" )
324+ # Use _prepare_search_term to handle both Boolean and non-Boolean queries
325+ processed_text = self ._prepare_search_term (search_text .strip ())
326+ params ["text" ] = processed_text
327+ conditions .append ("(title MATCH :text OR content_stems MATCH :text)" )
258328
259329 # Handle title match search
260330 if title :
0 commit comments