@@ -10,7 +10,6 @@ import (
1010 "time"
1111
1212 "github.com/blevesearch/bleve/v2"
13- "github.com/blevesearch/bleve/v2/analysis/lang/en"
1413 "github.com/blevesearch/bleve/v2/mapping"
1514 "github.com/blevesearch/bleve/v2/search/query"
1615 "github.com/gabriel-vasile/mimetype"
@@ -158,13 +157,14 @@ func (si *SearchIndexer) cleanup() {
158157func (si * SearchIndexer ) createIndexMapping () mapping.IndexMapping {
159158 docMapping := bleve .NewDocumentMapping ()
160159
161- // Text fields with standard analyzer
160+ // Text fields with standard analyzer (better for mixed content including numbers)
161+ // Standard analyzer doesn't do aggressive stemming like en analyzer
162162 textField := bleve .NewTextFieldMapping ()
163- textField .Analyzer = en . AnalyzerName
163+ textField .Analyzer = "standard"
164164 textField .Store = true
165165 textField .Index = true
166166
167- // Keyword fields for exact match
167+ // Keyword fields for exact match (no analysis, exact term matching)
168168 keywordField := bleve .NewKeywordFieldMapping ()
169169 keywordField .Store = true
170170 keywordField .Index = true
@@ -179,8 +179,8 @@ func (si *SearchIndexer) createIndexMapping() mapping.IndexMapping {
179179 "id" : keywordField ,
180180 "type" : keywordField ,
181181 "path" : keywordField ,
182- "name" : textField ,
183- "content" : textField ,
182+ "name" : textField , // Use text field with standard analyzer
183+ "content" : textField , // Use text field with standard analyzer
184184 "updated_at" : dateField ,
185185 }
186186
@@ -190,7 +190,7 @@ func (si *SearchIndexer) createIndexMapping() mapping.IndexMapping {
190190
191191 indexMapping := bleve .NewIndexMapping ()
192192 indexMapping .DefaultMapping = docMapping
193- indexMapping .DefaultAnalyzer = en . AnalyzerName
193+ indexMapping .DefaultAnalyzer = "standard"
194194
195195 return indexMapping
196196}
@@ -365,6 +365,26 @@ func (si *SearchIndexer) searchWithType(ctx context.Context, queryStr string, do
365365 }
366366}
367367
// isNumericQuery reports whether queryStr is predominantly made of ASCII
// digits, so callers can apply a number-oriented search strategy instead of a
// text-oriented one.
//
// A query counts as numeric when strictly more than half of its characters are
// in the range '0'-'9'. Characters are counted as runes, not bytes, so a
// multi-byte UTF-8 character weighs the same as an ASCII one. For example,
// "9005" and "192.168.1.1" are numeric, while "port:9005" (4 digits out of 9
// characters), "12ab" (exactly half) and the empty string are not.
func isNumericQuery(queryStr string) bool {
	digits, total := 0, 0
	for _, ch := range queryStr {
		total++
		if ch >= '0' && ch <= '9' {
			digits++
		}
	}

	// Integer form of digits/total > 0.5. It is also false for the empty
	// string (0 > 0), so no separate empty check or division is required.
	return digits*2 > total
}
387+
368388// buildQuery builds a search query with optional type filtering
369389func (si * SearchIndexer ) buildQuery (queryStr string , docType string ) query.Query {
370390 mainQuery := bleve .NewBooleanQuery ()
@@ -376,6 +396,9 @@ func (si *SearchIndexer) buildQuery(queryStr string, docType string) query.Query
376396 mainQuery .AddMust (typeQuery )
377397 }
378398
399+ // Determine if this is a numeric query
400+ isNumeric := isNumericQuery (queryStr )
401+
379402 // Add text search across name and content fields only
380403 textQuery := bleve .NewBooleanQuery ()
381404 searchFields := []string {"name" , "content" }
@@ -384,30 +407,62 @@ func (si *SearchIndexer) buildQuery(queryStr string, docType string) query.Query
384407 // Create a boolean query for this field to combine multiple query types
385408 fieldQuery := bleve .NewBooleanQuery ()
386409
387- // 1. Exact match query (highest priority)
388- matchQuery := bleve .NewMatchQuery (queryStr )
389- matchQuery .SetField (field )
390- matchQuery .SetBoost (3.0 ) // Higher boost for exact matches
391- fieldQuery .AddShould (matchQuery )
392-
393- // 2. Prefix query for partial matches (e.g., "access" matches "access_log")
394- prefixQuery := bleve .NewPrefixQuery (queryStr )
395- prefixQuery .SetField (field )
396- prefixQuery .SetBoost (2.0 ) // Medium boost for prefix matches
397- fieldQuery .AddShould (prefixQuery )
398-
399- // 3. Wildcard query for more flexible matching
400- wildcardQuery := bleve .NewWildcardQuery ("*" + queryStr + "*" )
401- wildcardQuery .SetField (field )
402- wildcardQuery .SetBoost (1.5 ) // Lower boost for wildcard matches
403- fieldQuery .AddShould (wildcardQuery )
404-
405- // 4. Fuzzy match query (allows 1 character difference)
406- fuzzyQuery := bleve .NewFuzzyQuery (queryStr )
407- fuzzyQuery .SetField (field )
408- fuzzyQuery .SetFuzziness (1 )
409- fuzzyQuery .SetBoost (1.0 ) // Lowest boost for fuzzy matches
410- fieldQuery .AddShould (fuzzyQuery )
410+ if isNumeric {
411+ // Numeric query strategy: prioritize exact matches and prefix matches
412+ // Avoid fuzzy matching to prevent false positives
413+
414+ // 1. Term query for exact token match (highest priority for numbers)
415+ termQuery := bleve .NewTermQuery (queryStr )
416+ termQuery .SetField (field )
417+ termQuery .SetBoost (10.0 ) // Highest boost for exact term matches
418+ fieldQuery .AddShould (termQuery )
419+
420+ // 2. Prefix query for partial matches (e.g., "9005" matches "90051234")
421+ prefixQuery := bleve .NewPrefixQuery (queryStr )
422+ prefixQuery .SetField (field )
423+ prefixQuery .SetBoost (5.0 ) // High boost for prefix matches
424+ fieldQuery .AddShould (prefixQuery )
425+
426+ // 3. Wildcard query for substring matching (e.g., "9005" in "listen 9005;")
427+ wildcardQuery := bleve .NewWildcardQuery ("*" + queryStr + "*" )
428+ wildcardQuery .SetField (field )
429+ wildcardQuery .SetBoost (2.0 ) // Lower boost for wildcard matches
430+ fieldQuery .AddShould (wildcardQuery )
431+
432+ } else {
433+ // Text query strategy: more flexible matching with fuzzy support
434+
435+ // 1. Term query for exact token match (highest priority)
436+ termQuery := bleve .NewTermQuery (strings .ToLower (queryStr ))
437+ termQuery .SetField (field )
438+ termQuery .SetBoost (8.0 ) // High boost for exact matches
439+ fieldQuery .AddShould (termQuery )
440+
441+ // 2. Match query for analyzed text search (handles case-insensitive, etc.)
442+ matchQuery := bleve .NewMatchQuery (queryStr )
443+ matchQuery .SetField (field )
444+ matchQuery .SetBoost (4.0 ) // Medium-high boost for match queries
445+ fieldQuery .AddShould (matchQuery )
446+
447+ // 3. Prefix query for partial matches (e.g., "access" matches "access_log")
448+ prefixQuery := bleve .NewPrefixQuery (strings .ToLower (queryStr ))
449+ prefixQuery .SetField (field )
450+ prefixQuery .SetBoost (3.0 ) // Medium boost for prefix matches
451+ fieldQuery .AddShould (prefixQuery )
452+
453+ // 4. Wildcard query for more flexible matching
454+ wildcardQuery := bleve .NewWildcardQuery ("*" + strings .ToLower (queryStr ) + "*" )
455+ wildcardQuery .SetField (field )
456+ wildcardQuery .SetBoost (2.0 ) // Lower boost for wildcard matches
457+ fieldQuery .AddShould (wildcardQuery )
458+
459+ // 5. Fuzzy match query (allows 1 character difference) - only for text queries
460+ fuzzyQuery := bleve .NewFuzzyQuery (queryStr )
461+ fuzzyQuery .SetField (field )
462+ fuzzyQuery .SetFuzziness (1 )
463+ fuzzyQuery .SetBoost (1.0 ) // Lowest boost for fuzzy matches
464+ fieldQuery .AddShould (fuzzyQuery )
465+ }
411466
412467 textQuery .AddShould (fieldQuery )
413468 }
0 commit comments