Skip to content

Commit 9a62507

Browse files
committed
enhance(config-list): content filter #1413
1 parent cfb6cae commit 9a62507

File tree

3 files changed

+227
-32
lines changed

3 files changed

+227
-32
lines changed

internal/cache/search.go

Lines changed: 86 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import (
1010
"time"
1111

1212
"github.com/blevesearch/bleve/v2"
13-
"github.com/blevesearch/bleve/v2/analysis/lang/en"
1413
"github.com/blevesearch/bleve/v2/mapping"
1514
"github.com/blevesearch/bleve/v2/search/query"
1615
"github.com/gabriel-vasile/mimetype"
@@ -158,13 +157,14 @@ func (si *SearchIndexer) cleanup() {
158157
func (si *SearchIndexer) createIndexMapping() mapping.IndexMapping {
159158
docMapping := bleve.NewDocumentMapping()
160159

161-
// Text fields with standard analyzer
160+
// Text fields with standard analyzer (better for mixed content including numbers)
161+
// Standard analyzer doesn't do aggressive stemming like en analyzer
162162
textField := bleve.NewTextFieldMapping()
163-
textField.Analyzer = en.AnalyzerName
163+
textField.Analyzer = "standard"
164164
textField.Store = true
165165
textField.Index = true
166166

167-
// Keyword fields for exact match
167+
// Keyword fields for exact match (no analysis, exact term matching)
168168
keywordField := bleve.NewKeywordFieldMapping()
169169
keywordField.Store = true
170170
keywordField.Index = true
@@ -179,8 +179,8 @@ func (si *SearchIndexer) createIndexMapping() mapping.IndexMapping {
179179
"id": keywordField,
180180
"type": keywordField,
181181
"path": keywordField,
182-
"name": textField,
183-
"content": textField,
182+
"name": textField, // Use text field with standard analyzer
183+
"content": textField, // Use text field with standard analyzer
184184
"updated_at": dateField,
185185
}
186186

@@ -190,7 +190,7 @@ func (si *SearchIndexer) createIndexMapping() mapping.IndexMapping {
190190

191191
indexMapping := bleve.NewIndexMapping()
192192
indexMapping.DefaultMapping = docMapping
193-
indexMapping.DefaultAnalyzer = en.AnalyzerName
193+
indexMapping.DefaultAnalyzer = "standard"
194194

195195
return indexMapping
196196
}
@@ -365,6 +365,26 @@ func (si *SearchIndexer) searchWithType(ctx context.Context, queryStr string, do
365365
}
366366
}
367367

368+
// isNumericQuery reports whether queryStr is predominantly numeric, meaning
// strictly more than half of its characters are ASCII digits ('0'-'9').
// buildQuery uses the result to pick the numeric or the text search strategy.
//
// The ratio is computed over runes, not bytes, so a multi-byte character
// counts once and cannot dilute the digit share of the query.
//
// Examples: "9005", ":9005" and "192.168.1.1" are numeric; "nginx",
// "server9005", "port:9005" (4 of 9 characters) and "v1.2.3" (exactly half)
// are not. The empty string is never numeric.
func isNumericQuery(queryStr string) bool {
	digits, total := 0, 0
	for _, ch := range queryStr {
		total++
		if ch >= '0' && ch <= '9' {
			digits++
		}
	}

	// Strictly more than 50% digits. The integer comparison avoids any float
	// rounding and is naturally false for the empty string (0 > 0).
	return digits*2 > total
}
387+
368388
// buildQuery builds a search query with optional type filtering
369389
func (si *SearchIndexer) buildQuery(queryStr string, docType string) query.Query {
370390
mainQuery := bleve.NewBooleanQuery()
@@ -376,6 +396,9 @@ func (si *SearchIndexer) buildQuery(queryStr string, docType string) query.Query
376396
mainQuery.AddMust(typeQuery)
377397
}
378398

399+
// Determine if this is a numeric query
400+
isNumeric := isNumericQuery(queryStr)
401+
379402
// Add text search across name and content fields only
380403
textQuery := bleve.NewBooleanQuery()
381404
searchFields := []string{"name", "content"}
@@ -384,30 +407,62 @@ func (si *SearchIndexer) buildQuery(queryStr string, docType string) query.Query
384407
// Create a boolean query for this field to combine multiple query types
385408
fieldQuery := bleve.NewBooleanQuery()
386409

387-
// 1. Exact match query (highest priority)
388-
matchQuery := bleve.NewMatchQuery(queryStr)
389-
matchQuery.SetField(field)
390-
matchQuery.SetBoost(3.0) // Higher boost for exact matches
391-
fieldQuery.AddShould(matchQuery)
392-
393-
// 2. Prefix query for partial matches (e.g., "access" matches "access_log")
394-
prefixQuery := bleve.NewPrefixQuery(queryStr)
395-
prefixQuery.SetField(field)
396-
prefixQuery.SetBoost(2.0) // Medium boost for prefix matches
397-
fieldQuery.AddShould(prefixQuery)
398-
399-
// 3. Wildcard query for more flexible matching
400-
wildcardQuery := bleve.NewWildcardQuery("*" + queryStr + "*")
401-
wildcardQuery.SetField(field)
402-
wildcardQuery.SetBoost(1.5) // Lower boost for wildcard matches
403-
fieldQuery.AddShould(wildcardQuery)
404-
405-
// 4. Fuzzy match query (allows 1 character difference)
406-
fuzzyQuery := bleve.NewFuzzyQuery(queryStr)
407-
fuzzyQuery.SetField(field)
408-
fuzzyQuery.SetFuzziness(1)
409-
fuzzyQuery.SetBoost(1.0) // Lowest boost for fuzzy matches
410-
fieldQuery.AddShould(fuzzyQuery)
410+
if isNumeric {
411+
// Numeric query strategy: prioritize exact matches and prefix matches
412+
// Avoid fuzzy matching to prevent false positives
413+
414+
// 1. Term query for exact token match (highest priority for numbers)
415+
termQuery := bleve.NewTermQuery(queryStr)
416+
termQuery.SetField(field)
417+
termQuery.SetBoost(10.0) // Highest boost for exact term matches
418+
fieldQuery.AddShould(termQuery)
419+
420+
// 2. Prefix query for partial matches (e.g., "9005" matches "90051234")
421+
prefixQuery := bleve.NewPrefixQuery(queryStr)
422+
prefixQuery.SetField(field)
423+
prefixQuery.SetBoost(5.0) // High boost for prefix matches
424+
fieldQuery.AddShould(prefixQuery)
425+
426+
// 3. Wildcard query for substring matching (e.g., "9005" in "listen 9005;")
427+
wildcardQuery := bleve.NewWildcardQuery("*" + queryStr + "*")
428+
wildcardQuery.SetField(field)
429+
wildcardQuery.SetBoost(2.0) // Lower boost for wildcard matches
430+
fieldQuery.AddShould(wildcardQuery)
431+
432+
} else {
433+
// Text query strategy: more flexible matching with fuzzy support
434+
435+
// 1. Term query for exact token match (highest priority)
436+
termQuery := bleve.NewTermQuery(strings.ToLower(queryStr))
437+
termQuery.SetField(field)
438+
termQuery.SetBoost(8.0) // High boost for exact matches
439+
fieldQuery.AddShould(termQuery)
440+
441+
// 2. Match query for analyzed text search (handles case-insensitive, etc.)
442+
matchQuery := bleve.NewMatchQuery(queryStr)
443+
matchQuery.SetField(field)
444+
matchQuery.SetBoost(4.0) // Medium-high boost for match queries
445+
fieldQuery.AddShould(matchQuery)
446+
447+
// 3. Prefix query for partial matches (e.g., "access" matches "access_log")
448+
prefixQuery := bleve.NewPrefixQuery(strings.ToLower(queryStr))
449+
prefixQuery.SetField(field)
450+
prefixQuery.SetBoost(3.0) // Medium boost for prefix matches
451+
fieldQuery.AddShould(prefixQuery)
452+
453+
// 4. Wildcard query for more flexible matching
454+
wildcardQuery := bleve.NewWildcardQuery("*" + strings.ToLower(queryStr) + "*")
455+
wildcardQuery.SetField(field)
456+
wildcardQuery.SetBoost(2.0) // Lower boost for wildcard matches
457+
fieldQuery.AddShould(wildcardQuery)
458+
459+
// 5. Fuzzy match query (allows 1 character difference) - only for text queries
460+
fuzzyQuery := bleve.NewFuzzyQuery(queryStr)
461+
fuzzyQuery.SetField(field)
462+
fuzzyQuery.SetFuzziness(1)
463+
fuzzyQuery.SetBoost(1.0) // Lowest boost for fuzzy matches
464+
fieldQuery.AddShould(fuzzyQuery)
465+
}
411466

412467
textQuery.AddShould(fieldQuery)
413468
}

internal/cache/search_test.go

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
package cache
2+
3+
import (
4+
"testing"
5+
)
6+
7+
// TestIsNumericQuery tests the isNumericQuery function
8+
func TestIsNumericQuery(t *testing.T) {
9+
tests := []struct {
10+
name string
11+
query string
12+
expected bool
13+
}{
14+
{
15+
name: "Pure number",
16+
query: "9005",
17+
expected: true,
18+
},
19+
{
20+
name: "Port with colon",
21+
query: ":9005",
22+
expected: true, // 4/5 = 80% are digits
23+
},
24+
{
25+
name: "IP address",
26+
query: "192.168.1.1",
27+
expected: true, // 9/11 = 81% are digits
28+
},
29+
{
30+
name: "Pure text",
31+
query: "nginx",
32+
expected: false,
33+
},
34+
{
35+
name: "Mixed with mostly text",
36+
query: "server9005",
37+
expected: false, // 4/10 = 40% are digits
38+
},
39+
{
40+
name: "Mixed with mostly numbers",
41+
query: "9005server",
42+
expected: false, // 4/10 = 40% are digits
43+
},
44+
{
45+
name: "Port number",
46+
query: "8080",
47+
expected: true,
48+
},
49+
{
50+
name: "Version number",
51+
query: "v1.2.3",
52+
expected: false, // 3/6 = 50% exactly, not > 50%
53+
},
54+
{
55+
name: "Empty string",
56+
query: "",
57+
expected: false,
58+
},
59+
}
60+
61+
for _, tt := range tests {
62+
t.Run(tt.name, func(t *testing.T) {
63+
result := isNumericQuery(tt.query)
64+
if result != tt.expected {
65+
t.Errorf("isNumericQuery(%q) = %v, want %v", tt.query, result, tt.expected)
66+
}
67+
})
68+
}
69+
}
70+
71+
// TestBuildQuery tests the buildQuery function structure
72+
func TestBuildQuery(t *testing.T) {
73+
indexer := &SearchIndexer{}
74+
75+
tests := []struct {
76+
name string
77+
query string
78+
docType string
79+
validate func(t *testing.T, query interface{})
80+
}{
81+
{
82+
name: "Numeric query",
83+
query: "9005",
84+
docType: "",
85+
validate: func(t *testing.T, query interface{}) {
86+
if query == nil {
87+
t.Error("Expected non-nil query")
88+
}
89+
// The query should be built with numeric strategy
90+
// which prioritizes exact matches
91+
},
92+
},
93+
{
94+
name: "Text query",
95+
query: "nginx",
96+
docType: "",
97+
validate: func(t *testing.T, query interface{}) {
98+
if query == nil {
99+
t.Error("Expected non-nil query")
100+
}
101+
// The query should be built with text strategy
102+
// which includes fuzzy matching
103+
},
104+
},
105+
{
106+
name: "Numeric query with type filter",
107+
query: "9005",
108+
docType: "site",
109+
validate: func(t *testing.T, query interface{}) {
110+
if query == nil {
111+
t.Error("Expected non-nil query")
112+
}
113+
// The query should include type filter
114+
},
115+
},
116+
}
117+
118+
for _, tt := range tests {
119+
t.Run(tt.name, func(t *testing.T) {
120+
query := indexer.buildQuery(tt.query, tt.docType)
121+
tt.validate(t, query)
122+
})
123+
}
124+
}
125+
126+
// TestSearchStrategyDifference ensures numeric and text queries use different strategies
127+
func TestSearchStrategyDifference(t *testing.T) {
128+
// Test that numeric queries don't use fuzzy matching
129+
numericQuery := "9005"
130+
if !isNumericQuery(numericQuery) {
131+
t.Error("Expected '9005' to be detected as numeric")
132+
}
133+
134+
// Test that text queries do use fuzzy matching
135+
textQuery := "nginx"
136+
if isNumericQuery(textQuery) {
137+
t.Error("Expected 'nginx' to be detected as text")
138+
}
139+
}
140+

internal/config/generic_list.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ func SiteStatusMapBuilder(maintenanceSuffix string) StatusMapBuilder {
272272
// DefaultFilterMatcher provides the standard filtering logic with name search
273273
func DefaultFilterMatcher(fileName string, status Status, namespaceID uint64, options *GenericListOptions) bool {
274274
// Substring name matching: reject the file unless its name contains options.Name
275-
if options.Name != "" && fileName != options.Name {
275+
if options.Name != "" && !strings.Contains(fileName, options.Name) {
276276
return false
277277
}
278278
if options.Status != "" && status != Status(options.Status) {

0 commit comments

Comments
 (0)