Skip to content

Commit ceec789

Browse files
authored
Merge pull request #4540 from ClickHouse/fix-search-ranking
Trying again after the earlier reverts. Tested against the index locally.
2 parents 48b69be + 1c16c3a commit ceec789

File tree

3 files changed

+30
-8
lines changed

3 files changed

+30
-8
lines changed

scripts/search/index_pages.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,17 @@
2525
link_data = []
2626

2727

28+
def get_doc_type_rank(doc_type):
    """Return numeric rank for doc_type to use in Algolia customRanking.

    'guide' and 'reference' rank highest (3), 'changelog' and
    'landing_page' rank lowest (1), and anything else gets the default (2).

    The value is normalized before lookup: surrounding whitespace and
    single/double quotes are stripped and the result is lower-cased, so a
    value such as "'Guide'" maps to the same rank as 'guide' instead of
    silently falling through to the default.

    Args:
        doc_type: The doc_type string taken from a page's metadata. A
            non-string value (e.g. None) is treated as unspecified.

    Returns:
        int: 3, 2 or 1; higher values are meant to sort first.
    """
    ranks = {
        'guide': 3,
        'reference': 3,
        'changelog': 1,
        'landing_page': 1,
    }
    default_rank = 2  # Default to 2 for unspecified types

    # Non-string input cannot name a known type; also avoids a TypeError
    # from the dict lookup when the value is unhashable.
    if not isinstance(doc_type, str):
        return default_rank

    normalized = doc_type.strip().strip("'\"").strip().lower()
    return ranks.get(normalized, default_rank)
37+
38+
2839
def split_url_and_anchor(url):
2940
parsed_url = urlparse(url)
3041
url_without_anchor = urlunparse(parsed_url._replace(fragment=""))
@@ -39,7 +50,11 @@ def read_metadata(text):
3950
parts = part.split(":")
4051
if len(parts) == 2:
4152
if parts[0] in ['title', 'description', 'slug', 'keywords', 'score', 'doc_type']:
42-
metadata[parts[0]] = int(parts[1].strip()) if parts[0] == 'score' else parts[1].strip()
53+
value = parts[1].strip()
54+
# Strip quotes only from doc_type
55+
if parts[0] == 'doc_type':
56+
value = value.strip("'\"")
57+
metadata[parts[0]] = int(value) if parts[0] == 'score' else value
4358
return metadata
4459

4560

@@ -249,7 +264,8 @@ def parse_markdown_content(metadata, content, base_url):
249264
'lvl1': current_h1
250265
},
251266
'score': metadata.get('score', 0),
252-
'doc_type': metadata.get('doc_type', '')
267+
'doc_type': metadata.get('doc_type', ''),
268+
'doc_type_rank': get_doc_type_rank(metadata.get('doc_type', ''))
253269
}
254270
for line in lines:
255271
if line.startswith('# '):
@@ -294,7 +310,8 @@ def parse_markdown_content(metadata, content, base_url):
294310
'lvl1': current_h1,
295311
'lvl2': current_h2,
296312
},
297-
'doc_type': metadata.get('doc_type', '')
313+
'doc_type': metadata.get('doc_type', ''),
314+
'doc_type_rank': get_doc_type_rank(metadata.get('doc_type', ''))
298315
}
299316
elif line.startswith('### '):
300317
# note we send users to the h2 or h1 even on ###
@@ -324,7 +341,8 @@ def parse_markdown_content(metadata, content, base_url):
324341
'lvl2': current_h2,
325342
'lvl3': current_h3,
326343
},
327-
'doc_type': metadata.get('doc_type', '')
344+
'doc_type': metadata.get('doc_type', ''),
345+
'doc_type_rank': get_doc_type_rank(metadata.get('doc_type', ''))
328346
}
329347
elif line.startswith('#### '):
330348
if current_subdoc:
@@ -351,7 +369,8 @@ def parse_markdown_content(metadata, content, base_url):
351369
'lvl3': current_h3,
352370
'lvl4': current_h4,
353371
},
354-
'doc_type': metadata.get('doc_type', '')
372+
'doc_type': metadata.get('doc_type', ''),
373+
'doc_type_rank': get_doc_type_rank(metadata.get('doc_type', ''))
355374
}
356375
elif current_subdoc:
357376
current_subdoc['content'] += line + '\n'
@@ -453,6 +472,7 @@ def main(base_directory, algolia_app_id, algolia_api_key, algolia_index_name,
453472
print(f"URL: {sample_record.get('url', 'N/A')}")
454473
print(f"Type: {sample_record.get('type', 'N/A')}")
455474
print(f"Doc Type: {sample_record.get('doc_type', 'N/A')}")
475+
print(f"Doc Type Rank: {sample_record.get('doc_type_rank', 'N/A')}")
456476
print(f"Keywords: {sample_record.get('keywords', 'N/A')}")
457477
print("--- End sample ---\n")
458478
print(f"{'processed' if dry_run else 'indexed'} {len(batch)} records")

scripts/search/settings.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
"url_without_anchor",
2323
"type",
2424
"title",
25-
"doc_type"
25+
"doc_type",
26+
"doc_type_rank"
2627
],
2728
"camelCaseAttributes": [
2829
"h1",
@@ -53,7 +54,7 @@
5354
"an"
5455
],
5556
"attributesForFaceting": [
56-
"doc_type"
57+
"filterOnly(doc_type)"
5758
],
5859
"attributesToSnippet": [
5960
"content:15",
@@ -83,6 +84,7 @@
8384
"custom"
8485
],
8586
"customRanking": [
87+
"desc(doc_type_rank)",
8688
"desc(score)",
8789
"desc(page_rank)"
8890
],

src/theme/SearchBar/utils/searchConfig.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ export const createDocTypeFilters = (docTypes) => {
88
if (!docTypes) return [];
99

1010
const types = Array.isArray(docTypes) ? docTypes : [docTypes];
11-
return types.map(type => `doc_type:'${type}'`);
11+
return types.map(type => `doc_type:${type}`);
1212
};
1313

1414
/**

0 commit comments

Comments
 (0)