Merge pull request #4539 from ClickHouse/revert-4538-revert-4520-revert-4519-fix-search-ranking

Blargian · web-flow · commit 48b69be6e65f · 2025-10-07T16:36:23.000+02:00
Revert "Revert "Revert "adding doc_type ranking so references and guides appear first"""
diff --git a/scripts/search/index_pages.py b/scripts/search/index_pages.py
@@ -25,17 +25,6 @@
 link_data = []
 
 
-def get_doc_type_rank(doc_type):
-    """Return numeric rank for doc_type to use in Algolia customRanking."""
-    ranks = {
-        'guide': 3,
-        'reference': 3,
-        'changelog': 1,
-        'landing_page': 1
-    }
-    return ranks.get(doc_type, 2)  # Default to 2 for unspecified types
-
-
 def split_url_and_anchor(url):
     parsed_url = urlparse(url)
     url_without_anchor = urlunparse(parsed_url._replace(fragment=""))
@@ -50,11 +39,7 @@ def read_metadata(text):
         parts = part.split(":")
         if len(parts) == 2:
             if parts[0] in ['title', 'description', 'slug', 'keywords', 'score', 'doc_type']:
-                value = parts[1].strip()
-                # Strip quotes only from doc_type
-                if parts[0] == 'doc_type':
-                    value = value.strip("'\"")
-                metadata[parts[0]] = int(value) if parts[0] == 'score' else value
+                metadata[parts[0]] = int(parts[1].strip()) if parts[0] == 'score' else parts[1].strip()
     return metadata
 
 
@@ -264,8 +249,7 @@ def parse_markdown_content(metadata, content, base_url):
             'lvl1': current_h1
         },
         'score': metadata.get('score', 0),
-        'doc_type': metadata.get('doc_type', ''),
-        'doc_type_rank': get_doc_type_rank(metadata.get('doc_type', ''))
+        'doc_type': metadata.get('doc_type', '')
     }
     for line in lines:
         if line.startswith('# '):
@@ -310,8 +294,7 @@ def parse_markdown_content(metadata, content, base_url):
                     'lvl1': current_h1,
                     'lvl2': current_h2,
                 },
-                'doc_type': metadata.get('doc_type', ''),
-                'doc_type_rank': get_doc_type_rank(metadata.get('doc_type', ''))
+                'doc_type': metadata.get('doc_type', '')
             }
         elif line.startswith('### '):
             # note we send users to the h2 or h1 even on ###
@@ -341,8 +324,7 @@ def parse_markdown_content(metadata, content, base_url):
                     'lvl2': current_h2,
                     'lvl3': current_h3,
                 },
-                'doc_type': metadata.get('doc_type', ''),
-                'doc_type_rank': get_doc_type_rank(metadata.get('doc_type', ''))
+                'doc_type': metadata.get('doc_type', '')
             }
         elif line.startswith('#### '):
             if current_subdoc:
@@ -369,8 +351,7 @@ def parse_markdown_content(metadata, content, base_url):
                     'lvl3': current_h3,
                     'lvl4': current_h4,
                 },
-                'doc_type': metadata.get('doc_type', ''),
-                'doc_type_rank': get_doc_type_rank(metadata.get('doc_type', ''))
+                'doc_type': metadata.get('doc_type', '')
             }
         elif current_subdoc:
             current_subdoc['content'] += line + '\n'
@@ -472,7 +453,6 @@ def main(base_directory, algolia_app_id, algolia_api_key, algolia_index_name,
                 print(f"URL: {sample_record.get('url', 'N/A')}")
                 print(f"Type: {sample_record.get('type', 'N/A')}")
                 print(f"Doc Type: {sample_record.get('doc_type', 'N/A')}")
-                print(f"Doc Type Rank: {sample_record.get('doc_type_rank', 'N/A')}")
                 print(f"Keywords: {sample_record.get('keywords', 'N/A')}")
                 print("--- End sample ---\n")
         print(f"{'processed' if dry_run else 'indexed'} {len(batch)} records")
diff --git a/scripts/search/settings.json b/scripts/search/settings.json
@@ -22,8 +22,7 @@
       "url_without_anchor",
       "type",
       "title",
-      "doc_type",
-      "doc_type_rank"
+      "doc_type"
     ],
     "camelCaseAttributes": [
       "h1",
@@ -54,7 +53,7 @@
       "an"
     ],
     "attributesForFaceting": [
-      "filterOnly(doc_type)"
+      "doc_type"
     ],
     "attributesToSnippet": [
       "content:15",
@@ -84,7 +83,6 @@
       "custom"
     ],
     "customRanking": [
-      "desc(doc_type_rank)",
       "desc(score)",
       "desc(page_rank)"
     ],
diff --git a/src/theme/SearchBar/utils/searchConfig.js b/src/theme/SearchBar/utils/searchConfig.js
@@ -8,7 +8,7 @@ export const createDocTypeFilters = (docTypes) => {
   if (!docTypes) return [];
   
   const types = Array.isArray(docTypes) ? docTypes : [docTypes];
-  return types.map(type => `doc_type:${type}`);
+  return types.map(type => `doc_type:'${type}'`);
 };
 
 /**