Skip to content

Commit 3825267

Browse files
authored
Merge pull request #4468 from ClickHouse/search-filtering
search filtering by doc_type
2 parents 0033e5d + 0f6f791 commit 3825267

File tree

6 files changed

+207 -60 lines changed

scripts/search/index_pages.py

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def read_metadata(text):
3838
for part in parts:
3939
parts = part.split(":")
4040
if len(parts) == 2:
41-
if parts[0] in ['title', 'description', 'slug', 'keywords', 'score']:
41+
if parts[0] in ['title', 'description', 'slug', 'keywords', 'score', 'doc_type']:
4242
metadata[parts[0]] = int(parts[1].strip()) if parts[0] == 'score' else parts[1].strip()
4343
return metadata
4444

@@ -215,12 +215,12 @@ def update_page_links(directory, base_directory, page_path, url, content, base_u
215215
c_page = os.path.abspath(os.path.join(os.path.dirname(page_path), './' + target))
216216
metadata, _ = parse_metadata_and_content(directory, base_directory, c_page, log_snippet_failure=False)
217217
if 'slug' in metadata:
218-
link_data.append((url, f'{base_url}{metadata.get('slug')}'))
218+
link_data.append((url, f"{base_url}{metadata.get('slug')}"))
219219
else:
220220
fail = True
221221
elif target.startswith('/'): # ignore external links
222222
target = target.removesuffix('/')
223-
link_data.append((url, f'{base_url}{target}'))
223+
link_data.append((url, f"{base_url}{target}"))
224224
if fail:
225225
print(f"Warning: couldn't resolve link for {page_path}")
226226

@@ -248,7 +248,8 @@ def parse_markdown_content(metadata, content, base_url):
248248
'lvl0': current_h1,
249249
'lvl1': current_h1
250250
},
251-
'score': metadata.get('score', 0)
251+
'score': metadata.get('score', 0),
252+
'doc_type': metadata.get('doc_type', '')
252253
}
253254
for line in lines:
254255
if line.startswith('# '):
@@ -266,8 +267,7 @@ def parse_markdown_content(metadata, content, base_url):
266267
current_subdoc['type'] = 'lvl1'
267268
current_subdoc['object_id'] = custom_slugify(heading_slug)
268269
current_subdoc['hierarchy']['lvl1'] = current_h1
269-
current_subdoc['hierarchy']['lvl0'] = current_h1 if metadata.get('title', '') == '' else metadata.get(
270-
'title', '')
270+
current_subdoc['hierarchy']['lvl0'] = current_h1 if metadata.get('title', '') == '' else metadata.get('title', '')
271271
elif line.startswith('## '):
272272
if current_subdoc:
273273
yield from split_large_document(current_subdoc)
@@ -293,7 +293,8 @@ def parse_markdown_content(metadata, content, base_url):
293293
'lvl0': current_h1 if metadata.get('title', '') == '' else metadata.get('title', ''),
294294
'lvl1': current_h1,
295295
'lvl2': current_h2,
296-
}
296+
},
297+
'doc_type': metadata.get('doc_type', '')
297298
}
298299
elif line.startswith('### '):
299300
# note we send users to the h2 or h1 even on ###
@@ -322,7 +323,8 @@ def parse_markdown_content(metadata, content, base_url):
322323
'lvl1': current_h1,
323324
'lvl2': current_h2,
324325
'lvl3': current_h3,
325-
}
326+
},
327+
'doc_type': metadata.get('doc_type', '')
326328
}
327329
elif line.startswith('#### '):
328330
if current_subdoc:
@@ -348,7 +350,8 @@ def parse_markdown_content(metadata, content, base_url):
348350
'lvl2': current_h2,
349351
'lvl3': current_h3,
350352
'lvl4': current_h4,
351-
}
353+
},
354+
'doc_type': metadata.get('doc_type', '')
352355
}
353356
elif current_subdoc:
354357
current_subdoc['content'] += line + '\n'
@@ -410,9 +413,9 @@ def compute_page_rank(link_data, damping_factor=0.85, max_iter=100, tol=1e-6):
410413
def create_new_index(client, index_name):
411414
try:
412415
client.delete_index(index_name)
413-
print(f'Temporary index \'{index_name}\' deleted successfully.')
416+
print(f"Temporary index '{index_name}' deleted successfully.")
414417
except:
415-
print(f'Temporary index \'{index_name}\' does not exist or could not be deleted')
418+
print(f"Temporary index '{index_name}' does not exist or could not be deleted")
416419
client.set_settings(index_name, settings['settings'])
417420
client.save_rules(index_name, settings['rules'])
418421
print(f"Settings applied to temporary index '{index_name}'.")
@@ -442,9 +445,19 @@ def main(base_directory, algolia_app_id, algolia_api_key, algolia_index_name,
442445
else:
443446
for d in batch:
444447
print(f"{d['url']} - {d['page_rank']}")
445-
print(f'{'processed' if dry_run else 'indexed'} {len(batch)} records')
448+
# Print a sample record to verify doc_type is included
449+
if batch:
450+
print("\n--- Sample record ---")
451+
sample_record = batch[0]
452+
print(f"Title: {sample_record.get('title', 'N/A')}")
453+
print(f"URL: {sample_record.get('url', 'N/A')}")
454+
print(f"Type: {sample_record.get('type', 'N/A')}")
455+
print(f"Doc Type: {sample_record.get('doc_type', 'N/A')}")
456+
print(f"Keywords: {sample_record.get('keywords', 'N/A')}")
457+
print("--- End sample ---\n")
458+
print(f"{'processed' if dry_run else 'indexed'} {len(batch)} records")
446459
t += len(batch)
447-
print(f'total {'processed' if dry_run else 'indexed'} {t} records')
460+
print(f"total {'processed' if dry_run else 'indexed'} {t} records")
448461
if not dry_run:
449462
print('switching temporary index...', end='')
450463
client.operation_index(temp_index_name, {"operation": "move", "destination": algolia_index_name})
@@ -471,4 +484,4 @@ def main(base_directory, algolia_app_id, algolia_api_key, algolia_index_name,
471484
if args.dry_run:
472485
print('Dry running, not sending results to Algolia.')
473486
main(args.base_directory, args.algolia_app_id, args.algolia_api_key, args.algolia_index_name,
474-
dry_run=args.dry_run)
487+
dry_run=args.dry_run)

scripts/search/settings.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
"url",
2222
"url_without_anchor",
2323
"type",
24-
"title"
24+
"title",
25+
"doc_type"
2526
],
2627
"camelCaseAttributes": [
2728
"h1",
@@ -51,7 +52,9 @@
5152
"a",
5253
"an"
5354
],
54-
"attributesForFaceting": null,
55+
"attributesForFaceting": [
56+
"doc_type"
57+
],
5558
"attributesToSnippet": [
5659
"content:15",
5760
"title:10"
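src/theme/SearchBar/docTypeSelector.jsx [file header missing from capture; path inferred from the './docTypeSelector' import in src/theme/SearchBar/index.js, extension assumed]

Lines changed: 47 additions & 0 deletions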
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import React from 'react';
2+
3+
const DOC_TYPES = {
4+
GUIDE: 'guide',
5+
REFERENCE: 'reference',
6+
CHANGELOG: 'changelog',
7+
LANDINGPAGE: 'landing-page',
8+
};
9+
10+
export function DocTypeSelector({ selectedDocTypes, onSelectionChange, className }) {
11+
const handleChange = (event) => {
12+
const value = event.target.value;
13+
if (value === 'all') {
14+
onSelectionChange(null);
15+
} else {
16+
onSelectionChange([value]);
17+
}
18+
};
19+
20+
const currentValue = selectedDocTypes?.length === 1 ? selectedDocTypes[0] : 'all';
21+
22+
return (
23+
<select
24+
value={currentValue}
25+
onChange={handleChange}
26+
className={className}
27+
style={{
28+
padding: '6px 12px',
29+
borderRadius: '6px',
30+
border: '1px solid var(--docsearch-searchbox-shadow)',
31+
backgroundColor: 'var(--docsearch-modal-background)',
32+
color: 'var(--docsearch-text-color)',
33+
fontSize: '14px',
34+
minWidth: '140px',
35+
cursor: 'pointer'
36+
}}
37+
>
38+
<option value="all">All docs</option>
39+
<option value={DOC_TYPES.GUIDE}>Guides</option>
40+
<option value={DOC_TYPES.REFERENCE}>Reference</option>
41+
<option value={DOC_TYPES.CHANGELOG}>Changelog</option>
42+
<option value={DOC_TYPES.LANDINGPAGE}>Landing Pages</option>
43+
</select>
44+
);
45+
}
46+
47+
export { DOC_TYPES };

src/theme/SearchBar/index.js

Lines changed: 50 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import React, { useCallback, useMemo, useRef } from 'react';
1+
import React, { useCallback, useMemo, useRef, useState } from 'react';
22
import { DocSearchButton, useDocSearchKeyboardEvents } from '@docsearch/react';
33
import Head from '@docusaurus/Head';
44
import { useHistory } from '@docusaurus/router';
@@ -21,6 +21,7 @@ import {
2121
} from './utils/searchConfig';
2222
import { SearchHit } from './searchHit';
2323
import { SearchResultsFooter } from './searchResultsFooter';
24+
import { DocTypeSelector } from './docTypeSelector';
2425

2526
function DocSearch({ contextualSearch, externalUrlRegex, ...props }) {
2627
const queryIDRef = useRef(null);
@@ -31,6 +32,9 @@ function DocSearch({ contextualSearch, externalUrlRegex, ...props }) {
3132
const history = useHistory();
3233
const searchButtonRef = useRef(null);
3334

35+
// Doc type filtering state
36+
const [selectedDocTypes, setSelectedDocTypes] = useState(null);
37+
3438
// Use the modal management hook
3539
const {
3640
isOpen,
@@ -43,8 +47,13 @@ function DocSearch({ contextualSearch, externalUrlRegex, ...props }) {
4347
importDocSearchModalIfNeeded
4448
} = useDocSearchModal();
4549

46-
// Configure search parameters
47-
const searchParameters = createSearchParameters(props, contextualSearch, contextualSearchFacetFilters);
50+
// Configure search parameters with doc_type filter
51+
const searchParameters = createSearchParameters(
52+
props,
53+
contextualSearch,
54+
contextualSearchFacetFilters,
55+
selectedDocTypes
56+
);
4857

4958
useEffect(() => {
5059
initializeSearchAnalytics(props.appId, props.apiKey);
@@ -66,6 +75,10 @@ function DocSearch({ contextualSearch, externalUrlRegex, ...props }) {
6675
});
6776
}, [props.transformItems, processSearchResultUrl, currentLocale]);
6877

78+
const handleDocTypeChange = useCallback((docTypes) => {
79+
setSelectedDocTypes(docTypes);
80+
}, []);
81+
6982
const resultsFooterComponent = useMemo(
7083
() =>
7184
// eslint-disable-next-line react/no-unstable-nested-components
@@ -130,23 +143,40 @@ function DocSearch({ contextualSearch, externalUrlRegex, ...props }) {
130143
DocSearchModal &&
131144
searchContainer &&
132145
createPortal(
133-
<DocSearchModal
134-
onClose={onClose}
135-
initialScrollY={window.scrollY}
136-
initialQuery={initialQuery}
137-
navigator={navigator}
138-
transformItems={transformItems}
139-
hitComponent={SearchHit}
140-
transformSearchClient={transformSearchClient}
141-
{...(props.searchPagePath && {
142-
resultsFooterComponent,
143-
})}
144-
{...props}
145-
insights={true}
146-
searchParameters={searchParameters}
147-
placeholder={translations.placeholder}
148-
translations={translations.modal}
149-
/>,
146+
<>
147+
<DocSearchModal
148+
onClose={onClose}
149+
initialScrollY={window.scrollY}
150+
initialQuery={initialQuery}
151+
navigator={navigator}
152+
transformItems={transformItems}
153+
hitComponent={SearchHit}
154+
transformSearchClient={transformSearchClient}
155+
{...(props.searchPagePath && {
156+
resultsFooterComponent,
157+
})}
158+
{...props}
159+
insights={true}
160+
searchParameters={searchParameters}
161+
placeholder={translations.placeholder}
162+
translations={translations.modal}
163+
/>
164+
165+
{/* Selector positioned as overlay */}
166+
<div style={{
167+
position: 'fixed',
168+
top: window.innerWidth < 768 ? '55px' : '120px',
169+
right: window.innerWidth < 768 ? 'calc(50% - 185px)' : 'calc(50% - 255px)',
170+
zIndex: 10000,
171+
backgroundColor: 'var(--docsearch-modal-background)',
172+
boxShadow: '0 2px 8px rgba(0,0,0,0.1)'
173+
}}>
174+
<DocTypeSelector
175+
selectedDocTypes={selectedDocTypes}
176+
onSelectionChange={handleDocTypeChange}
177+
/>
178+
</div>
179+
</>,
150180
searchContainer,
151181
)}
152182
</>

src/theme/SearchBar/searchHit.jsx

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,44 @@ export function SearchHit({ hit, children }) {
1010
.slice(0, 3) // Take first 3 segments max
1111
.map(segment => segment.replace(/-/g, ' ').replace(/\b\w/g, l => l.toUpperCase()));
1212

13+
// Format doc_type for display, stripping quotes and formatting
14+
const formatDocType = (docType) => {
15+
if (!docType) return null;
16+
// Remove surrounding quotes and format
17+
const cleaned = docType.replace(/^'|'$/g, '');
18+
return cleaned.replace(/-/g, ' ').replace(/\b\w/g, l => l.toUpperCase());
19+
};
20+
21+
const docTypeDisplay = formatDocType(hit.doc_type);
22+
1323
return (
1424
<Link onClick={handleClick} to={hit.url}>
1525
{children}
16-
{breadcrumbs.length > 0 && (
17-
<span style={{
18-
fontSize: '10px',
19-
color: '#888',
20-
display: 'block',
21-
lineHeight: '1',
22-
marginBottom: '12px'
23-
}}>
24-
{breadcrumbs.join(' › ')}
25-
</span>
26-
)}
26+
<div style={{
27+
fontSize: '10px',
28+
color: '#888',
29+
lineHeight: '1',
30+
marginBottom: '12px'
31+
}}>
32+
{/* Doc type badge */}
33+
{docTypeDisplay && (
34+
<span style={{
35+
backgroundColor: '#f3f4f6',
36+
color: '#374151',
37+
padding: '2px 6px',
38+
borderRadius: '3px',
39+
marginRight: '8px',
40+
fontWeight: '500'
41+
}}>
42+
{docTypeDisplay}
43+
</span>
44+
)}
45+
46+
{/* Breadcrumbs */}
47+
{breadcrumbs.length > 0 && (
48+
<span>{breadcrumbs.join(' › ')}</span>
49+
)}
50+
</div>
2751
</Link>
2852
);
2953
}

0 commit comments

Comments
 (0)