2525link_data = []
2626
2727
# Rank assigned to each known doc_type. Built once at import time instead of
# being re-created on every call.
_DOC_TYPE_RANKS = {
    'guide': 3,
    'reference': 3,
    'changelog': 1,
    'landing_page': 1,
}

# Rank used when doc_type is missing, empty, or not in the table above.
_DEFAULT_DOC_TYPE_RANK = 2


def get_doc_type_rank(doc_type):
    """Return numeric rank for doc_type to use in Algolia customRanking.

    Args:
        doc_type: The document type taken from a page's metadata. Matching
            ignores letter case and surrounding whitespace, so 'Guide' and
            ' guide ' are treated the same as 'guide'.

    Returns:
        3 for 'guide' and 'reference', 1 for 'changelog' and 'landing_page',
        and 2 for anything else (unknown or empty strings, None, and any
        other non-string value).
    """
    # Non-strings (None, lists, ...) cannot name a doc type; returning the
    # default here also avoids a TypeError on unhashable values.
    if not isinstance(doc_type, str):
        return _DEFAULT_DOC_TYPE_RANK
    normalized = doc_type.strip().lower()
    return _DOC_TYPE_RANKS.get(normalized, _DEFAULT_DOC_TYPE_RANK)
37+
38+
2839def split_url_and_anchor (url ):
2940 parsed_url = urlparse (url )
3041 url_without_anchor = urlunparse (parsed_url ._replace (fragment = "" ))
@@ -39,7 +50,11 @@ def read_metadata(text):
3950 parts = part .split (":" )
4051 if len (parts ) == 2 :
4152 if parts [0 ] in ['title' , 'description' , 'slug' , 'keywords' , 'score' , 'doc_type' ]:
42- metadata [parts [0 ]] = int (parts [1 ].strip ()) if parts [0 ] == 'score' else parts [1 ].strip ()
53+ value = parts [1 ].strip ()
54+ # Strip quotes only from doc_type
55+ if parts [0 ] == 'doc_type' :
56+ value = value .strip ("'\" " )
57+ metadata [parts [0 ]] = int (value ) if parts [0 ] == 'score' else value
4358 return metadata
4459
4560
@@ -249,7 +264,8 @@ def parse_markdown_content(metadata, content, base_url):
249264 'lvl1' : current_h1
250265 },
251266 'score' : metadata .get ('score' , 0 ),
252- 'doc_type' : metadata .get ('doc_type' , '' )
267+ 'doc_type' : metadata .get ('doc_type' , '' ),
268+ 'doc_type_rank' : get_doc_type_rank (metadata .get ('doc_type' , '' ))
253269 }
254270 for line in lines :
255271 if line .startswith ('# ' ):
@@ -294,7 +310,8 @@ def parse_markdown_content(metadata, content, base_url):
294310 'lvl1' : current_h1 ,
295311 'lvl2' : current_h2 ,
296312 },
297- 'doc_type' : metadata .get ('doc_type' , '' )
313+ 'doc_type' : metadata .get ('doc_type' , '' ),
314+ 'doc_type_rank' : get_doc_type_rank (metadata .get ('doc_type' , '' ))
298315 }
299316 elif line .startswith ('### ' ):
300317 # note we send users to the h2 or h1 even on ###
@@ -324,7 +341,8 @@ def parse_markdown_content(metadata, content, base_url):
324341 'lvl2' : current_h2 ,
325342 'lvl3' : current_h3 ,
326343 },
327- 'doc_type' : metadata .get ('doc_type' , '' )
344+ 'doc_type' : metadata .get ('doc_type' , '' ),
345+ 'doc_type_rank' : get_doc_type_rank (metadata .get ('doc_type' , '' ))
328346 }
329347 elif line .startswith ('#### ' ):
330348 if current_subdoc :
@@ -351,7 +369,8 @@ def parse_markdown_content(metadata, content, base_url):
351369 'lvl3' : current_h3 ,
352370 'lvl4' : current_h4 ,
353371 },
354- 'doc_type' : metadata .get ('doc_type' , '' )
372+ 'doc_type' : metadata .get ('doc_type' , '' ),
373+ 'doc_type_rank' : get_doc_type_rank (metadata .get ('doc_type' , '' ))
355374 }
356375 elif current_subdoc :
357376 current_subdoc ['content' ] += line + '\n '
@@ -453,6 +472,7 @@ def main(base_directory, algolia_app_id, algolia_api_key, algolia_index_name,
453472 print (f"URL: { sample_record .get ('url' , 'N/A' )} " )
454473 print (f"Type: { sample_record .get ('type' , 'N/A' )} " )
455474 print (f"Doc Type: { sample_record .get ('doc_type' , 'N/A' )} " )
475+ print (f"Doc Type Rank: { sample_record .get ('doc_type_rank' , 'N/A' )} " )
456476 print (f"Keywords: { sample_record .get ('keywords' , 'N/A' )} " )
457477 print ("--- End sample ---\n " )
458478 print (f"{ 'processed' if dry_run else 'indexed' } { len (batch )} records" )
0 commit comments