2525link_data = []
2626
2727
def get_doc_type_rank(doc_type):
    """Map a documentation type to its numeric rank for Algolia customRanking.

    'guide' and 'reference' rank 3, 'changelog' and 'landing_page' rank 1,
    and every other (or unspecified) doc_type falls back to 2.
    """
    fallback_rank = 2  # used for any doc_type not listed below
    rank_by_type = {
        **dict.fromkeys(('guide', 'reference'), 3),
        **dict.fromkeys(('changelog', 'landing_page'), 1),
    }
    return rank_by_type.get(doc_type, fallback_rank)
37-
38-
3928def split_url_and_anchor (url ):
4029 parsed_url = urlparse (url )
4130 url_without_anchor = urlunparse (parsed_url ._replace (fragment = "" ))
@@ -50,11 +39,7 @@ def read_metadata(text):
5039 parts = part .split (":" )
5140 if len (parts ) == 2 :
5241 if parts [0 ] in ['title' , 'description' , 'slug' , 'keywords' , 'score' , 'doc_type' ]:
53- value = parts [1 ].strip ()
54- # Strip quotes only from doc_type
55- if parts [0 ] == 'doc_type' :
56- value = value .strip ("'\" " )
57- metadata [parts [0 ]] = int (value ) if parts [0 ] == 'score' else value
42+ metadata [parts [0 ]] = int (parts [1 ].strip ()) if parts [0 ] == 'score' else parts [1 ].strip ()
5843 return metadata
5944
6045
@@ -264,8 +249,7 @@ def parse_markdown_content(metadata, content, base_url):
264249 'lvl1' : current_h1
265250 },
266251 'score' : metadata .get ('score' , 0 ),
267- 'doc_type' : metadata .get ('doc_type' , '' ),
268- 'doc_type_rank' : get_doc_type_rank (metadata .get ('doc_type' , '' ))
252+ 'doc_type' : metadata .get ('doc_type' , '' )
269253 }
270254 for line in lines :
271255 if line .startswith ('# ' ):
@@ -310,8 +294,7 @@ def parse_markdown_content(metadata, content, base_url):
310294 'lvl1' : current_h1 ,
311295 'lvl2' : current_h2 ,
312296 },
313- 'doc_type' : metadata .get ('doc_type' , '' ),
314- 'doc_type_rank' : get_doc_type_rank (metadata .get ('doc_type' , '' ))
297+ 'doc_type' : metadata .get ('doc_type' , '' )
315298 }
316299 elif line .startswith ('### ' ):
317300 # note we send users to the h2 or h1 even on ###
@@ -341,8 +324,7 @@ def parse_markdown_content(metadata, content, base_url):
341324 'lvl2' : current_h2 ,
342325 'lvl3' : current_h3 ,
343326 },
344- 'doc_type' : metadata .get ('doc_type' , '' ),
345- 'doc_type_rank' : get_doc_type_rank (metadata .get ('doc_type' , '' ))
327+ 'doc_type' : metadata .get ('doc_type' , '' )
346328 }
347329 elif line .startswith ('#### ' ):
348330 if current_subdoc :
@@ -369,8 +351,7 @@ def parse_markdown_content(metadata, content, base_url):
369351 'lvl3' : current_h3 ,
370352 'lvl4' : current_h4 ,
371353 },
372- 'doc_type' : metadata .get ('doc_type' , '' ),
373- 'doc_type_rank' : get_doc_type_rank (metadata .get ('doc_type' , '' ))
354+ 'doc_type' : metadata .get ('doc_type' , '' )
374355 }
375356 elif current_subdoc :
376357 current_subdoc ['content' ] += line + '\n '
@@ -472,7 +453,6 @@ def main(base_directory, algolia_app_id, algolia_api_key, algolia_index_name,
472453 print (f"URL: { sample_record .get ('url' , 'N/A' )} " )
473454 print (f"Type: { sample_record .get ('type' , 'N/A' )} " )
474455 print (f"Doc Type: { sample_record .get ('doc_type' , 'N/A' )} " )
475- print (f"Doc Type Rank: { sample_record .get ('doc_type_rank' , 'N/A' )} " )
476456 print (f"Keywords: { sample_record .get ('keywords' , 'N/A' )} " )
477457 print ("--- End sample ---\n " )
478458 print (f"{ 'processed' if dry_run else 'indexed' } { len (batch )} records" )
0 commit comments