1- #!/usr/bin/env python
1+ #!/usr/bin/env python3
22import tempfile
33import re
44import urllib .request
1212import sys
1313import marko
1414from marko .md_renderer import MarkdownRenderer
15+ import unicodedata
1516
1617script_path = Path (__file__ )
1718# Add the shared module to the path
@@ -35,8 +36,7 @@ def soupify(url: str) -> BeautifulSoup:
3536 cache_key = m .hexdigest ()
3637 cache_file = cache_path .joinpath (cache_key )
3738 if cache_file .exists ():
38- with cache_file .open () as f :
39- content = f .read ().replace (u'\xa0 ' , u' ' )
39+ content = unicodedata .normalize ("NFKD" , cache_file .read_text ())
4040 else :
4141 resp = requests .get (url )
4242
@@ -500,12 +500,12 @@ def get_help(rule):
500500 print (f"{ err .reason } : { err .stderr } " )
501501 temp_qhelp_path .unlink ()
502502
503- parsed_temp_help = md .parse (temp_help_path .read_text ())
503+ parsed_temp_help = md .parse (unicodedata . normalize ( "NFKD" , temp_help_path .read_text () ))
504504 # Remove the first header that is added by the QHelp to Markdown conversion
505505 del parsed_temp_help .children [0 ]
506506 temp_help_path .write_text (md .render (parsed_temp_help ))
507507
508- parsed_help = md .parse (help_path .read_text ())
508+ parsed_help = md .parse (unicodedata . normalize ( "NFKD" , help_path .read_text () ))
509509 if find_heading (parsed_help , 'CERT' ):
510510 # Check if it contains the CERT heading that needs to be replaced
511511 print (f"ID: { rule ['id' ]} - Found heading 'CERT' whose content will be replaced" )
@@ -514,7 +514,7 @@ def get_help(rule):
514514 # Otherwise, update the content of every existing second-level heading. Note that this doesn't add headings!
515515 second_level_headings = {get_heading_text (heading ) for heading in iterate_headings (parsed_temp_help ) if heading .level == 2 }
516516 # Check if there are any headings we don't have in our current help file. If that is the case, the help file must be updated manually first.
517- existing_second_level_headings = {get_heading_text (heading ). replace ( u' \xa0 ' , u' ' ) for heading in iterate_headings (parsed_help ) if heading .level == 2 }
517+ existing_second_level_headings = {get_heading_text (heading ) for heading in iterate_headings (parsed_help ) if heading .level == 2 }
518518 if not second_level_headings .issubset (existing_second_level_headings ):
519519 print (f"ID: { rule ['id' ]} - The original help is missing the header(s) '{ ', ' .join (second_level_headings .difference (existing_second_level_headings ))} '. Proceed with manually adding these in the expected location (See { temp_help_path } )." )
520520 sys .exit (1 )
0 commit comments