22"""
33from __future__ import annotations
44
5- import html
65import re
76from typing import Match , TypeVar
87
@@ -52,9 +51,6 @@ def arrayReplaceAt(
5251 return src [:pos ] + newElements + src [pos + 1 :]
5352
5453
55- ######################################################################
56-
57-
5854def isValidEntityCode (c : int ) -> bool :
5955 # broken sequence
6056 if c >= 0xD800 and c <= 0xDFFF :
@@ -89,47 +85,33 @@ def fromCodePoint(c: int) -> str:
8985 return chr (c )
9086
9187
# UNESCAPE_MD_RE = re.compile(r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])')
# ENTITY_RE_g = re.compile(r'&([a-z#][a-z0-9]{1,31})', re.IGNORECASE)

# Matches either a backslash-escaped ASCII punctuation character (captured in
# group 1) or an entity reference of the form "&name;" / "&#digits;" (the text
# between "&" and ";" is captured in group 2). Matching is case-insensitive.
UNESCAPE_ALL_RE = re.compile(
    r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])' + "|" + r"&([a-z#][a-z0-9]{1,31});",
    re.IGNORECASE,
)
# Numeric character references, matched against the entity body (the text
# between "&" and ";"): decimal "#123" and hexadecimal "#x1F" / "#X1f".
# Both are applied with ``fullmatch`` so trailing junk is never accepted.
DIGITAL_ENTITY_BASE10_RE = re.compile(r"#([0-9]{1,8})")
DIGITAL_ENTITY_BASE16_RE = re.compile(r"#x([a-f0-9]{1,8})", re.IGNORECASE)


def replaceEntityPattern(match: str, name: str) -> str:
    """Convert HTML entity patterns,
    see https://spec.commonmark.org/0.30/#entity-references

    :param match: the complete matched text; returned unchanged when the
        entity cannot be resolved.
    :param name: the entity body, i.e. the text between "&" and ";".
    :return: the value stored in ``entities`` for a known name, the character
        for a valid numeric reference, otherwise ``match``.
    """
    # Named entities take precedence over numeric references.
    if name in entities:
        return entities[name]

    # Only a body that is *entirely* a decimal or hexadecimal reference is
    # converted, so int() can never see non-digit characters.
    code: None | int = None
    if pat := DIGITAL_ENTITY_BASE10_RE.fullmatch(name):
        code = int(pat.group(1), 10)
    elif pat := DIGITAL_ENTITY_BASE16_RE.fullmatch(name):
        code = int(pat.group(1), 16)

    if code is not None and isValidEntityCode(code):
        return fromCodePoint(code)

    # Unknown name or invalid code point: leave the source text as it was.
    return match
133115
134116
135117def unescapeAll (string : str ) -> str :
@@ -154,30 +136,14 @@ def stripEscape(string: str) -> str:
154136 return ESCAPE_CHAR .sub (r"\1" , string )
155137
156138
157- # //////////////////////////////////////////////////////////////////////////////
158-
159- # TODO This section changed quite a lot, should re-check
160-
161- # UNESCAPE_HTML_RE = re.compile(r"\\&(?=(amp\;|lt\;|gt\;|quot\;))")
162- # ESCAPE_AND_HTML = re.compile(r"&(?!(amp\;|lt\;|gt\;|quot\;))")
163- # HTML_ESCAPE_REPLACE_RE = re.compile(r'[&<>"]')
164-
165-
166- # def escapeHtml(string: str):
167-
168- # if HTML_ESCAPE_REPLACE_RE.search(string):
169-
170- # string = UNESCAPE_HTML_RE.sub("&", string)
171- # string = ESCAPE_AND_HTML.sub("&amp;", string)
172- # for k, v in {"<": "&lt;", ">": "&gt;", '"': "&quot;"}.items():
173- # string = string.replace(k, v)
174-
175- # return string
176-
177-
def escapeHtml(raw: str) -> str:
    """Replace special characters "&", "<", ">" and '"' to HTML-safe sequences.

    Behaves like ``html.escape`` except that single quotes are left untouched.

    :param raw: the text to escape.
    :return: ``raw`` with the four special characters replaced by their
        entity references.
    """
    # "&" must be handled first: the other replacements introduce ampersands
    # that would otherwise be escaped a second time.
    raw = raw.replace("&", "&amp;")
    raw = raw.replace("<", "&lt;")
    raw = raw.replace(">", "&gt;")
    raw = raw.replace('"', "&quot;")
    return raw
181147
182148
183149# //////////////////////////////////////////////////////////////////////////////
0 commit comments