Skip to content

Commit a70db2a

Browse files
authored
👌 IMPROVE: Make link functions overridable (#135)
`validateLink`, `normalizeLink` and `normalizeLinkText` are now methods of `MarkdownIt`, meaning that they can be overridden by subclassing `MarkdownIt`.
1 parent a40f6a8 commit a70db2a

File tree

9 files changed

+57
-40
lines changed

9 files changed

+57
-40
lines changed

markdown_it/common/normalize_url.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def unescape_normalize_uri(x: str) -> str:
6767
return normalize_uri(unescape_string(x))
6868

6969

70-
def normalizeLink(url):
70+
def normalizeLink(url: str) -> str:
7171
"""Normalize destination URLs in links
7272
7373
::
@@ -117,7 +117,7 @@ def unescape_unquote(x: str) -> str:
117117
return unquote(unescape_string(x))
118118

119119

120-
def normalizeLinkText(link):
120+
def normalizeLinkText(link: str) -> str:
121121
"""Normalize autolink content
122122
123123
::
@@ -172,9 +172,6 @@ def validateLink(url: str, validator: Optional[Callable] = None) -> bool:
172172
This validator can prohibit more than really needed to prevent XSS.
173173
It's a tradeoff to keep code simple and to be secure by default.
174174
175-
If you need different setup - override validator method as you wish.
176-
Or replace it with dummy function and use external sanitizer.
177-
178175
Note: url should be normalized at this point, and existing entities decoded.
179176
"""
180177
if validator is not None:

markdown_it/main.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
)
1313

1414
from . import helpers, presets # noqa F401
15-
from .common import utils # noqa F401
15+
from .common import normalize_url, utils # noqa F401
1616
from .token import Token
1717
from .parser_core import ParserCore # noqa F401
1818
from .parser_block import ParserBlock # noqa F401
@@ -288,3 +288,35 @@ def renderInline(self, src: str, env: Optional[AttrDict] = None) -> Any:
288288
"""
289289
env = AttrDict() if env is None else env
290290
return self.renderer.render(self.parseInline(src, env), self.options, env)
291+
292+
# link methods
293+
294+
def validateLink(self, url: str) -> bool:
295+
"""Validate if the URL link is allowed in output.
296+
297+
This validator can prohibit more than really needed to prevent XSS.
298+
It's a tradeoff to keep code simple and to be secure by default.
299+
300+
Note: the url should be normalized at this point, and existing entities decoded.
301+
"""
302+
return normalize_url.validateLink(url)
303+
304+
def normalizeLink(self, url: str) -> str:
305+
"""Normalize destination URLs in links
306+
307+
::
308+
309+
[label]: destination 'title'
310+
^^^^^^^^^^^
311+
"""
312+
return normalize_url.normalizeLink(url)
313+
314+
def normalizeLinkText(self, link: str) -> str:
315+
"""Normalize autolink content
316+
317+
::
318+
319+
<destination>
320+
~~~~~~~~~~~
321+
"""
322+
return normalize_url.normalizeLinkText(link)

markdown_it/port.yaml

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,6 @@
1919
Use Python's built-in `html.escape` and `urllib.parse.quote` methods, as a replacement for
2020
the JS dependencies [mdurl](https://www.npmjs.com/package/mdurl)
2121
and [punycode](https://www.npmjs.com/package/punycode)
22-
- |
23-
Remove indirect references within `MarkdownIt`;
24-
25-
self.validateLink = validateLink
26-
self.normalizeLink = normalizeLink
27-
self.normalizeLinkText = normalizeLinkText
28-
29-
in favour of using them directly through:
30-
31-
from markdown_it.common.normalize_url import normalizeLinkText
3222
- |
3323
In markdown_it/rules_block/reference.py,
3424
record line range in state.env["references"] and add state.env["duplicate_refs"]

markdown_it/rules_block/reference.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import logging
22

33
from ..common.utils import isSpace, normalizeReference, charCodeAt
4-
from ..common.normalize_url import normalizeLink, validateLink
54
from ..utils import AttrDict
65
from .state_block import StateBlock
76

@@ -115,8 +114,8 @@ def reference(state: StateBlock, startLine, _endLine, silent):
115114
if not res.ok:
116115
return False
117116

118-
href = normalizeLink(res.str)
119-
if not validateLink(href):
117+
href = state.md.normalizeLink(res.str)
118+
if not state.md.validateLink(href):
120119
return False
121120

122121
pos = res.pos

markdown_it/rules_block/state_block.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
1-
from typing import List, Optional, Tuple
1+
from typing import List, Optional, Tuple, TYPE_CHECKING
22

33
from ..token import Token
44
from ..ruler import StateBase
55
from ..common.utils import isSpace
66

7+
if TYPE_CHECKING:
8+
from markdown_it.main import MarkdownIt
9+
710

811
class StateBlock(StateBase):
912
def __init__(
1013
self,
1114
src: str,
12-
md,
15+
md: "MarkdownIt",
1316
env,
1417
tokens: List[Token],
1518
srcCharCode: Optional[Tuple[int, ...]] = None,

markdown_it/rules_core/linkify.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import re
22

33
from ..common.utils import arrayReplaceAt
4-
from ..common.normalize_url import normalizeLinkText, normalizeLink, validateLink
54
from .state_core import StateCore
65
from ..token import Token
76

@@ -82,8 +81,8 @@ def linkify(state: StateCore) -> None:
8281

8382
for ln in range(len(links)):
8483
url = links[ln].url
85-
fullUrl = normalizeLink(url)
86-
if not validateLink(fullUrl):
84+
fullUrl = state.md.normalizeLink(url)
85+
if not state.md.validateLink(fullUrl):
8786
continue
8887

8988
urlText = links[ln].text
@@ -93,16 +92,16 @@ def linkify(state: StateCore) -> None:
9392
# and remove it afterwards.
9493
if not links[ln].schema:
9594
urlText = HTTP_RE.sub(
96-
"", normalizeLinkText("http://" + urlText)
95+
"", state.md.normalizeLinkText("http://" + urlText)
9796
)
9897
elif links[ln].schema == "mailto:" and TEST_MAILTO_RE.search(
9998
urlText
10099
):
101100
urlText = MAILTO_RE.sub(
102-
"", normalizeLinkText("mailto:" + urlText)
101+
"", state.md.normalizeLinkText("mailto:" + urlText)
103102
)
104103
else:
105-
urlText = normalizeLinkText(urlText)
104+
urlText = state.md.normalizeLinkText(urlText)
106105

107106
pos = links[ln].index
108107

markdown_it/rules_inline/autolink.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# Process autolinks '<protocol:...>'
22
import re
33
from .state_inline import StateInline
4-
from ..common.normalize_url import normalizeLinkText, normalizeLink, validateLink
54

65
EMAIL_RE = re.compile(
76
r"^<([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>" # noqa: E501
@@ -25,8 +24,8 @@ def autolink(state: StateInline, silent: bool) -> bool:
2524
if linkMatch is not None:
2625

2726
url = linkMatch.group(0)[1:-1]
28-
fullUrl = normalizeLink(url)
29-
if not validateLink(fullUrl):
27+
fullUrl = state.md.normalizeLink(url)
28+
if not state.md.validateLink(fullUrl):
3029
return False
3130

3231
if not silent:
@@ -36,7 +35,7 @@ def autolink(state: StateInline, silent: bool) -> bool:
3635
token.info = "auto"
3736

3837
token = state.push("text", "", 0)
39-
token.content = normalizeLinkText(url)
38+
token.content = state.md.normalizeLinkText(url)
4039

4140
token = state.push("link_close", "a", -1)
4241
token.markup = "autolink"
@@ -49,8 +48,8 @@ def autolink(state: StateInline, silent: bool) -> bool:
4948
if emailMatch is not None:
5049

5150
url = emailMatch.group(0)[1:-1]
52-
fullUrl = normalizeLink("mailto:" + url)
53-
if not validateLink(fullUrl):
51+
fullUrl = state.md.normalizeLink("mailto:" + url)
52+
if not state.md.validateLink(fullUrl):
5453
return False
5554

5655
if not silent:
@@ -60,7 +59,7 @@ def autolink(state: StateInline, silent: bool) -> bool:
6059
token.info = "auto"
6160

6261
token = state.push("text", "", 0)
63-
token.content = normalizeLinkText(url)
62+
token.content = state.md.normalizeLinkText(url)
6463

6564
token = state.push("link_close", "a", -1)
6665
token.markup = "autolink"

markdown_it/rules_inline/image.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from .state_inline import StateInline
66
from ..token import Token
77
from ..common.utils import isSpace, normalizeReference
8-
from ..common.normalize_url import normalizeLink, validateLink
98

109

1110
def image(state: StateInline, silent: bool):
@@ -53,8 +52,8 @@ def image(state: StateInline, silent: bool):
5352
start = pos
5453
res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax)
5554
if res.ok:
56-
href = normalizeLink(res.str)
57-
if validateLink(href):
55+
href = state.md.normalizeLink(res.str)
56+
if state.md.validateLink(href):
5857
pos = res.pos
5958
else:
6059
href = ""

markdown_it/rules_inline/link.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# Process [link](<to> "stuff")
22

33
from ..common.utils import normalizeReference, isSpace
4-
from ..common.normalize_url import normalizeLink, validateLink
54
from .state_inline import StateInline
65

76

@@ -51,8 +50,8 @@ def link(state: StateInline, silent: bool):
5150
start = pos
5251
res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax)
5352
if res.ok:
54-
href = normalizeLink(res.str)
55-
if validateLink(href):
53+
href = state.md.normalizeLink(res.str)
54+
if state.md.validateLink(href):
5655
pos = res.pos
5756
else:
5857
href = ""

0 commit comments

Comments
 (0)