|
| 1 | +from __future__ import annotations |
| 2 | + |
1 | 3 | import re |
| 4 | +from typing import Protocol |
2 | 5 |
|
3 | | -from ..common.utils import arrayReplaceAt |
| 6 | +from ..common.utils import arrayReplaceAt, isLinkClose, isLinkOpen |
4 | 7 | from ..token import Token |
5 | 8 | from .state_core import StateCore |
6 | 9 |
|
7 | | -LINK_OPEN_RE = re.compile(r"^<a[>\s]", flags=re.IGNORECASE) |
8 | | -LINK_CLOSE_RE = re.compile(r"^</a\s*>", flags=re.IGNORECASE) |
9 | | - |
10 | 10 | HTTP_RE = re.compile(r"^http://") |
11 | 11 | MAILTO_RE = re.compile(r"^mailto:") |
12 | 12 | TEST_MAILTO_RE = re.compile(r"^mailto:", flags=re.IGNORECASE) |
13 | 13 |
|
14 | 14 |
|
15 | | -def isLinkOpen(string: str) -> bool: |
16 | | - return bool(LINK_OPEN_RE.search(string)) |
17 | | - |
18 | | - |
19 | | -def isLinkClose(string: str) -> bool: |
20 | | - return bool(LINK_CLOSE_RE.search(string)) |
21 | | - |
22 | | - |
23 | 15 | def linkify(state: StateCore) -> None: |
24 | | - blockTokens = state.tokens |
25 | | - |
| 16 | + """Rule for identifying plain-text links.""" |
26 | 17 | if not state.md.options.linkify: |
27 | 18 | return |
28 | 19 |
|
29 | 20 | if not state.md.linkify: |
30 | 21 | raise ModuleNotFoundError("Linkify enabled but not installed.") |
31 | 22 |
|
32 | | - for j in range(len(blockTokens)): |
33 | | - if blockTokens[j].type != "inline" or not state.md.linkify.pretest( |
34 | | - blockTokens[j].content |
| 23 | + for inline_token in state.tokens: |
| 24 | + if inline_token.type != "inline" or not state.md.linkify.pretest( |
| 25 | + inline_token.content |
35 | 26 | ): |
36 | 27 | continue |
37 | 28 |
|
38 | | - tokens = blockTokens[j].children |
| 29 | + tokens = inline_token.children |
39 | 30 |
|
40 | 31 | htmlLinkLevel = 0 |
41 | 32 |
|
@@ -71,38 +62,47 @@ def linkify(state: StateCore) -> None: |
71 | 62 | currentToken.content |
72 | 63 | ): |
73 | 64 | text = currentToken.content |
74 | | - links = state.md.linkify.match(text) |
| 65 | + links: list[_LinkType] = state.md.linkify.match(text) or [] |
75 | 66 |
|
76 | 67 | # Now split string to nodes |
77 | 68 | nodes = [] |
78 | 69 | level = currentToken.level |
79 | 70 | lastPos = 0 |
80 | 71 |
|
81 | | - for ln in range(len(links)): |
82 | | - url = links[ln].url |
| 72 | + # Forbid an escape sequence at the start of the string; |
| 73 | + # this prevents http\://example.com/ from being linkified as |
| 74 | + # http:<a href="//example.com/">//example.com/</a> |
| 75 | + if ( |
| 76 | + links |
| 77 | + and links[0].index == 0 |
| 78 | + and i > 0 |
| 79 | + and tokens[i - 1].type == "text_special" |
| 80 | + ): |
| 81 | + links = links[1:] |
| 82 | + |
| 83 | + for link in links: |
| 84 | + url = link.url |
83 | 85 | fullUrl = state.md.normalizeLink(url) |
84 | 86 | if not state.md.validateLink(fullUrl): |
85 | 87 | continue |
86 | 88 |
|
87 | | - urlText = links[ln].text |
| 89 | + urlText = link.text |
88 | 90 |
|
89 | 91 | # Linkifier might send raw hostnames like "example.com", where the url |
90 | 92 | # starts with a domain name. So we prepend http:// in those cases, |
91 | 93 | # and remove it afterwards. |
92 | | - if not links[ln].schema: |
| 94 | + if not link.schema: |
93 | 95 | urlText = HTTP_RE.sub( |
94 | 96 | "", state.md.normalizeLinkText("http://" + urlText) |
95 | 97 | ) |
96 | | - elif links[ln].schema == "mailto:" and TEST_MAILTO_RE.search( |
97 | | - urlText |
98 | | - ): |
| 98 | + elif link.schema == "mailto:" and TEST_MAILTO_RE.search(urlText): |
99 | 99 | urlText = MAILTO_RE.sub( |
100 | 100 | "", state.md.normalizeLinkText("mailto:" + urlText) |
101 | 101 | ) |
102 | 102 | else: |
103 | 103 | urlText = state.md.normalizeLinkText(urlText) |
104 | 104 |
|
105 | | - pos = links[ln].index |
| 105 | + pos = link.index |
106 | 106 |
|
107 | 107 | if pos > lastPos: |
108 | 108 | token = Token("text", "", 0) |
@@ -130,12 +130,20 @@ def linkify(state: StateCore) -> None: |
130 | 130 | token.info = "auto" |
131 | 131 | nodes.append(token) |
132 | 132 |
|
133 | | - lastPos = links[ln].last_index |
| 133 | + lastPos = link.last_index |
134 | 134 |
|
135 | 135 | if lastPos < len(text): |
136 | 136 | token = Token("text", "", 0) |
137 | 137 | token.content = text[lastPos:] |
138 | 138 | token.level = level |
139 | 139 | nodes.append(token) |
140 | 140 |
|
141 | | - blockTokens[j].children = tokens = arrayReplaceAt(tokens, i, nodes) |
| 141 | + inline_token.children = tokens = arrayReplaceAt(tokens, i, nodes) |
| 142 | + |
| 143 | + |
| 144 | +class _LinkType(Protocol): |
| 145 | + url: str |
| 146 | + text: str |
| 147 | + index: int |
| 148 | + last_index: int |
| 149 | + schema: str | None |
0 commit comments