Skip to content

Commit 3c98683

Browse files
committed
👌 IMPROVE: autolink parsing performance
Fix quadratic complexity in autolinks. Implements: markdown-it/markdown-it@e729b90
1 parent 618fca8 commit 3c98683

File tree

1 file changed

+27
-20
lines changed

1 file changed

+27
-20
lines changed
Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
# Process autolinks '<protocol:...>'
22
import re
33
from .state_inline import StateInline
4+
from ..common.normalize_url import normalizeLinkText, normalizeLink, validateLink
45

56
EMAIL_RE = re.compile(
6-
r"^<([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>" # noqa: E501
7+
r"^([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$" # noqa: E501
78
)
8-
AUTOLINK_RE = re.compile(r"^<([a-zA-Z][a-zA-Z0-9+.\-]{1,31}):([^<>\x00-\x20]*)>")
9+
AUTOLINK_RE = re.compile(r"^([a-zA-Z][a-zA-Z0-9+.\-]{1,31}):([^<>\x00-\x20]*)$")
910

1011

1112
def autolink(state: StateInline, silent: bool) -> bool:
@@ -15,17 +16,26 @@ def autolink(state: StateInline, silent: bool) -> bool:
1516
if state.srcCharCode[pos] != 0x3C: # /* < */
1617
return False
1718

18-
tail = state.src[pos:]
19+
start = state.pos
20+
maximum = state.posMax
1921

20-
if ">" not in tail:
21-
return False
22+
while True:
23+
pos += 1
24+
if pos >= maximum:
25+
return False
26+
27+
ch = state.srcCharCode[pos]
2228

23-
linkMatch = AUTOLINK_RE.search(tail)
24-
if linkMatch is not None:
29+
if ch == 0x3C: # /* < */
30+
return False
31+
if ch == 0x3E: # /* > */
32+
break
33+
34+
url = state.src[start + 1 : pos]
2535

26-
url = linkMatch.group(0)[1:-1]
27-
fullUrl = state.md.normalizeLink(url)
28-
if not state.md.validateLink(fullUrl):
36+
if AUTOLINK_RE.search(url) is not None:
37+
fullUrl = normalizeLink(url)
38+
if not validateLink(fullUrl):
2939
return False
3040

3141
if not silent:
@@ -35,21 +45,18 @@ def autolink(state: StateInline, silent: bool) -> bool:
3545
token.info = "auto"
3646

3747
token = state.push("text", "", 0)
38-
token.content = state.md.normalizeLinkText(url)
48+
token.content = normalizeLinkText(url)
3949

4050
token = state.push("link_close", "a", -1)
4151
token.markup = "autolink"
4252
token.info = "auto"
4353

44-
state.pos += len(linkMatch.group(0))
54+
state.pos += len(url) + 2
4555
return True
4656

47-
emailMatch = EMAIL_RE.search(tail)
48-
if emailMatch is not None:
49-
50-
url = emailMatch.group(0)[1:-1]
51-
fullUrl = state.md.normalizeLink("mailto:" + url)
52-
if not state.md.validateLink(fullUrl):
57+
if EMAIL_RE.search(url) is not None:
58+
fullUrl = normalizeLink("mailto:" + url)
59+
if not validateLink(fullUrl):
5360
return False
5461

5562
if not silent:
@@ -59,13 +66,13 @@ def autolink(state: StateInline, silent: bool) -> bool:
5966
token.info = "auto"
6067

6168
token = state.push("text", "", 0)
62-
token.content = state.md.normalizeLinkText(url)
69+
token.content = normalizeLinkText(url)
6370

6471
token = state.push("link_close", "a", -1)
6572
token.markup = "autolink"
6673
token.info = "auto"
6774

68-
state.pos += len(emailMatch.group(0))
75+
state.pos += len(url) + 2
6976
return True
7077

7178
return False

0 commit comments

Comments
 (0)