Skip to content

Commit 350d31b

Browse files
authored
Merge pull request #238 from izo0x90/Hrist/Add-url-escpe-decoding
Adds url escape sequence decoding
2 parents 369005e + e8d1a3e commit 350d31b

File tree

3 files changed

+112
-14
lines changed

3 files changed

+112
-14
lines changed

lightbug_http/strings.mojo

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,3 +150,12 @@ fn to_string(owned bytes: Bytes) -> String:
150150
if bytes[-1] != 0:
151151
bytes.append(0)
152152
return String(bytes^)
153+
154+
155+
fn find_all(s: String, sub_str: String) -> List[Int]:
    """Collect the start index of every occurrence of `sub_str` in `s`.

    Matches may overlap: after each hit the search resumes one position past
    the start of that hit.

    Args:
        s: The string to search.
        sub_str: The substring to look for.

    Returns:
        The match indices in ascending order. The list is empty when there is
        no match or when `sub_str` is empty.
    """
    var match_idxs = List[Int]()

    # An empty needle has no meaningful match positions, and searching for it
    # is not guaranteed to ever report -1, so bail out instead of looping.
    if len(sub_str) == 0:
        return match_idxs^

    var current_idx: Int = s.find(sub_str)
    while current_idx > -1:
        match_idxs.append(current_idx)
        current_idx = s.find(sub_str, start=current_idx + 1)
    return match_idxs^

lightbug_http/uri.mojo

Lines changed: 86 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ from collections import Dict
22
from utils import Variant
33
from lightbug_http.io.bytes import Bytes, bytes
44
from lightbug_http.strings import (
5+
find_all,
56
strSlash,
67
strHttp11,
78
strHttp10,
@@ -11,19 +12,91 @@ from lightbug_http.strings import (
1112
https,
1213
)
1314

15+
16+
fn unquote[
    expand_plus: Bool = False
](
    input_str: String, disallowed_escapes: List[String] = List[String]()
) -> String:
    """Decode `%XX` escape sequences in a URL component.

    Runs of directly adjacent escapes are gathered into one byte buffer
    before being turned into a string, so a multi-byte character spelled as
    several escapes is rebuilt as a whole. An escape that cannot be parsed
    as two hexadecimal digits (or is cut off by the end of the input) is
    left in the output verbatim.

    Parameters:
        expand_plus: When True, every `+` is replaced by a space before
            escapes are decoded.

    Args:
        input_str: The encoded text.
        disallowed_escapes: Strings that are removed from each decoded run
            of escapes. Text that was not escaped is never filtered.

    Returns:
        The decoded string.
    """
    var encoded_str = input_str.replace(
        QueryDelimiters.PLUS_ESCAPED_SPACE, " "
    ) if expand_plus else input_str

    var percent_idxs: List[Int] = find_all(
        encoded_str, URIDelimiters.CHAR_ESCAPE
    )
    var percent_count = len(percent_idxs)

    if percent_count < 1:
        return encoded_str

    var sub_strings = List[String]()

    var current_idx = 0
    var slice_start = 0
    var slice_end = 0

    var str_bytes = List[UInt8]()
    while current_idx < percent_count:
        # Copy the literal text between the previous escape run and this `%`.
        slice_end = percent_idxs[current_idx]
        sub_strings.append(encoded_str[slice_start:slice_end])

        var current_offset = slice_end
        while current_idx < percent_count:
            var char_byte = -1
            # A full escape needs the `%` plus two more characters.
            if (current_offset + 3) <= len(encoded_str):
                try:
                    char_byte = atol(
                        encoded_str[current_offset + 1 : current_offset + 3],
                        base=16,
                    )
                except:
                    # Not hexadecimal: char_byte stays -1 and the sequence is
                    # kept verbatim by the slicing below.
                    pass

            if char_byte < 0:
                break

            str_bytes.append(char_byte)

            # Extend the run only when another `%` starts right after this
            # escape. The bounds check is required: for the last `%` in the
            # input there is no following entry to inspect.
            var next_idx = current_idx + 1
            if (
                next_idx >= percent_count
                or percent_idxs[next_idx] != (current_offset + 3)
            ):
                current_offset += 3
                break

            current_idx = next_idx
            current_offset = percent_idxs[current_idx]

        if len(str_bytes) > 0:
            # String construction from a byte list expects a NUL terminator.
            str_bytes.append(0x00)
            var sub_str_from_bytes = String(str_bytes)
            for disallowed in disallowed_escapes:
                sub_str_from_bytes = sub_str_from_bytes.replace(
                    disallowed[], ""
                )
            sub_strings.append(sub_str_from_bytes)
            str_bytes.clear()

        # Resume literal copying after the decoded run, or at the `%` itself
        # when the escape was malformed.
        slice_start = current_offset
        current_idx += 1

    sub_strings.append(encoded_str[slice_start:])

    return str("").join(sub_strings)
83+
84+
1485
alias QueryMap = Dict[String, String]
1586

1687

1788
struct QueryDelimiters:
1889
alias STRING_START = "?"
1990
alias ITEM = "&"
2091
alias ITEM_ASSIGN = "="
92+
alias PLUS_ESCAPED_SPACE = "+"
2193

2294

2395
struct URIDelimiters:
2496
alias SCHEMA = "://"
2597
alias PATH = strSlash
2698
alias ROOT_PATH = strSlash
99+
alias CHAR_ESCAPE = "%"
27100

28101

29102
@value
@@ -80,10 +153,14 @@ struct URI(Writable, Stringable, Representable):
80153
var original_path: String
81154
var query_string: String
82155
if n >= 0:
83-
original_path = request_uri[:n]
156+
original_path = unquote(
157+
request_uri[:n], disallowed_escapes=List(str("/"))
158+
)
84159
query_string = request_uri[n + 1 :]
85160
else:
86-
original_path = request_uri
161+
original_path = unquote(
162+
request_uri, disallowed_escapes=List(str("/"))
163+
)
87164
query_string = ""
88165

89166
var queries = QueryMap()
@@ -92,13 +169,12 @@ struct URI(Writable, Stringable, Representable):
92169

93170
for item in query_items:
94171
var key_val = item[].split(QueryDelimiters.ITEM_ASSIGN, 1)
172+
var key = unquote[expand_plus=True](key_val[0])
95173

96-
if key_val[0]:
97-
queries[key_val[0]] = ""
174+
if key:
175+
queries[key] = ""
98176
if len(key_val) == 2:
99-
# TODO: Query values are going to be URI encoded strings and should be decoded as part of the
100-
# query processing
101-
queries[key_val[0]] = key_val[1]
177+
queries[key] = unquote[expand_plus=True](key_val[1])
102178

103179
return URI(
104180
_original_path=original_path,
@@ -115,7 +191,9 @@ struct URI(Writable, Stringable, Representable):
115191
)
116192

117193
fn __str__(self) -> String:
118-
var result = String.write(self.scheme, URIDelimiters.SCHEMA, self.host, self.path)
194+
var result = String.write(
195+
self.scheme, URIDelimiters.SCHEMA, self.host, self.path
196+
)
119197
if len(self.query_string) > 0:
120198
result.write(QueryDelimiters.STRING_START, self.query_string)
121199
return result^

tests/lightbug_http/test_uri.mojo

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,16 @@ def test_uri_parse_https_with_path():
6060
testing.assert_equal(uri.query_string, empty_string)
6161

6262

63+
def test_uri_parse_path_with_encoding():
    """A `%20` escape in the path is decoded to a space."""
    var parsed = URI.parse("https://example.com/test%20test/index.html")
    testing.assert_equal(parsed.path, "/test test/index.html")
66+
67+
68+
def test_uri_parse_path_with_encoding_ignore_slashes():
    """An escaped slash (`%2F`) in the path is dropped, not decoded to `/`."""
    var parsed = URI.parse(
        "https://example.com/trying_to%2F_be_clever/42.html"
    )
    testing.assert_equal(parsed.path, "/trying_to_be_clever/42.html")
71+
72+
6373
def test_uri_parse_http_basic():
6474
var uri = URI.parse("http://example.com")
6575
testing.assert_equal(uri.scheme, "http")
@@ -102,13 +112,14 @@ def test_uri_parse_multiple_query_parameters():
102112
testing.assert_equal(uri.request_uri, "/search?q=python&page=1&limit=20")
103113

104114
def test_uri_parse_query_with_special_characters():
105-
var uri = URI.parse("https://example.com/path?name=John+Doe&email=john%40example.com")
115+
var uri = URI.parse("https://example.com/path?name=John+Doe&email=john%40example.com&escaped%40%20name=42")
106116
testing.assert_equal(uri.scheme, "https")
107117
testing.assert_equal(uri.host, "example.com")
108118
testing.assert_equal(uri.path, "/path")
109-
testing.assert_equal(uri.query_string, "name=John+Doe&email=john%40example.com")
110-
# testing.assert_equal(uri.queries["name"], "John Doe") - fails, contains John+Doe
111-
# testing.assert_equal(uri.queries["email"], "john@example.com") - fails, contains john%40example.com
119+
testing.assert_equal(uri.query_string, "name=John+Doe&email=john%40example.com&escaped%40%20name=42")
120+
testing.assert_equal(uri.queries["name"], "John Doe")
121+
testing.assert_equal(uri.queries["email"], "john@example.com")
122+
testing.assert_equal(uri.queries["escaped@ name"], "42")
112123

113124
def test_uri_parse_empty_query_values():
114125
var uri = URI.parse("http://example.com/api?key=&token=&empty")
@@ -132,8 +143,8 @@ def test_uri_parse_complex_query():
132143
def test_uri_parse_query_with_unicode():
133144
var uri = URI.parse("http://example.com/search?q=%E2%82%AC&lang=%F0%9F%87%A9%F0%9F%87%AA")
134145
testing.assert_equal(uri.query_string, "q=%E2%82%AC&lang=%F0%9F%87%A9%F0%9F%87%AA")
135-
# testing.assert_equal(uri.queries["q"], "€") - fails, contains %E2%82%AC
136-
# testing.assert_equal(uri.queries["lang"], "🇩🇪") - fails, contains %F0%9F%87%A9%F0%9F%87%AA
146+
testing.assert_equal(uri.queries["q"], "€")
147+
testing.assert_equal(uri.queries["lang"], "🇩🇪")
137148

138149
# def test_uri_parse_query_with_fragments():
139150
# var uri = URI.parse("http://example.com/page?id=123#section1")

0 commit comments

Comments
 (0)