Skip to content

Commit eacb5ce

Browse files
committed
Adds url escape sequence decoding
- Add utility function to decode/unquote url encoded strings - Updates URI object to decode/unquote url path and query values with "plus" character expansion into space for queries
1 parent 369005e commit eacb5ce

File tree

2 files changed

+84
-10
lines changed

2 files changed

+84
-10
lines changed

lightbug_http/uri.mojo

Lines changed: 80 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,91 @@ from lightbug_http.strings import (
1111
https,
1212
)
1313

14+
15+
fn find_all(s: String, sub_str: String) -> List[Int]:
    """Collect the index of every occurrence of `sub_str` within `s`.

    The search restarts one position after each hit, so overlapping
    occurrences are all reported. Indices are whatever `String.find`
    returns for this string.

    Args:
        s: The string to search.
        sub_str: The substring to look for.

    Returns:
        The match indices in ascending order. The list is empty when there
        is no match or when `sub_str` is empty.
    """
    var match_idxs = List[Int]()

    # An empty needle has no meaningful set of positions. Bail out up front:
    # the loop below only ends once `find` reports -1, which it may never do
    # for an empty needle.
    if len(sub_str) == 0:
        return match_idxs^

    var current_idx: Int = s.find(sub_str)
    while current_idx > -1:
        match_idxs.append(current_idx)
        # Resume just past the start of the previous hit.
        current_idx = s.find(sub_str, start=current_idx + 1)
    return match_idxs^
22+
23+
24+
fn unquote[expand_plus: Bool = False](input_str: String) -> String:
    """Decode percent-escaped (`%XX`) sequences in a URL-encoded string.

    Each `%` followed by two characters that parse as a base-16 number is
    replaced by the corresponding byte. Runs of directly adjacent escapes
    are gathered into one byte buffer before being turned into a string, so
    a multi-byte sequence such as `%E2%82%AC` is decoded as a unit. An
    escape that is truncated or does not parse is left in the output
    untouched.

    Parameters:
        expand_plus: When True, every `+` is replaced by a space before the
            percent escapes are decoded (query-string convention).

    Args:
        input_str: The encoded text.

    Returns:
        The decoded text. The input (after optional `+` expansion) is
        returned unchanged when it contains no `%`.
    """
    var encoded_str = input_str.replace(
        QueryDelimiters.PLUS_ESCAPED_SPACE, " "
    ) if expand_plus else input_str

    var percent_idxs: List[Int] = find_all(
        encoded_str, URIDelimiters.CHAR_ESCAPE
    )

    if len(percent_idxs) < 1:
        return encoded_str

    var sub_strings = List[String]()

    var current_idx = 0
    var slice_start = 0
    var slice_end = 0

    var str_bytes = List[UInt8]()
    while current_idx < len(percent_idxs):
        # Copy the literal text that sits before this escape.
        slice_end = percent_idxs[current_idx]
        sub_strings.append(encoded_str[slice_start:slice_end])

        # Decode this escape and any escapes that follow it back-to-back.
        var current_offset = slice_end
        while current_idx < len(percent_idxs):
            var char_byte = -1
            if (current_offset + 3) <= len(encoded_str):
                try:
                    char_byte = atol(
                        encoded_str[current_offset + 1 : current_offset + 3],
                        base=16,
                    )
                except:
                    # Not a hex pair: char_byte stays -1 and the escape is
                    # kept verbatim by the slicing below.
                    pass

            if char_byte < 0:
                break

            str_bytes.append(char_byte)

            # Only peek at the next '%' position if there is one. The last
            # escape in the string has no successor, and indexing past the
            # end of `percent_idxs` is an out-of-bounds read.
            var next_idx = current_idx + 1
            if (
                next_idx >= len(percent_idxs)
                or percent_idxs[next_idx] != (current_offset + 3)
            ):
                current_offset += 3
                break

            current_idx = next_idx
            current_offset = percent_idxs[current_idx]

        if len(str_bytes) > 0:
            # Terminate the buffer before handing it to the String
            # constructor, then reuse the buffer for the next run.
            str_bytes.append(0x00)
            sub_strings.append(String(str_bytes))
            str_bytes.clear()

        # Literal text resumes after the decoded run, or at the '%' itself
        # when the escape could not be decoded.
        slice_start = current_offset
        current_idx += 1

    # Trailing literal text after the final escape.
    sub_strings.append(encoded_str[slice_start:])

    return str("").join(sub_strings)
82+
83+
1484
alias QueryMap = Dict[String, String]
1585

1686

1787
struct QueryDelimiters:
    """Delimiter strings used when handling a URI query string."""

    # Introduces the query string.
    alias STRING_START = "?"
    # Separates one query item from the next.
    alias ITEM = "&"
    # Separates a query key from its value.
    alias ITEM_ASSIGN = "="
    # Character that stands for a space in an encoded query value.
    alias PLUS_ESCAPED_SPACE = "+"
2192

2293

2394
struct URIDelimiters:
    """Delimiter strings used when handling a URI outside its query part."""

    # Separates the scheme from the rest of the URI.
    alias SCHEMA = "://"
    # Path separator; shares its value with `strSlash`.
    alias PATH = strSlash
    # The root path; also `strSlash`.
    alias ROOT_PATH = strSlash
    # Prefix that introduces a percent-escaped character.
    alias CHAR_ESCAPE = "%"
2799

28100

29101
@value
@@ -80,10 +152,10 @@ struct URI(Writable, Stringable, Representable):
80152
var original_path: String
81153
var query_string: String
82154
if n >= 0:
83-
original_path = request_uri[:n]
155+
original_path = unquote(request_uri[:n])
84156
query_string = request_uri[n + 1 :]
85157
else:
86-
original_path = request_uri
158+
original_path = unquote(request_uri)
87159
query_string = ""
88160

89161
var queries = QueryMap()
@@ -96,9 +168,9 @@ struct URI(Writable, Stringable, Representable):
96168
if key_val[0]:
97169
queries[key_val[0]] = ""
98170
if len(key_val) == 2:
99-
# TODO: Query values are going to be URI encoded strings and should be decoded as part of the
100-
# query processing
101-
queries[key_val[0]] = key_val[1]
171+
queries[key_val[0]] = unquote[expand_plus=True](
172+
key_val[1]
173+
)
102174

103175
return URI(
104176
_original_path=original_path,
@@ -115,7 +187,9 @@ struct URI(Writable, Stringable, Representable):
115187
)
116188

117189
fn __str__(self) -> String:
118-
var result = String.write(self.scheme, URIDelimiters.SCHEMA, self.host, self.path)
190+
var result = String.write(
191+
self.scheme, URIDelimiters.SCHEMA, self.host, self.path
192+
)
119193
if len(self.query_string) > 0:
120194
result.write(QueryDelimiters.STRING_START, self.query_string)
121195
return result^

tests/lightbug_http/test_uri.mojo

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,8 @@ def test_uri_parse_query_with_special_characters():
107107
testing.assert_equal(uri.host, "example.com")
108108
testing.assert_equal(uri.path, "/path")
109109
testing.assert_equal(uri.query_string, "name=John+Doe&email=john%40example.com")
110-
# testing.assert_equal(uri.queries["name"], "John Doe") - fails, contains John+Doe
111-
# testing.assert_equal(uri.queries["email"], "john@example.com") - fails, contains john%40example.com
110+
testing.assert_equal(uri.queries["name"], "John Doe")
111+
testing.assert_equal(uri.queries["email"], "john@example.com")
112112

113113
def test_uri_parse_empty_query_values():
114114
var uri = URI.parse("http://example.com/api?key=&token=&empty")
@@ -132,8 +132,8 @@ def test_uri_parse_complex_query():
132132
def test_uri_parse_query_with_unicode():
    """Percent-escaped multi-byte UTF-8 query values are decoded.

    The raw `query_string` must stay encoded, while the parsed `queries`
    map holds the decoded text.
    """
    var uri = URI.parse("http://example.com/search?q=%E2%82%AC&lang=%F0%9F%87%A9%F0%9F%87%AA")
    testing.assert_equal(uri.query_string, "q=%E2%82%AC&lang=%F0%9F%87%A9%F0%9F%87%AA")
    # %E2%82%AC is the UTF-8 encoding of the euro sign.
    testing.assert_equal(uri.queries["q"], "€")
    # %F0%9F%87%A9 and %F0%9F%87%AA are the two regional-indicator code
    # points that make up the German flag.
    testing.assert_equal(uri.queries["lang"], "🇩🇪")
137137

138138
# def test_uri_parse_query_with_fragments():
139139
# var uri = URI.parse("http://example.com/page?id=123#section1")

0 commit comments

Comments
(0)