Skip to content

Commit 3f4c4b6

Browse files
committed
Fully implement 'urlencode' filter
1 parent a9629bb commit 3f4c4b6

File tree

2 files changed

+72
-63
lines changed

2 files changed

+72
-63
lines changed

src/string_converter_filter.cpp

Lines changed: 71 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -68,78 +68,86 @@ struct UrlStringEncoder : public StringEncoder<UrlStringEncoder>
6868
template<typename CharT, typename Fn>
6969
void EncodeChar(CharT ch, Fn&& fn) const
7070
{
71+
enum EncodeStyle
72+
{
73+
None,
74+
Percent
75+
};
76+
77+
EncodeStyle encStyle = None;
7178
switch (ch)
7279
{
7380
case ' ':
7481
fn('+');
75-
break;
76-
case '+':
77-
fn('%', '2', 'B');
78-
break;
79-
case '\"':
80-
fn('%', '2', '2');
81-
break;
82-
case '%':
83-
fn('%', '2', '5');
84-
break;
85-
case '-':
86-
fn('%', '2', 'D');
87-
break;
88-
case '!':
89-
fn('%', '2', '1');
90-
break;
91-
case '#':
92-
fn('%', '2', '3');
93-
break;
94-
case '$':
95-
fn('%', '2', '4');
96-
break;
97-
case '&':
98-
fn('%', '2', '6');
99-
break;
100-
case '\'':
101-
fn('%', '2', '7');
102-
break;
103-
case '(':
104-
fn('%', '2', '8');
105-
break;
106-
case ')':
107-
fn('%', '2', '9');
108-
break;
109-
case '*':
110-
fn('%', '2', 'A');
111-
break;
112-
case ',':
113-
fn('%', '2', 'C');
114-
break;
115-
case '/':
116-
fn('%', '2', 'F');
117-
break;
118-
case ':':
119-
fn('%', '3', 'A');
120-
break;
121-
case ';':
122-
fn('%', '3', 'B');
123-
break;
124-
case '=':
125-
fn('%', '3', 'D');
126-
break;
127-
case '?':
128-
fn('%', '3', 'F');
129-
break;
130-
case '@':
131-
fn('%', '4', '0');
132-
break;
133-
case '[':
134-
fn('%', '5', 'B');
135-
break;
82+
return;
83+
case '+': case '\"': case '%': case '-':
84+
case '!': case '#': case '$': case '&':
85+
case '\'': case '(': case ')': case '*':
86+
case ',': case '/': case ':': case ';':
87+
case '=': case '?': case '@': case '[':
13688
case ']':
137-
fn('%', '5', 'D');
89+
encStyle = Percent;
13890
break;
13991
default:
140-
fn(ch);
92+
if (AsUnsigned(ch) > 0x7f)
93+
encStyle = Percent;
14194
break;
14295
}
96+
97+
if (encStyle == None)
98+
{
99+
fn(ch);
100+
return;
101+
}
102+
union
103+
{
104+
uint32_t intCh;
105+
uint8_t chars[4];
106+
};
107+
intCh = AsUnsigned(ch);
108+
if (intCh > 0xffffff)
109+
DoPercentEncoding(chars[3], fn);
110+
if (intCh > 0xffff)
111+
DoPercentEncoding(chars[2], fn);
112+
if (intCh > 0xff)
113+
DoPercentEncoding(chars[1], fn);
114+
DoPercentEncoding(chars[0], fn);
115+
}
116+
117+
template<typename Fn>
118+
void DoPercentEncoding(uint8_t ch, Fn&& fn) const
119+
{
120+
char chars[] = "0123456789ABCDEF";
121+
int ch1 = static_cast<int>(chars[(ch & 0xf0) >> 4]);
122+
int ch2 = static_cast<int>(chars[ch & 0x0f]);
123+
fn('%', ch1, ch2);
124+
}
125+
126+
// Converts a character of SZ bytes to the unsigned integer type of the same width,
// so that a signed character type does not sign-extend when widened afterwards.
// Only widths 1, 2 and 4 are specialised; the primary template is left undefined,
// so any other width fails to compile.
template<typename Ch, size_t SZ>
struct ToUnsigned;

// 1-byte characters -> uint8_t.
template<typename Ch>
struct ToUnsigned<Ch, 1>
{
    static constexpr uint8_t Cast(Ch ch) noexcept { return static_cast<uint8_t>(ch); }
};

// 2-byte characters -> uint16_t.
template<typename Ch>
struct ToUnsigned<Ch, 2>
{
    static constexpr uint16_t Cast(Ch ch) noexcept { return static_cast<uint16_t>(ch); }
};

// 4-byte characters -> uint32_t.
template<typename Ch>
struct ToUnsigned<Ch, 4>
{
    static constexpr uint32_t Cast(Ch ch) noexcept { return static_cast<uint32_t>(ch); }
};
146+
147+
template<typename Ch>
148+
auto AsUnsigned(Ch ch) const
149+
{
150+
return static_cast<uint32_t>(ToUnsigned<Ch, sizeof(Ch)>::Cast(ch));
143151
}
144152
};
145153

test/filters_test.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,7 @@ INSTANTIATE_TEST_CASE_P(DictSort, FilterGenericTest, ::testing::Values(
331331

332332
// Cases for the 'urlencode' filter. Each InputOutputPair holds a template expression
// and, presumably, the text it is expected to render to (confirm against
// FilterGenericTest, which is not visible here).
INSTANTIATE_TEST_CASE_P(UrlEncode, FilterGenericTest, ::testing::Values(
333333
// Space becomes '+'.
InputOutputPair{"'Hello World' | urlencode", "Hello+World"},
334+
// Bytes above 0x7f (given here as \x escapes) are expected as one %XX triplet each.
InputOutputPair{"'Hello World\xD0\x9C\xD0\xBA' | urlencode", "Hello+World%D0%9C%D0%BA"},
334335
// Punctuation listed in the encoder's switch is expected in percent-encoded form.
InputOutputPair{"'! # $ & ( ) * + , / : ; = ? @ [ ] %' | urlencode", "%21+%23+%24+%26+%28+%29+%2A+%2B+%2C+%2F+%3A+%3B+%3D+%3F+%40+%5B+%5D+%25"}
335336
));
336337

0 commit comments

Comments
 (0)