|
20 | 20 | #include "src/utils/string.h" |
21 | 21 |
|
22 | 22 |
|
23 | | -namespace modsecurity::actions::transformations { |
24 | | - |
25 | | - |
26 | | -bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const { |
27 | | - std::string ret; |
28 | | - unsigned char *input; |
29 | | - int _changed = 0; |
30 | | - char *out; |
31 | | - |
32 | | - input = reinterpret_cast<unsigned char *> |
33 | | - (malloc(sizeof(char) * value.length()+1)); |
| 23 | +constexpr int UNICODE_ERROR_CHARACTERS_MISSING = -1; |
| 24 | +constexpr int UNICODE_ERROR_INVALID_ENCODING = -2; |
34 | 25 |
|
35 | | - if (input == NULL) { |
36 | | - return ""; |
37 | | - } |
38 | | - |
39 | | - memcpy(input, value.c_str(), value.length()+1); |
40 | 26 |
|
41 | | - out = inplace(input, value.size() + 1, &_changed); |
42 | | - free(input); |
43 | | - if (out != NULL) { |
44 | | - ret.assign(reinterpret_cast<char *>(out), |
45 | | - strlen(reinterpret_cast<char *>(out))); |
46 | | - free(out); |
47 | | - } |
| 27 | +namespace modsecurity::actions::transformations { |
48 | 28 |
|
49 | | - const auto changed = ret != value; |
50 | | - value = ret; |
51 | | - return changed; |
52 | | -} |
53 | 29 |
|
| 30 | +static inline bool encode(std::string &value) { |
| 31 | + auto input = reinterpret_cast<unsigned char*>(value.data()); |
| 32 | + const auto input_len = value.length(); |
54 | 33 |
|
55 | | -char *Utf8ToUnicode::inplace(unsigned char *input, |
56 | | - uint64_t input_len, int *changed) { |
57 | | - unsigned int count = 0; |
58 | | - char *data; |
59 | | - char *data_orig; |
60 | | - unsigned int i, len, j; |
61 | | - unsigned int bytes_left = input_len; |
| 34 | + bool changed = false; |
| 35 | + std::string::size_type count = 0; |
| 36 | + auto bytes_left = input_len; |
62 | 37 | unsigned char unicode[8]; |
63 | | - *changed = 0; |
64 | 38 |
|
65 | 39 | /* RFC 3629 states that UTF-8 characters are encoded using sequences of 1 to 4 octets. */ |
66 | 40 | /* Max size per character should fit in 4 bytes */ |
67 | | - len = input_len * 4 + 1; |
68 | | - data = reinterpret_cast<char *>(malloc(sizeof(char) * len)); |
69 | | - if (data == NULL) { |
70 | | - return NULL; |
71 | | - } |
72 | | - data_orig = data; |
| 41 | + const auto len = input_len * 4 + 1; |
| 42 | + std::string ret(len, {}); |
| 43 | + auto data = ret.data(); |
73 | 44 |
|
74 | | - if (input == NULL) { |
75 | | - free(data); |
76 | | - return NULL; |
77 | | - } |
78 | | - |
79 | | - for (i = 0; i < bytes_left;) { |
| 45 | + for (std::string::size_type i = 0; i < bytes_left;) { |
80 | 46 | int unicode_len = 0; |
81 | 47 | unsigned int d = 0; |
82 | 48 | unsigned char c; |
83 | | - unsigned char *utf = (unsigned char *)&input[i]; |
| 49 | + auto utf = &input[i]; |
84 | 50 |
|
85 | 51 | c = *utf; |
86 | 52 |
|
@@ -108,7 +74,7 @@ char *Utf8ToUnicode::inplace(unsigned char *input, |
108 | 74 | unicode_len = UNICODE_ERROR_INVALID_ENCODING; |
109 | 75 | } else { |
110 | 76 | unicode_len = 2; |
111 | | - count+=6; |
| 77 | + count += 6; |
112 | 78 | if (count <= len) { |
113 | 79 | int length = 0; |
114 | 80 | /* compute character number */ |
@@ -138,11 +104,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input, |
138 | 104 | break; |
139 | 105 | } |
140 | 106 |
|
141 | | - for (j = 0; j < length; j++) { |
| 107 | + for (std::string::size_type j = 0; j < length; j++) { |
142 | 108 | *data++ = unicode[j]; |
143 | 109 | } |
144 | 110 |
|
145 | | - *changed = 1; |
| 111 | + changed = true; |
146 | 112 | } |
147 | 113 | } |
148 | 114 | } else if ((c & 0xF0) == 0xE0) { |
@@ -190,11 +156,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input, |
190 | 156 | break; |
191 | 157 | } |
192 | 158 |
|
193 | | - for (j = 0; j < length; j++) { |
| 159 | + for (std::string::size_type j = 0; j < length; j++) { |
194 | 160 | *data++ = unicode[j]; |
195 | 161 | } |
196 | 162 |
|
197 | | - *changed = 1; |
| 163 | + changed = true; |
198 | 164 | } |
199 | 165 | } |
200 | 166 | } else if ((c & 0xF8) == 0xF0) { |
@@ -252,11 +218,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input, |
252 | 218 | break; |
253 | 219 | } |
254 | 220 |
|
255 | | - for (j = 0; j < length; j++) { |
| 221 | + for (std::string::size_type j = 0; j < length; j++) { |
256 | 222 | *data++ = unicode[j]; |
257 | 223 | } |
258 | 224 |
|
259 | | - *changed = 1; |
| 225 | + changed = true; |
260 | 226 | } |
261 | 227 | } |
262 | 228 | } else { |
@@ -300,7 +266,14 @@ char *Utf8ToUnicode::inplace(unsigned char *input, |
300 | 266 |
|
301 | 267 | *data ='\0'; |
302 | 268 |
|
303 | | - return data_orig; |
| 269 | + ret.resize(data - ret.c_str()); |
| 270 | + std::swap(value, ret); |
| 271 | + return changed; |
| 272 | +} |
| 273 | + |
| 274 | + |
| 275 | +bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const { |
| 276 | + return encode(value); |
304 | 277 | } |
305 | 278 |
|
306 | 279 |
|
|
0 commit comments