1- // Copyright (C) 2020-2023 Jonathan Müller and lexy contributors
1+ // Copyright (C) 2020-2024 Jonathan Müller and lexy contributors
22// SPDX-License-Identifier: BSL-1.0
33
44#ifndef LEXY_DETAIL_CODE_POINT_HPP_INCLUDED
@@ -133,9 +133,9 @@ enum class cp_error
// Result type returned by parse_code_point().
// - cp:    the decoded code point value; several error returns pass `{}` here.
// - error: the cp_error status of the parse (cp_error::success on a well-formed code point).
// - end:   where the reader was when parsing stopped. This change switches the member from
//          Reader::iterator (filled from reader.position()) to Reader::marker (filled from
//          reader.current()); recover_code_point() restores it with reader.reset(end) and
//          obtains the iterator through end.position().
133133template <typename Reader>
134134struct cp_result
135135{
136- char32_t cp;
137- cp_error error;
138- typename Reader::iterator end;
136+ char32_t cp;
137+ cp_error error;
138+ typename Reader::marker end;
139139};
140140
141141template <typename Reader>
@@ -144,16 +144,16 @@ constexpr cp_result<Reader> parse_code_point(Reader reader)
144144 if constexpr (std::is_same_v<typename Reader::encoding, lexy::ascii_encoding>)
145145 {
146146 if (reader.peek () == Reader::encoding::eof ())
147- return {{}, cp_error::eof, reader.position ()};
147+ return {{}, cp_error::eof, reader.current ()};
148148
149149 auto cur = reader.peek ();
150150 reader.bump ();
151151
152152 auto cp = static_cast <char32_t >(cur);
153153 if (cp <= 0x7F )
154- return {cp, cp_error::success, reader.position ()};
154+ return {cp, cp_error::success, reader.current ()};
155155 else
156- return {cp, cp_error::out_of_range, reader.position ()};
156+ return {cp, cp_error::out_of_range, reader.current ()};
157157 }
158158 else if constexpr (std::is_same_v<typename Reader::encoding, lexy::utf8_encoding> //
159159 || std::is_same_v<typename Reader::encoding, lexy::utf8_char_encoding>)
@@ -176,19 +176,19 @@ constexpr cp_result<Reader> parse_code_point(Reader reader)
176176 {
177177 // ASCII character.
178178 reader.bump ();
179- return {first, cp_error::success, reader.position ()};
179+ return {first, cp_error::success, reader.current ()};
180180 }
181181 else if ((first & ~payload_cont) == pattern_cont)
182182 {
183- return {{}, cp_error::leads_with_trailing, reader.position ()};
183+ return {{}, cp_error::leads_with_trailing, reader.current ()};
184184 }
185185 else if ((first & ~payload_lead2) == pattern_lead2)
186186 {
187187 reader.bump ();
188188
189189 auto second = uchar_t (reader.peek ());
190190 if ((second & ~payload_cont) != pattern_cont)
191- return {{}, cp_error::missing_trailing, reader.position ()};
191+ return {{}, cp_error::missing_trailing, reader.current ()};
192192 reader.bump ();
193193
194194 auto result = char32_t (first & payload_lead2);
@@ -197,22 +197,22 @@ constexpr cp_result<Reader> parse_code_point(Reader reader)
197197
198198 // C0 and C1 are overlong ASCII.
199199 if (first == 0xC0 || first == 0xC1 )
200- return {result, cp_error::overlong_sequence, reader.position ()};
200+ return {result, cp_error::overlong_sequence, reader.current ()};
201201 else
202- return {result, cp_error::success, reader.position ()};
202+ return {result, cp_error::success, reader.current ()};
203203 }
204204 else if ((first & ~payload_lead3) == pattern_lead3)
205205 {
206206 reader.bump ();
207207
208208 auto second = uchar_t (reader.peek ());
209209 if ((second & ~payload_cont) != pattern_cont)
210- return {{}, cp_error::missing_trailing, reader.position ()};
210+ return {{}, cp_error::missing_trailing, reader.current ()};
211211 reader.bump ();
212212
213213 auto third = uchar_t (reader.peek ());
214214 if ((third & ~payload_cont) != pattern_cont)
215- return {{}, cp_error::missing_trailing, reader.position ()};
215+ return {{}, cp_error::missing_trailing, reader.current ()};
216216 reader.bump ();
217217
218218 auto result = char32_t (first & payload_lead3);
@@ -223,29 +223,29 @@ constexpr cp_result<Reader> parse_code_point(Reader reader)
223223
224224 auto cp = result;
225225 if (0xD800 <= cp && cp <= 0xDFFF )
226- return {cp, cp_error::surrogate, reader.position ()};
226+ return {cp, cp_error::surrogate, reader.current ()};
227227 else if (first == 0xE0 && second < 0xA0 )
228- return {cp, cp_error::overlong_sequence, reader.position ()};
228+ return {cp, cp_error::overlong_sequence, reader.current ()};
229229 else
230- return {cp, cp_error::success, reader.position ()};
230+ return {cp, cp_error::success, reader.current ()};
231231 }
232232 else if ((first & ~payload_lead4) == pattern_lead4)
233233 {
234234 reader.bump ();
235235
236236 auto second = uchar_t (reader.peek ());
237237 if ((second & ~payload_cont) != pattern_cont)
238- return {{}, cp_error::missing_trailing, reader.position ()};
238+ return {{}, cp_error::missing_trailing, reader.current ()};
239239 reader.bump ();
240240
241241 auto third = uchar_t (reader.peek ());
242242 if ((third & ~payload_cont) != pattern_cont)
243- return {{}, cp_error::missing_trailing, reader.position ()};
243+ return {{}, cp_error::missing_trailing, reader.current ()};
244244 reader.bump ();
245245
246246 auto fourth = uchar_t (reader.peek ());
247247 if ((fourth & ~payload_cont) != pattern_cont)
248- return {{}, cp_error::missing_trailing, reader.position ()};
248+ return {{}, cp_error::missing_trailing, reader.current ()};
249249 reader.bump ();
250250
251251 auto result = char32_t (first & payload_lead4);
@@ -258,15 +258,15 @@ constexpr cp_result<Reader> parse_code_point(Reader reader)
258258
259259 auto cp = result;
260260 if (cp > 0x10'FFFF )
261- return {cp, cp_error::out_of_range, reader.position ()};
261+ return {cp, cp_error::out_of_range, reader.current ()};
262262 else if (first == 0xF0 && second < 0x90 )
263- return {cp, cp_error::overlong_sequence, reader.position ()};
263+ return {cp, cp_error::overlong_sequence, reader.current ()};
264264 else
265- return {cp, cp_error::success, reader.position ()};
265+ return {cp, cp_error::success, reader.current ()};
266266 }
267267 else // FE or FF
268268 {
269- return {{}, cp_error::eof, reader.position ()};
269+ return {{}, cp_error::eof, reader.current ()};
270270 }
271271 }
272272 else if constexpr (std::is_same_v<typename Reader::encoding, lexy::utf16_encoding>)
@@ -278,53 +278,53 @@ constexpr cp_result<Reader> parse_code_point(Reader reader)
278278 constexpr auto pattern2 = 0b110111 << 10 ;
279279
280280 if (reader.peek () == Reader::encoding::eof ())
281- return {{}, cp_error::eof, reader.position ()};
281+ return {{}, cp_error::eof, reader.current ()};
282282
283283 auto first = char16_t (reader.peek ());
284284 if ((first & ~payload1) == pattern1)
285285 {
286286 reader.bump ();
287287 if (reader.peek () == Reader::encoding::eof ())
288- return {{}, cp_error::missing_trailing, reader.position ()};
288+ return {{}, cp_error::missing_trailing, reader.current ()};
289289
290290 auto second = char16_t (reader.peek ());
291291 if ((second & ~payload2) != pattern2)
292- return {{}, cp_error::missing_trailing, reader.position ()};
292+ return {{}, cp_error::missing_trailing, reader.current ()};
293293 reader.bump ();
294294
295295 // We've got a valid code point.
296296 auto result = char32_t (first & payload1);
297297 result <<= 10 ;
298298 result |= char32_t (second & payload2);
299299 result |= 0x10000 ;
300- return {result, cp_error::success, reader.position ()};
300+ return {result, cp_error::success, reader.current ()};
301301 }
302302 else if ((first & ~payload2) == pattern2)
303303 {
304- return {{}, cp_error::leads_with_trailing, reader.position ()};
304+ return {{}, cp_error::leads_with_trailing, reader.current ()};
305305 }
306306 else
307307 {
308308 // Single code unit code point; always valid.
309309 reader.bump ();
310- return {first, cp_error::success, reader.position ()};
310+ return {first, cp_error::success, reader.current ()};
311311 }
312312 }
313313 else if constexpr (std::is_same_v<typename Reader::encoding, lexy::utf32_encoding>)
314314 {
315315 if (reader.peek () == Reader::encoding::eof ())
316- return {{}, cp_error::eof, reader.position ()};
316+ return {{}, cp_error::eof, reader.current ()};
317317
318318 auto cur = reader.peek ();
319319 reader.bump ();
320320
321321 auto cp = cur;
322322 if (cp > 0x10'FFFF )
323- return {cp, cp_error::out_of_range, reader.position ()};
323+ return {cp, cp_error::out_of_range, reader.current ()};
324324 else if (0xD800 <= cp && cp <= 0xDFFF )
325- return {cp, cp_error::surrogate, reader.position ()};
325+ return {cp, cp_error::surrogate, reader.current ()};
326326 else
327- return {cp, cp_error::success, reader.position ()};
327+ return {cp, cp_error::success, reader.current ()};
328328 }
329329 else
330330 {
@@ -341,15 +341,15 @@ constexpr void recover_code_point(Reader& reader, cp_result<Reader> result)
341341 {
342342 case cp_error::success:
343343 // Consume the entire code point.
344- reader.set_position (result.end );
344+ reader.reset (result.end );
345345 break ;
346346 case cp_error::eof:
347347 // We don't need to do anything to "recover" from EOF.
348348 break ;
349349
350350 case cp_error::leads_with_trailing:
351351 // Invalid code unit, consume to recover.
352- LEXY_PRECONDITION (result.end == reader.position ());
352+ LEXY_PRECONDITION (result.end . position () == reader.position ());
353353 reader.bump ();
354354 break ;
355355
@@ -358,7 +358,7 @@ constexpr void recover_code_point(Reader& reader, cp_result<Reader> result)
358358 case cp_error::out_of_range:
359359 case cp_error::overlong_sequence:
360360 // Consume all the invalid code units to recover.
361- reader.set_position (result.end );
361+ reader.reset (result.end );
362362 break ;
363363 }
364364}
0 commit comments