@@ -55,6 +55,7 @@ namespace
5555void grapheme_line_segmenter::reset (std::string_view buffer) noexcept
5656{
5757 _buffer = buffer;
58+ _next = buffer.data ();
5859
5960 _utf8 = {};
6061 _lastCodepointHint = 0 ;
@@ -70,7 +71,7 @@ void grapheme_line_segmenter::move_forward_to(char const* pos) noexcept
7071{
7172 assert (_buffer.data () <= pos && pos <= _buffer.data () + _buffer.size ());
7273 auto const skippedBytesCount = static_cast <size_t >(pos - _buffer.data ());
73- _buffer. remove_prefix ( skippedBytesCount) ;
74+ _next += skippedBytesCount;
7475 _lastCodepointHint = 0 ;
7576 _currentClusterWidth = 0 ;
7677 _utf8 = {};
@@ -83,7 +84,7 @@ grapheme_line_segmenter::result_type grapheme_line_segmenter::process(unsigned m
8384 if (_buffer.empty ())
8485 return result_type { .text = _buffer.substr (0 , 0 ), .width = 0 };
8586
86- char const * start = _buffer. data () ;
87+ char const * start = _next ;
8788 char const * const resultStart = _utf8.expectedLength ? start - _utf8.currentLength : start;
8889
8990 // Number of bytes used in the current line.
@@ -118,17 +119,20 @@ grapheme_line_segmenter::result_type grapheme_line_segmenter::process(unsigned m
118119 maxWidth -= count;
119120 totalWidthProcessed += count;
120121 totalByteCountProcessed += count;
121- _buffer. remove_prefix ( count) ;
122+ _next += count;
122123 break ;
123124 }
124125 case State::ComplexUnicode: {
125126 auto const sub = process_complex_unicode (maxWidth);
126- if (sub.width == 0 )
127+ if (sub.graphemeClusterCount == 0 )
128+ {
129+ _next += sub.byteCount ;
127130 return makeResult ();
128- maxWidth -= sub.width ;
129- totalWidthProcessed += sub.width ;
130- totalByteCountProcessed += sub.text .size ();
131- _buffer.remove_prefix (sub.text .size ());
131+ }
132+ maxWidth -= sub.graphemeClusterCount ;
133+ totalWidthProcessed += sub.graphemeClusterCount ;
134+ totalByteCountProcessed += sub.byteCount ;
135+ _next += sub.byteCount ;
132136 break ;
133137 }
134138 }
@@ -137,7 +141,7 @@ grapheme_line_segmenter::result_type grapheme_line_segmenter::process(unsigned m
137141 return makeResult ();
138142}
139143
140- unsigned grapheme_line_segmenter::process_ascii (unsigned maxWidth) noexcept
144+ unsigned grapheme_line_segmenter::process_ascii (unsigned maxWidth) const noexcept
141145{
142146 auto input = _buffer.data ();
143147 auto const end = _buffer.data () + std::min (static_cast <unsigned >(_buffer.size ()), maxWidth);
@@ -173,20 +177,19 @@ unsigned grapheme_line_segmenter::process_ascii(unsigned maxWidth) noexcept
173177 return static_cast <unsigned >(std::distance (_buffer.data (), input));
174178}
175179
176- grapheme_line_segmenter::result_type grapheme_line_segmenter::process_complex_unicode (
177- unsigned maxWidth) noexcept
180+ auto grapheme_line_segmenter::process_complex_unicode (unsigned maxWidth) noexcept -> unicode_process_result
178181{
179182 char const * const start = _buffer.data ();
180183 char const * const end = start + _buffer.size ();
181184
182- char const * input = start ; // current input processing position
185+ char const * input = _next ; // current input processing position
183186 char const * clusterStart = start; // start position of current grapheme cluster
184187 char const * lastCodepointStart = start; // start position of last codepoint
185188 unsigned consumedWidth = 0 ; // width consumed for the current line
186189 unsigned currentCodepointLength = 0 ; // bytes consumed for the current codepoint
187190
188191 char const * const lastClusterStart =
189- _utf8.expectedLength ? start - _utf8.currentLength : start ; // start position of last grapheme cluster
192+ _utf8.expectedLength ? input - _utf8.currentLength : input ; // start position of last grapheme cluster
190193
191194 char const * lastClusterEnd = lastClusterStart; // end position of last grapheme cluster
192195
@@ -228,8 +231,7 @@ grapheme_line_segmenter::result_type grapheme_line_segmenter::process_complex_un
228231 {
229232 auto const prevCodepoint = _lastCodepointHint;
230233 auto const nextCodepoint = std::get<Success>(result).value ;
231- auto const nextWidth =
232- std::max (_currentClusterWidth, static_cast <unsigned >(unicode::width (nextCodepoint)));
234+ auto const nextWidth = std::max (_currentClusterWidth, static_cast <unsigned >(unicode::width (nextCodepoint)));
233235 _lastCodepointHint = nextCodepoint;
234236 if (grapheme_segmenter::breakable (prevCodepoint, nextCodepoint))
235237 {
@@ -293,16 +295,15 @@ grapheme_line_segmenter::result_type grapheme_line_segmenter::process_complex_un
293295
294296 _currentClusterWidth = 0 ;
295297
296- // if (currentCodepointLength <= _buffer.size())
297- // _buffer.remove_prefix(currentCodepointLength);
298- // else
299- // abort();
298+ _next = input;
300299
301300 assert (lastClusterStart <= lastClusterEnd);
302301
303302 auto const resultLength = static_cast <size_t >(std::distance (lastClusterStart, lastClusterEnd));
304303 printf (" lastClusterEnd: %p, size: %zu\n " , (void *) lastClusterEnd, resultLength);
305- return result_type { .text = std::string_view (lastClusterStart, resultLength), .width = consumedWidth };
304+ return unicode_process_result { .graphemeClusterCount = consumedWidth, .byteCount = resultLength };
305+ // return result_type { .text = std::string_view(lastClusterStart, resultLength), .width = consumedWidth
306+ // };
306307}
307308
308309ConvertResult grapheme_line_segmenter::process_single_byte (uint8_t byte) noexcept
0 commit comments