@@ -91,16 +91,20 @@ CXX20_CONSTEXPR fastfloat_really_inline bool is_made_of_eight_digits_fast(const
9191 return is_made_of_eight_digits_fast (read_u64 (chars));
9292}
9393
94+ typedef span<const char > byte_span; // alias used by parsed_number_string to record the integer and fraction digit ranges
95+
9496struct parsed_number_string { // value returned by parse_number_string
95- int64_t exponent;
96- uint64_t mantissa;
97- const char *lastmatch;
98- bool negative;
99- bool valid;
100- bool too_many_digits;
97+ int64_t exponent{0 };
98+ uint64_t mantissa{0 };
99+ const char *lastmatch{nullptr };
100+ bool negative{false };
101+ bool valid{false };
102+ bool too_many_digits{false }; // set by parse_number_string when its digit count exceeds 19
103+ // Character ranges of the digits scanned by parse_number_string (integer part, then fractional part).
104+ byte_span integer{}; // non-nullable: assigned right after the integer digits have been scanned
105+ byte_span fraction{}; // nullable: assigned only when a decimal point follows the integer part
101106};
102107
103-
104108// Assuming that you use no more than 19 digits, this will
105109// parse an ASCII string.
106110CXX20_CONSTEXPR fastfloat_really_inline
@@ -125,6 +129,10 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
125129
126130 uint64_t i = 0 ; // an unsigned int avoids signed overflows (which are bad)
127131
132+ while ((std::distance (p, pend) >= 8 ) && is_made_of_eight_digits_fast (p)) {
133+ i = i * 100000000 + parse_eight_digits_unrolled (p); // in rare cases, this will overflow, but that's ok
134+ p += 8 ;
135+ }
128136 while ((p != pend) && is_integer (*p)) {
129137 // a multiplication by 10 is cheaper than an arbitrary integer
130138 // multiplication
@@ -134,24 +142,24 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
134142 }
135143 const char *const end_of_integer_part = p;
136144 int64_t digit_count = int64_t (end_of_integer_part - start_digits);
145+ answer.integer = byte_span (start_digits, size_t (digit_count));
137146 int64_t exponent = 0 ;
138147 if ((p != pend) && (*p == decimal_point)) {
139148 ++p;
140- // Fast approach only tested under little endian systems
141- if ((std::distance (p, pend) >= 8 ) && is_made_of_eight_digits_fast (p)) {
142- i = i * 100000000 + parse_eight_digits_unrolled (p); // in rare cases, this will overflow, but that's ok
143- p += 8 ;
144- if ((std::distance (p, pend) >= 8 ) && is_made_of_eight_digits_fast (p)) {
149+ const char * before = p;
150+ // can occur at most twice without overflowing, but let it occur more, since
151+ // for integers with many digits, digit parsing is the primary bottleneck.
152+ while ((std::distance (p, pend) >= 8 ) && is_made_of_eight_digits_fast (p)) {
145153 i = i * 100000000 + parse_eight_digits_unrolled (p); // in rare cases, this will overflow, but that's ok
146154 p += 8 ;
147155 }
148- }
149156 while ((p != pend) && is_integer (*p)) {
150157 uint8_t digit = uint8_t (*p - ' 0' );
151158 ++p;
152159 i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
153160 }
154- exponent = end_of_integer_part + 1 - p;
161+ exponent = before - p;
162+ answer.fraction = byte_span (before, size_t (p - before));
155163 digit_count -= exponent;
156164 }
157165 // we must have encountered at least one integer!
@@ -179,7 +187,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
179187 } else {
180188 while ((p != pend) && is_integer (*p)) {
181189 uint8_t digit = uint8_t (*p - ' 0' );
182- if (exp_number < 0x10000 ) {
190+ if (exp_number < 0x10000000 ) {
183191 exp_number = 10 * exp_number + digit;
184192 }
185193 ++p;
@@ -212,23 +220,26 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
212220 if (digit_count > 19 ) {
213221 answer.too_many_digits = true ;
214222 // Let us start again, this time, avoiding overflows.
223+ // We don't need to check if is_integer, since we use the
224+ // pre-tokenized spans from above.
215225 i = 0 ;
216- p = start_digits;
226+ p = answer.integer .ptr ;
227+ const char * int_end = p + answer.integer .len ();
217228 const uint64_t minimal_nineteen_digit_integer{1000000000000000000 };
218- while ((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer (*p )) {
229+ while ((i < minimal_nineteen_digit_integer) && (p != int_end )) {
219230 i = i * 10 + uint64_t (*p - ' 0' );
220231 ++p;
221232 }
222233 if (i >= minimal_nineteen_digit_integer) { // We have a big integers
223234 exponent = end_of_integer_part - p + exp_number;
224235 } else { // We have a value with a fractional component.
225- p++; // skip the dot
226- const char *first_after_period = p;
227- while ((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer (*p )) {
236+ p = answer. fraction . ptr ;
237+ const char * frac_end = p + answer. fraction . len () ;
238+ while ((i < minimal_nineteen_digit_integer) && (p != frac_end )) {
228239 i = i * 10 + uint64_t (*p - ' 0' );
229240 ++p;
230241 }
231- exponent = first_after_period - p + exp_number;
242+ exponent = answer. fraction . ptr - p + exp_number;
232243 }
233244 // We have now corrected both exponent and i, to a truncated value
234245 }
@@ -238,108 +249,6 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
238249 return answer;
239250}
240251
241-
242- // Parses the text in [p, pend) into a `decimal`: sign, digit array, digit count, decimal point position and truncation flag.
243- // This should always succeed since it follows a call to parse_number_string.
244- // This function could be optimized: we could stop after 19 digits and try to bail out,
245- // and we should be able to recover the computed exponent from the pass in parse_number_string.
246- CXX20_CONSTEXPR fastfloat_really_inline decimal parse_decimal (const char *p, const char *pend, parse_options options) noexcept {
247- const char decimal_point = options.decimal_point ;
248-
249- decimal answer;
250- answer.num_digits = 0 ;
251- answer.decimal_point = 0 ;
252- answer.truncated = false ;
253- answer.negative = (*p == ' -' );
254- if (*p == ' -' ) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
255- ++p;
256- }
257- // skip leading zeroes
258- while ((p != pend) && (*p == ' 0' )) {
259- ++p;
260- }
261- while ((p != pend) && is_integer (*p)) {
262- if (answer.num_digits < max_digits) {
263- answer.digits [answer.num_digits ] = uint8_t (*p - ' 0' );
264- }
265- answer.num_digits ++;
266- ++p;
267- }
268- if ((p != pend) && (*p == decimal_point)) {
269- ++p;
270- const char *first_after_period = p;
271- // if no digit has been recorded yet, the zeros that open the fraction are leading zeros too and must be skipped
272- if (answer.num_digits == 0 ) {
273- // skip zeros
274- while ((p != pend) && (*p == ' 0' )) {
275- ++p;
276- }
277- }
278- // We expect that this loop will often take the bulk of the running time
279- // when a value has lots of digits: it stores eight digits per iteration.
280- while ((std::distance (p, pend) >= 8 ) && (answer.num_digits + 8 < max_digits)) {
281- uint64_t val = read_u64 (p);
282- if (! is_made_of_eight_digits_fast (val)) { break ; }
283- // We have eight digits, process them in one go!
284- val -= 0x3030303030303030 ;
285- write_u64 (answer.digits + answer.num_digits , val);
286- answer.num_digits += 8 ;
287- p += 8 ;
288- }
289- while ((p != pend) && is_integer (*p)) {
290- if (answer.num_digits < max_digits) {
291- answer.digits [answer.num_digits ] = uint8_t (*p - ' 0' );
292- }
293- answer.num_digits ++;
294- ++p;
295- }
296- answer.decimal_point = int32_t (first_after_period - p);
297- }
298- // We want num_digits to be the number of significant digits, excluding
299- // leading *and* trailing zeros! Otherwise the truncated flag later is
300- // going to be misleading.
301- if (answer.num_digits > 0 ) {
302- // We potentially need the answer.num_digits > 0 guard because we
303- // prune leading zeros. So with answer.num_digits > 0, we know that
304- // we have at least one non-zero digit.
305- const char *preverse = p - 1 ;
306- int32_t trailing_zeros = 0 ;
307- while ((*preverse == ' 0' ) || (*preverse == decimal_point)) {
308- if (*preverse == ' 0' ) { trailing_zeros++; };
309- --preverse;
310- }
311- answer.decimal_point += int32_t (answer.num_digits );
312- answer.num_digits -= uint32_t (trailing_zeros);
313- }
314- if (answer.num_digits > max_digits) {
315- answer.truncated = true ;
316- answer.num_digits = max_digits;
317- }
318- if ((p != pend) && ((' e' == *p) || (' E' == *p))) {
319- ++p;
320- bool neg_exp = false ;
321- if ((p != pend) && (' -' == *p)) {
322- neg_exp = true ;
323- ++p;
324- } else if ((p != pend) && (' +' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
325- ++p;
326- }
327- int32_t exp_number = 0 ; // exponential part
328- while ((p != pend) && is_integer (*p)) {
329- uint8_t digit = uint8_t (*p - ' 0' );
330- if (exp_number < 0x10000 ) {
331- exp_number = 10 * exp_number + digit;
332- }
333- ++p;
334- }
335- answer.decimal_point += (neg_exp ? -exp_number : exp_number);
336- }
337- // In very rare cases we may have fewer than 19 digits; zero the remaining slots so that callers can reliably
338- // assume that all digits up to max_digit_without_overflow have been initialized.
339- for (uint32_t i = answer.num_digits ; i < max_digit_without_overflow; i++) { answer.digits [i] = 0 ; }
340-
341- return answer;
342- }
343252} // namespace fast_float
344253
345254#endif
0 commit comments