Skip to content

Commit bb1677c

Browse files
authored
Merge pull request #1 from aldanor/feature/bigints-optimization
Big ints / long floats optimization
2 parents 75bedd8 + 73ab635 commit bb1677c

File tree

5 files changed

+75
-79
lines changed

5 files changed

+75
-79
lines changed

extras/data-tests/src/main.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,10 @@ impl TestCase {
3030
dbg!(self);
3131
eprintln!("Failed to parse as f32: {:?}", self.string);
3232
}
33-
let (value, rest) = r.unwrap();
34-
if !rest.is_empty() || value != expected {
35-
if !rest.is_empty() {
36-
eprintln!("Expected empty string remainder, got: {:?}", rest);
33+
let (value, len) = r.unwrap();
34+
if len != s.len() || value != expected {
35+
if len != s.len() {
36+
eprintln!("Expected empty string remainder, got: {:?}", s.len() - len);
3737
}
3838
if value != expected {
3939
eprintln!("Expected output {}, got {}", expected, value);

src/decimal.rs

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -32,51 +32,6 @@ impl Decimal {
3232
parse_decimal(s)
3333
}
3434

35-
#[inline]
36-
pub fn to_truncated_mantissa(&self) -> u64 {
37-
let mut mantissa = 0;
38-
if cfg!(target_endian = "big") {
39-
for i in 0..Self::MAX_DIGITS_WITHOUT_OVERFLOW {
40-
mantissa = mantissa * 10 + self.digits[i] as u64;
41-
}
42-
} else {
43-
let mut val = self.digits.read_u64();
44-
val = val.wrapping_mul(2561) >> 8;
45-
val = (val & 0x00FF00FF00FF00FF).wrapping_mul(6553601) >> 16;
46-
mantissa =
47-
(((val & 0x0000FFFF0000FFFF).wrapping_mul(42949672960001) >> 32) as u32) as u64;
48-
let mut val = self.digits[8..].read_u64();
49-
val = val.wrapping_mul(2561) >> 8;
50-
val = (val & 0x00FF00FF00FF00FF).wrapping_mul(6553601) >> 16;
51-
let eight_digits_value =
52-
(((val & 0x0000FFFF0000FFFF).wrapping_mul(42949672960001) >> 32) as u32) as u64;
53-
mantissa = 100000000 * mantissa + eight_digits_value;
54-
for i in 16..Self::MAX_DIGITS_WITHOUT_OVERFLOW {
55-
mantissa = mantissa * 10 + self.digits[i] as u64;
56-
}
57-
if false {
58-
let mut val = self.digits.read_u64();
59-
val = val * 2561 >> 8;
60-
val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
61-
mantissa = (val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32;
62-
let mut val = self.digits[8..].read_u64();
63-
val = val * 2561 >> 8;
64-
val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
65-
let eight_digits_value = (val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32;
66-
mantissa = 100000000 * mantissa + eight_digits_value;
67-
for i in 16..Self::MAX_DIGITS_WITHOUT_OVERFLOW {
68-
mantissa = mantissa * 10 + self.digits[i] as u64;
69-
}
70-
}
71-
}
72-
mantissa
73-
}
74-
75-
#[inline]
76-
pub fn to_truncated_exponent(&self) -> i32 {
77-
self.decimal_point - Self::MAX_DIGITS_WITHOUT_OVERFLOW as i32
78-
}
79-
8035
#[inline]
8136
pub fn try_add_digit(&mut self, digit: u8) {
8237
if self.num_digits < Self::MAX_DIGITS {

src/number.rs

Lines changed: 60 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@ use crate::common::{is_8digits_le, AsciiStr, ByteSlice};
22
use crate::float::Float;
33
use crate::format::FloatFormat;
44

5+
const MIN_19DIGIT_INT: u64 = 100_0000_0000_0000_0000;
6+
57
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
68
pub struct Number {
79
pub exponent: i64,
810
pub mantissa: u64,
911
pub negative: bool,
12+
pub many_digits: bool,
1013
}
1114

1215
impl Number {
@@ -15,6 +18,7 @@ impl Number {
1518
F::MIN_EXPONENT_FAST_PATH <= self.exponent
1619
&& self.exponent <= F::MAX_EXPONENT_FAST_PATH
1720
&& self.mantissa <= F::MAX_MANTISSA_FAST_PATH
21+
&& !self.many_digits
1822
}
1923

2024
#[inline]
@@ -56,6 +60,15 @@ fn try_parse_digits(s: &mut AsciiStr<'_>, x: &mut u64) {
5660
});
5761
}
5862

63+
#[inline]
64+
fn try_parse_19digits(s: &mut AsciiStr<'_>, x: &mut u64) {
65+
while *x < MIN_19DIGIT_INT && !s.is_empty() && s.first().is_ascii_digit() {
66+
let digit = s.first() - b'0';
67+
*x = (*x * 10) + digit as u64; // no overflows here
68+
s.step();
69+
}
70+
}
71+
5972
#[inline]
6073
fn try_parse_8digits_le(s: &mut AsciiStr<'_>, x: &mut u64) -> usize {
6174
// may cause overflows, to be handled later
@@ -135,6 +148,7 @@ pub fn parse_number(s: &[u8], fmt: FloatFormat) -> Option<(Number, usize)> {
135148
// handle dot with the following digits
136149
let mut n_after_dot = 0;
137150
let mut exponent = 0i64;
151+
let int_end = s;
138152
if s.check_first(b'.') {
139153
s.step();
140154
let before = s;
@@ -150,33 +164,64 @@ pub fn parse_number(s: &[u8], fmt: FloatFormat) -> Option<(Number, usize)> {
150164
}
151165

152166
// handle scientific format
167+
let mut exp_number = 0i64;
153168
if fmt.scientific {
154169
if s.check_first_either(b'e', b'E') {
155-
parse_scientific(&mut s, &mut exponent, fmt.fixed)?;
170+
parse_scientific(&mut s, &mut exp_number, fmt.fixed)?;
171+
exponent += exp_number;
156172
} else if !fmt.fixed {
157173
return None; // error: scientific and not fixed
158174
}
159175
}
160176

177+
let len = s.offset_from(&start) as _;
178+
161179
// handle uncommon case with many digits
162180
n_digits -= 19;
181+
if n_digits <= 0 {
182+
return Some((
183+
Number {
184+
exponent,
185+
mantissa,
186+
negative,
187+
many_digits: false,
188+
},
189+
len,
190+
));
191+
}
192+
193+
let mut many_digits = false;
194+
let mut p = digits_start;
195+
while p.check_first_either(b'0', b'.') {
196+
n_digits -= p.first().saturating_sub(b'0' - 1) as isize; // '0' = b'.' + 2
197+
p.step();
198+
}
163199
if n_digits > 0 {
164-
let mut p = digits_start;
165-
while p.check_first_either(b'0', b'.') {
166-
n_digits -= p.first().saturating_sub(b'0' - 1) as isize; // '0' = b'.' + 2
167-
p.step();
168-
}
169-
if n_digits > 0 {
170-
mantissa = u64::MAX;
171-
}
200+
// at this point we have more than 19 significant digits, let's try again
201+
many_digits = true;
202+
mantissa = 0u64;
203+
let mut s = digits_start;
204+
try_parse_19digits(&mut s, &mut mantissa);
205+
exponent = if mantissa >= MIN_19DIGIT_INT {
206+
int_end.offset_from(&s) // big int
207+
} else {
208+
s.step(); // fractional component, skip the '.'
209+
let before = s;
210+
try_parse_19digits(&mut s, &mut mantissa);
211+
-s.offset_from(&before)
212+
} as i64;
213+
exponent += exp_number; // add back the explicit part
172214
}
173215

174-
let number = Number {
175-
exponent,
176-
mantissa,
177-
negative,
178-
};
179-
Some((number, s.offset_from(&start) as usize))
216+
Some((
217+
Number {
218+
exponent,
219+
mantissa,
220+
negative,
221+
many_digits,
222+
},
223+
len,
224+
))
180225
}
181226

182227
#[inline]

src/parse.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,21 +13,25 @@ pub fn parse_float_fmt<F: Float>(mut s: &[u8], fmt: FloatFormat) -> Option<(F, u
1313
if s.is_empty() {
1414
return None;
1515
}
16+
1617
let (num, rest) = match parse_number(s, fmt) {
1718
Some(r) => r,
1819
None => return parse_inf_nan(s),
1920
};
2021
if let Some(value) = num.try_fast_path::<F>() {
2122
return Some((value, rest));
2223
}
23-
let mut am = if num.mantissa == u64::MAX {
24-
parse_long_mantissa::<F>(s)
25-
} else {
26-
compute_float_from_exp_mantissa::<F>(num.exponent, num.mantissa)
27-
};
24+
25+
let mut am = compute_float_from_exp_mantissa::<F>(num.exponent, num.mantissa);
26+
if num.many_digits {
27+
if am != compute_float_from_exp_mantissa::<F>(num.exponent, num.mantissa + 1) {
28+
am.power2 = -1;
29+
}
30+
}
2831
if am.power2 < 0 {
2932
am = parse_long_mantissa::<F>(s);
3033
}
34+
3135
let mut word = am.mantissa;
3236
word |= (am.power2 as u64) << F::MANTISSA_EXPLICIT_BITS;
3337
if num.negative {
@@ -40,6 +44,7 @@ pub fn parse_float_fmt<F: Float>(mut s: &[u8], fmt: FloatFormat) -> Option<(F, u
4044
*(&word as *const _ as *const F)
4145
}
4246
};
47+
4348
Some((value, rest))
4449
}
4550

src/simple.rs

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,11 @@
1-
use crate::binary::compute_float_from_exp_mantissa;
21
use crate::common::AdjustedMantissa;
32
use crate::decimal::Decimal;
43
use crate::float::Float;
54

65
#[inline]
76
pub fn parse_long_mantissa<F: Float>(s: &[u8]) -> AdjustedMantissa {
87
let mut d = Decimal::parse(s);
9-
let mantissa = d.to_truncated_mantissa();
10-
let exponent = d.to_truncated_exponent() as i64;
11-
let am1 = compute_float_from_exp_mantissa::<F>(exponent, mantissa);
12-
let am2 = compute_float_from_exp_mantissa::<F>(exponent, mantissa + 1);
13-
if am1 == am2 && am1.power2 >= 0 {
14-
am1
15-
} else {
16-
compute_float_from_decimal::<F>(&mut d)
17-
}
8+
compute_float_from_decimal::<F>(&mut d)
189
}
1910

2011
#[inline]

0 commit comments

Comments
 (0)