@@ -2336,13 +2336,13 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
23362336 ///
23372337 /// Note that this table does not contain values for which an inverse does not exist (i.e., for
23382338 /// `0⁻¹ mod 16`, `2⁻¹ mod 16`, etc.)
2339- const INV_TABLE_MOD_16 : [ usize ; 8 ] = [ 1 , 11 , 13 , 7 , 9 , 3 , 5 , 15 ] ;
2339+ const INV_TABLE_MOD_16 : [ u8 ; 8 ] = [ 1 , 11 , 13 , 7 , 9 , 3 , 5 , 15 ] ;
23402340 /// Modulus for which `INV_TABLE_MOD_16` is intended.
23412341 const INV_TABLE_MOD : usize = 16 ;
23422342 /// INV_TABLE_MOD²
23432343 const INV_TABLE_MOD_SQUARED : usize = INV_TABLE_MOD * INV_TABLE_MOD ;
23442344
2345- let table_inverse = INV_TABLE_MOD_16 [ ( x & ( INV_TABLE_MOD - 1 ) ) >> 1 ] ;
2345+ let table_inverse = INV_TABLE_MOD_16 [ ( x & ( INV_TABLE_MOD - 1 ) ) >> 1 ] as usize ;
23462346 if m <= INV_TABLE_MOD {
23472347 table_inverse & ( m - 1 )
23482348 } else {
@@ -2395,36 +2395,23 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
23952395 let gcdpow = intrinsics:: cttz_nonzero ( stride) . min ( intrinsics:: cttz_nonzero ( a) ) ;
23962396 let gcd = 1usize << gcdpow;
23972397
2398- if gcd == 1 {
2399- // This branch solves for the variable $o$ in following linear congruence equation:
2400- //
2401- // ⎰ p + o ≡ 0 (mod a) # $p + o$ must be aligned to specified alignment $a$
2402- // ⎱ o ≡ 0 (mod s) # offset $o$ must be a multiple of stride $s$
2403- //
2404- // where
2398+ if p as usize & ( gcd - 1 ) == 0 {
2399+ // This branch solves the following linear congruence equation for $o$:
24052400 //
2406- // * a, s are co-prime
2401+ // $$ p + so ≡ 0 mod a $$
24072402 //
2408- // This gives us the formula below:
2403+ // $p$ here is the pointer value, $s$ – the stride of `T`, $o$ – the offset in `T`s, and $a$ – the
2404+ // requested alignment.
24092405 //
2410- // o = (a - (p mod a)) * (s⁻¹ mod a) * s
2406+ // g = gcd(a, s)
2407+ // o = (a - (p mod a))/g * ((s/g)⁻¹ mod a)
24112408 //
24122409 // The first term is “the relative alignment of p to a”, the second term is “how does
2413- // incrementing p by one s change the relative alignment of p”, the third term is
2414- // translating change in units of s to a byte count .
2410+ // incrementing p by s bytes change the relative alignment of p”. Division by `g` is
2411+ // necessary to make this equation well-formed if $a$ and $s$ are not co-prime.
24152412 //
24162413 // Furthermore, the result produced by this solution is not “minimal”, so it is necessary
2417- // to take the result $o mod lcm(s, a)$. Since $s$ and $a$ are co-prime (i.e. $gcd(s, a) =
2418- // 1$) and $lcm(s, a) = s * a / gcd(s, a)$, we can replace $lcm(s, a)$ with just a $s * a$.
2419- //
2420- // (Author note: we decided later on to express the offset in "elements" rather than bytes,
2421- // which drops the multiplication by `s` on both sides of the modulo.)
2422- return intrinsics:: unchecked_rem ( a. wrapping_sub ( pmoda) . wrapping_mul ( mod_inv ( smoda, a) ) , a) ;
2423- }
2424-
2425- if p as usize & ( gcd - 1 ) == 0 {
2426- // This can be aligned, but `a` and `stride` are not co-prime, so a somewhat adapted
2427- // formula is used.
2414+ // to take the result $o mod lcm(s, a)$. We can replace $lcm(s, a)$ with just $a / g$.
24282415 let j = a. wrapping_sub ( pmoda) >> gcdpow;
24292416 let k = smoda >> gcdpow;
24302417 return intrinsics:: unchecked_rem ( j. wrapping_mul ( mod_inv ( k, a) ) , a >> gcdpow) ;
0 commit comments