@@ -1166,6 +1166,10 @@ pub unsafe fn write_volatile<T>(dst: *mut T, src: T) {
11661166/// Any questions go to @nagisa.
11671167#[ lang = "align_offset" ]
11681168pub ( crate ) unsafe fn align_offset < T : Sized > ( p : * const T , a : usize ) -> usize {
1169+ // FIXME(#75598): Direct use of these intrinsics improves codegen significantly at opt-level <=
1170+ // 1, where the method versions of these operations are not inlined.
1171+ use intrinsics:: { unchecked_shl, unchecked_shr, unchecked_sub, wrapping_mul, wrapping_sub} ;
1172+
11691173 /// Calculate multiplicative modular inverse of `x` modulo `m`.
11701174 ///
11711175 /// This implementation is tailored for `align_offset` and has the following preconditions:
@@ -1175,7 +1179,7 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
11751179 ///
11761180 /// Implementation of this function shall not panic. Ever.
11771181 #[ inline]
1178- fn mod_inv ( x : usize , m : usize ) -> usize {
1182+ unsafe fn mod_inv ( x : usize , m : usize ) -> usize {
11791183 /// Multiplicative modular inverse table modulo 2⁴ = 16.
11801184 ///
11811185 /// Note, that this table does not contain values where inverse does not exist (i.e., for
@@ -1187,8 +1191,10 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
11871191 const INV_TABLE_MOD_SQUARED : usize = INV_TABLE_MOD * INV_TABLE_MOD ;
11881192
11891193 let table_inverse = INV_TABLE_MOD_16 [ ( x & ( INV_TABLE_MOD - 1 ) ) >> 1 ] as usize ;
1194+ // SAFETY: `m` is required to be a power-of-two, hence non-zero.
1195+ let m_minus_one = unsafe { unchecked_sub ( m, 1 ) } ;
11901196 if m <= INV_TABLE_MOD {
1191- table_inverse & ( m - 1 )
1197+ table_inverse & m_minus_one
11921198 } else {
11931199 // We iterate "up" using the following formula:
11941200 //
@@ -1204,17 +1210,18 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
12041210 // uses e.g., subtraction `mod n`. It is entirely fine to do them `mod
12051211 // usize::MAX + 1` instead, because we take the result `mod n` at the end
12061212 // anyway.
1207- inverse = inverse . wrapping_mul ( 2usize . wrapping_sub ( x . wrapping_mul ( inverse) ) ) ;
1213+ inverse = wrapping_mul ( inverse , wrapping_sub ( 2usize , wrapping_mul ( x , inverse) ) ) ;
12081214 if going_mod >= m {
1209- return inverse & ( m - 1 ) ;
1215+ return inverse & m_minus_one ;
12101216 }
1211- going_mod = going_mod . wrapping_mul ( going_mod) ;
1217+ going_mod = wrapping_mul ( going_mod , going_mod) ;
12121218 }
12131219 }
12141220 }
12151221
12161222 let stride = mem:: size_of :: < T > ( ) ;
1217- let a_minus_one = a. wrapping_sub ( 1 ) ;
1223+ // SAFETY: `a` is a power-of-two, hence non-zero.
1224+ let a_minus_one = unsafe { unchecked_sub ( a, 1 ) } ;
12181225 let pmoda = p as usize & a_minus_one;
12191226
12201227 if pmoda == 0 {
@@ -1228,25 +1235,27 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
12281235 // elements will ever align the pointer.
12291236 !0
12301237 } else {
1231- a . wrapping_sub ( pmoda)
1238+ wrapping_sub ( a , pmoda)
12321239 } ;
12331240 }
12341241
12351242 let smoda = stride & a_minus_one;
1236- // SAFETY: a is power-of-two so cannot be 0 . stride = 0 is handled above.
1243+ // SAFETY: `a` is a power of two, hence non-zero. The `stride == 0` case is handled above.
12371244 let gcdpow = unsafe { intrinsics:: cttz_nonzero ( stride) . min ( intrinsics:: cttz_nonzero ( a) ) } ;
1238- let gcd = 1usize << gcdpow;
1245+ // SAFETY: gcdpow is a trailing-zero count of a non-zero value, hence strictly less than the number of bits in a usize.
1246+ let gcd = unsafe { unchecked_shl ( 1usize , gcdpow) } ;
12391247
1240- if p as usize & ( gcd. wrapping_sub ( 1 ) ) == 0 {
1248+ // SAFETY: gcd is always greater than or equal to 1.
1249+ if p as usize & unsafe { unchecked_sub ( gcd, 1 ) } == 0 {
12411250 // This branch solves for the following linear congruence equation:
12421251 //
12431252 // ` p + so = 0 mod a `
12441253 //
12451254 // `p` here is the pointer value, `s` - stride of `T`, `o` offset in `T`s, and `a` - the
12461255 // requested alignment.
12471256 //
1248- // With `g = gcd(a, s)`, and the above asserting that `p` is also divisible by `g`, we can
1249- // denote `a' = a/g`, `s' = s/g`, `p' = p/g`, then this becomes equivalent to:
1257+ // With `g = gcd(a, s)`, and the above condition asserting that `p` is also divisible by
1258+ // `g`, we can denote `a' = a/g`, `s' = s/g`, `p' = p/g`, then this becomes equivalent to:
12501259 //
12511260 // ` p' + s'o = 0 mod a' `
12521261 // ` o = (a' - (p' mod a')) * (s'^-1 mod a') `
@@ -1259,11 +1268,23 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
12591268 //
12601269 // Furthermore, the result produced by this solution is not "minimal", so it is necessary
12611270 // to take the result `o mod lcm(s, a)`. We can replace `lcm(s, a)` with just `a'`.
1262- let a2 = a >> gcdpow;
1263- let a2minus1 = a2. wrapping_sub ( 1 ) ;
1264- let s2 = smoda >> gcdpow;
1265- let minusp2 = a2. wrapping_sub ( pmoda >> gcdpow) ;
1266- return ( minusp2. wrapping_mul ( mod_inv ( s2, a2) ) ) & a2minus1;
1271+
1272+ // SAFETY: `gcdpow` has an upper-bound not greater than the number of trailing 0-bits in
1273+ // `a`.
1274+ let a2 = unsafe { unchecked_shr ( a, gcdpow) } ;
1275+ // SAFETY: `a2` is non-zero. Shifting `a` by `gcdpow` cannot shift out any of the set bits
1276+ // in `a` (of which it has exactly one).
1277+ let a2minus1 = unsafe { unchecked_sub ( a2, 1 ) } ;
1278+ // SAFETY: `gcdpow` has an upper-bound not greater than the number of trailing 0-bits in
1279+ // `a`.
1280+ let s2 = unsafe { unchecked_shr ( smoda, gcdpow) } ;
1281+ // SAFETY: `gcdpow` has an upper-bound not greater than the number of trailing 0-bits in
1282+ // `a`. Furthermore, the subtraction cannot overflow, because `a2 = a >> gcdpow` will
1283+ // always be strictly greater than `(p % a) >> gcdpow`.
1284+ let minusp2 = unsafe { unchecked_sub ( a2, unchecked_shr ( pmoda, gcdpow) ) } ;
1285+ // SAFETY: `a2` is a power-of-two, as proven above. `s2` is strictly less than `a2`
1286+ // because `(s % a) >> gcdpow` is strictly less than `a >> gcdpow`.
1287+ return wrapping_mul ( minusp2, unsafe { mod_inv ( s2, a2) } ) & a2minus1;
12671288 }
12681289
12691290 // Cannot be aligned at all.
0 commit comments