@@ -2336,13 +2336,13 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
2336
2336
///
2337
2337
/// Note that this table does not contain values where the inverse does not exist (i.e. for
2338
2338
/// `0⁻¹ mod 16`, `2⁻¹ mod 16`, etc.)
2339
- const INV_TABLE_MOD_16 : [ usize ; 8 ] = [ 1 , 11 , 13 , 7 , 9 , 3 , 5 , 15 ] ;
2339
+ const INV_TABLE_MOD_16 : [ u8 ; 8 ] = [ 1 , 11 , 13 , 7 , 9 , 3 , 5 , 15 ] ;
2340
2340
/// Modulo for which the `INV_TABLE_MOD_16` is intended.
2341
2341
const INV_TABLE_MOD : usize = 16 ;
2342
2342
/// INV_TABLE_MOD²
2343
2343
const INV_TABLE_MOD_SQUARED : usize = INV_TABLE_MOD * INV_TABLE_MOD ;
2344
2344
2345
- let table_inverse = INV_TABLE_MOD_16 [ ( x & ( INV_TABLE_MOD - 1 ) ) >> 1 ] ;
2345
+ let table_inverse = INV_TABLE_MOD_16 [ ( x & ( INV_TABLE_MOD - 1 ) ) >> 1 ] as usize ;
2346
2346
if m <= INV_TABLE_MOD {
2347
2347
table_inverse & ( m - 1 )
2348
2348
} else {
@@ -2395,36 +2395,23 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
2395
2395
let gcdpow = intrinsics:: cttz_nonzero ( stride) . min ( intrinsics:: cttz_nonzero ( a) ) ;
2396
2396
let gcd = 1usize << gcdpow;
2397
2397
2398
- if gcd == 1 {
2399
- // This branch solves for the variable $o$ in following linear congruence equation:
2400
- //
2401
- // ⎰ p + o ≡ 0 (mod a) # $p + o$ must be aligned to specified alignment $a$
2402
- // ⎱ o ≡ 0 (mod s) # offset $o$ must be a multiple of stride $s$
2403
- //
2404
- // where
2398
+ if p as usize & ( gcd - 1 ) == 0 {
2399
+ // This branch solves the following linear congruence equation for $o$:
2405
2400
//
2406
- // * a, s are co-prime
2401
+ // $$ p + so ≡ 0 mod a $$
2407
2402
//
2408
- // This gives us the formula below:
2403
+ // $p$ here is the pointer value, $s$ – stride of `T`, $o$ – offset in `T`s, and $a$ – the
2404
+ // requested alignment.
2409
2405
//
2410
- // o = (a - (p mod a)) * (s⁻¹ mod a) * s
2406
+ // g = gcd(a, s)
2407
+ // o = (a - (p mod a))/g * ((s/g)⁻¹ mod a)
2411
2408
//
2412
2409
// The first term is “the relative alignment of p to a”, the second term is “how does
2413
- // incrementing p by one s change the relative alignment of p”, the third term is
2414
- // translating change in units of s to a byte count .
2410
+ // incrementing p by s bytes change the relative alignment of p”. Division by `g` is
2411
+ // necessary to make this equation well formed if $a$ and $s$ are not co-prime.
2415
2412
//
2416
2413
// Furthermore, the result produced by this solution is not “minimal”, so it is necessary
2417
- // to take the result $o mod lcm(s, a)$. Since $s$ and $a$ are co-prime (i.e. $gcd(s, a) =
2418
- // 1$) and $lcm(s, a) = s * a / gcd(s, a)$, we can replace $lcm(s, a)$ with just a $s * a$.
2419
- //
2420
- // (Author note: we decided later on to express the offset in "elements" rather than bytes,
2421
- // which drops the multiplication by `s` on both sides of the modulo.)
2422
- return intrinsics:: unchecked_rem ( a. wrapping_sub ( pmoda) . wrapping_mul ( mod_inv ( smoda, a) ) , a) ;
2423
- }
2424
-
2425
- if p as usize & ( gcd - 1 ) == 0 {
2426
- // This can be aligned, but `a` and `stride` are not co-prime, so a somewhat adapted
2427
- // formula is used.
2414
+ // to take the result $o mod lcm(s, a)$. We can replace $lcm(s, a)$ with just $a / g$.
2428
2415
let j = a. wrapping_sub ( pmoda) >> gcdpow;
2429
2416
let k = smoda >> gcdpow;
2430
2417
return intrinsics:: unchecked_rem ( j. wrapping_mul ( mod_inv ( k, a) ) , a >> gcdpow) ;
0 commit comments