Skip to content

Commit ad60b60

Browse files
bors[bot]smarnach
andcommitted
Merge #11
11: Optimise the GCD implementations. r=cuviper a=smarnach This change avoids using `swap()` in the GCD implementation, which results in a speedup of almost a factor of two on my system. Here are a few benchmark numbers for `u32` values, where `t_old` is the time per function call for the current implementation, and `t_new` is the time of the implementation introduced in this change. ``` | m | n | t_old (ns) | t_new (ns) | +---------------+---------------+------------+------------+ | 2_971_215_073 | 1_836_311_903 | 76 | 40 | | 253_241 | 489_997 | 26 | 12 | | 4_183_928_743 | 1_234_567 | 63 | 31 | | 3_221_225_469 | 3 | 93 | 47 | | 0xffff_ffff | 0x8000_0000 | 98 | 55 | ``` I'm aware that having tested this on a single system for a single type doesn't tell us too much, but before trying to perform a more thorough benchmark, I'd like to know whether you are interested in this kind of optimisation in principle, and what kind of benchmark results you would like to see to inlcude it. Co-authored-by: Sven Marnach <[email protected]>
2 parents 65eb4c5 + 1cb1d29 commit ad60b60

File tree

2 files changed

+196
-15
lines changed

2 files changed

+196
-15
lines changed

benches/gcd.rs

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
//! Benchmark comparing the current GCD implemtation against an older one.
2+
3+
#![feature(test)]
4+
5+
extern crate num_integer;
6+
extern crate num_traits;
7+
extern crate test;
8+
9+
use num_integer::Integer;
10+
use num_traits::{AsPrimitive, Bounded, Signed};
11+
use test::{black_box, Bencher};
12+
13+
trait GcdOld: Integer {
14+
fn gcd_old(&self, other: &Self) -> Self;
15+
}
16+
17+
macro_rules! impl_gcd_old_for_isize {
18+
($T:ty) => {
19+
impl GcdOld for $T {
20+
/// Calculates the Greatest Common Divisor (GCD) of the number and
21+
/// `other`. The result is always positive.
22+
#[inline]
23+
fn gcd_old(&self, other: &Self) -> Self {
24+
// Use Stein's algorithm
25+
let mut m = *self;
26+
let mut n = *other;
27+
if m == 0 || n == 0 {
28+
return (m | n).abs();
29+
}
30+
31+
// find common factors of 2
32+
let shift = (m | n).trailing_zeros();
33+
34+
// The algorithm needs positive numbers, but the minimum value
35+
// can't be represented as a positive one.
36+
// It's also a power of two, so the gcd can be
37+
// calculated by bitshifting in that case
38+
39+
// Assuming two's complement, the number created by the shift
40+
// is positive for all numbers except gcd = abs(min value)
41+
// The call to .abs() causes a panic in debug mode
42+
if m == Self::min_value() || n == Self::min_value() {
43+
return (1 << shift).abs();
44+
}
45+
46+
// guaranteed to be positive now, rest like unsigned algorithm
47+
m = m.abs();
48+
n = n.abs();
49+
50+
// divide n and m by 2 until odd
51+
// m inside loop
52+
n >>= n.trailing_zeros();
53+
54+
while m != 0 {
55+
m >>= m.trailing_zeros();
56+
if n > m {
57+
std::mem::swap(&mut n, &mut m)
58+
}
59+
m -= n;
60+
}
61+
62+
n << shift
63+
}
64+
}
65+
};
66+
}
67+
68+
impl_gcd_old_for_isize!(i8);
69+
impl_gcd_old_for_isize!(i16);
70+
impl_gcd_old_for_isize!(i32);
71+
impl_gcd_old_for_isize!(i64);
72+
impl_gcd_old_for_isize!(isize);
73+
impl_gcd_old_for_isize!(i128);
74+
75+
macro_rules! impl_gcd_old_for_usize {
76+
($T:ty) => {
77+
impl GcdOld for $T {
78+
/// Calculates the Greatest Common Divisor (GCD) of the number and
79+
/// `other`. The result is always positive.
80+
#[inline]
81+
fn gcd_old(&self, other: &Self) -> Self {
82+
// Use Stein's algorithm
83+
let mut m = *self;
84+
let mut n = *other;
85+
if m == 0 || n == 0 {
86+
return m | n;
87+
}
88+
89+
// find common factors of 2
90+
let shift = (m | n).trailing_zeros();
91+
92+
// divide n and m by 2 until odd
93+
// m inside loop
94+
n >>= n.trailing_zeros();
95+
96+
while m != 0 {
97+
m >>= m.trailing_zeros();
98+
if n > m {
99+
std::mem::swap(&mut n, &mut m)
100+
}
101+
m -= n;
102+
}
103+
104+
n << shift
105+
}
106+
}
107+
};
108+
}
109+
110+
impl_gcd_old_for_usize!(u8);
111+
impl_gcd_old_for_usize!(u16);
112+
impl_gcd_old_for_usize!(u32);
113+
impl_gcd_old_for_usize!(u64);
114+
impl_gcd_old_for_usize!(usize);
115+
impl_gcd_old_for_usize!(u128);
116+
117+
/// Return an iterator that yields all Fibonacci numbers fitting into a u128.
118+
fn fibonacci() -> impl Iterator<Item=u128> {
119+
(0..185).scan((0, 1), |&mut (ref mut a, ref mut b), _| {
120+
let tmp = *a;
121+
*a = *b;
122+
*b += tmp;
123+
Some(*b)
124+
})
125+
}
126+
127+
fn run_bench<T: Integer + Bounded + Copy + 'static>(b: &mut Bencher, gcd: fn(&T, &T) -> T)
128+
where
129+
T: AsPrimitive<u128>,
130+
u128: AsPrimitive<T>,
131+
{
132+
let max_value: u128 = T::max_value().as_();
133+
let pairs: Vec<(T, T)> = fibonacci()
134+
.collect::<Vec<_>>()
135+
.windows(2)
136+
.filter(|&pair| pair[0] <= max_value && pair[1] <= max_value)
137+
.map(|pair| (pair[0].as_(), pair[1].as_()))
138+
.collect();
139+
b.iter(|| {
140+
for &(ref m, ref n) in &pairs {
141+
black_box(gcd(m, n));
142+
}
143+
});
144+
}
145+
146+
macro_rules! bench_gcd {
147+
($T:ident) => {
148+
mod $T {
149+
use crate::{run_bench, GcdOld};
150+
use num_integer::Integer;
151+
use test::Bencher;
152+
153+
#[bench]
154+
fn bench_gcd(b: &mut Bencher) {
155+
run_bench(b, $T::gcd);
156+
}
157+
158+
#[bench]
159+
fn bench_gcd_old(b: &mut Bencher) {
160+
run_bench(b, $T::gcd_old);
161+
}
162+
}
163+
};
164+
}
165+
166+
bench_gcd!(u8);
167+
bench_gcd!(u16);
168+
bench_gcd!(u32);
169+
bench_gcd!(u64);
170+
bench_gcd!(u128);
171+
172+
bench_gcd!(i8);
173+
bench_gcd!(i16);
174+
bench_gcd!(i32);
175+
bench_gcd!(i64);
176+
bench_gcd!(i128);

src/lib.rs

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ extern crate std;
2323
extern crate num_traits as traits;
2424

2525
use core::ops::Add;
26-
use core::mem;
2726

2827
use traits::{Num, Signed};
2928

@@ -275,16 +274,19 @@ macro_rules! impl_integer_for_isize {
275274
n = n.abs();
276275

277276
// divide n and m by 2 until odd
278-
// m inside loop
277+
m >>= m.trailing_zeros();
279278
n >>= n.trailing_zeros();
280279

281-
while m != 0 {
282-
m >>= m.trailing_zeros();
283-
if n > m { mem::swap(&mut n, &mut m) }
284-
m -= n;
280+
while m != n {
281+
if m > n {
282+
m -= n;
283+
m >>= m.trailing_zeros();
284+
} else {
285+
n -= m;
286+
n >>= n.trailing_zeros();
287+
}
285288
}
286-
287-
n << shift
289+
m << shift
288290
}
289291

290292
/// Calculates the Lowest Common Multiple (LCM) of the number and
@@ -537,16 +539,19 @@ macro_rules! impl_integer_for_usize {
537539
let shift = (m | n).trailing_zeros();
538540

539541
// divide n and m by 2 until odd
540-
// m inside loop
542+
m >>= m.trailing_zeros();
541543
n >>= n.trailing_zeros();
542544

543-
while m != 0 {
544-
m >>= m.trailing_zeros();
545-
if n > m { mem::swap(&mut n, &mut m) }
546-
m -= n;
545+
while m != n {
546+
if m > n {
547+
m -= n;
548+
m >>= m.trailing_zeros();
549+
} else {
550+
n -= m;
551+
n >>= n.trailing_zeros();
552+
}
547553
}
548-
549-
n << shift
554+
m << shift
550555
}
551556

552557
/// Calculates the Lowest Common Multiple (LCM) of the number and `other`.

0 commit comments

Comments
 (0)