Description
In the 2017-12-14 Nightly I've seen a significant (almost 20%) compilation-time increase when I use --emit=metadata
on my code. And the successive day (2017-12-15) the compilation got a bit slower still and the run-time of the binary also got about 8% slower. The successive days the situation kept going slightly worse. Extracting parts of my code to show compiler/code performance is not easy, so I've composed this benchmark function (that solves an Euler problem. If this code isn't enough I will write other similar benchmarks):
Source code
use std::mem::size_of;
macro_rules! isqrt { ($e:expr, $t:ident) => ((($e) as f64).sqrt() as $t) }
struct SieveBits { bits: Vec<usize> }
impl SieveBits {
const BPC: usize = size_of::<usize>() * 8;
fn is_bit_set(bits: &[usize], i: usize) -> bool {
debug_assert!(i < bits.len() * Self::BPC);
let offset = i / Self::BPC;
let mask = 1 << (i % Self::BPC);
unsafe { (*bits.get_unchecked(offset) & mask) != 0 }
}
fn reset_bit(bits: &mut [usize], i: usize) {
debug_assert!(i < bits.len() * Self::BPC);
let offset = i / Self::BPC;
let mask = 1 << (i % Self::BPC);
unsafe { *bits.get_unchecked_mut(offset) &= !mask; }
}
fn new(m: usize) -> Self {
if m < 2 {
return Self { bits: vec![] };
}
let mut primes = vec![std::usize::MAX; (m / 3) / Self::BPC + 1];
Self::reset_bit(&mut primes, 0);
let lim = isqrt!(m, usize) + 1;
let mut i = 5;
let mut step = 2;
while i < lim {
if Self::is_bit_set(&primes, i / 3) {
let mut j = i * i;
let mut step2 = step;
while j < m {
Self::reset_bit(&mut primes, j / 3);
j += step2 * i;
step2 = 6 - step2;
}
}
i += step;
step = 6 - step;
}
Self { bits: primes }
}
fn is_prime(&self, i: usize) -> bool {
if i == 2 || i == 3 { true }
else if i % 2 == 0 || i % 3 == 0 { false }
else { Self::is_bit_set(&self.bits, i / 3) }
}
}
fn eratosthenes_sieve_u32(limit: usize) -> Vec<u32> {
assert!(limit <= std::u32::MAX as usize);
#[inline(always)]
fn fill_u8(data: &mut [u8], value: u8) {
unsafe {
std::ptr::write_bytes(data.as_mut_ptr(), value, data.len());
}
}
const L1D_CACHE_SIZE:u32 = 32_768;
let mut result = vec![];
if limit < 2 {
return result;
} else {
result.push(2);
}
let lsqrt = isqrt!(limit, u32);
let mut is_prime = vec![1u8; lsqrt as usize + 1];
let mut i = 2;
while i * i <= lsqrt {
unsafe {
if *is_prime.get_unchecked(i as usize) != 0 {
let mut j = i * i;
while j <= lsqrt {
*is_prime.get_unchecked_mut(j as usize) = 0;
j += i;
}
}
}
i += 1;
}
let segment_size = lsqrt.max(L1D_CACHE_SIZE);
let mut s: usize = 3;
let mut n: usize = 3;
let mut sieve = vec![1u8; segment_size as usize]; // Vector used for sieving.
let mut primes: Vec<u32> = vec![];
let mut next: Vec<u32> = vec![];
let mut low: usize = 0;
while low <= limit {
fill_u8(&mut sieve, 1);
let high = (low + segment_size as usize - 1).min(limit);
unsafe {
while s * s <= high {
if *is_prime.get_unchecked(s) != 0 {
primes.push(s as u32);
next.push((s * s - low) as u32);
}
s += 2;
}
for (i, &p) in primes.iter().enumerate() {
let k = p * 2;
let mut j = *next.get_unchecked(i);
while j < segment_size {
*sieve.get_unchecked_mut(j as usize) = 0;
j += k;
}
*next.get_unchecked_mut(i) = j - segment_size;
}
while n <= high {
if *sieve.get_unchecked(n - low) != 0 { // n is a prime.
result.push(n as u32);
}
n += 2;
}
}
low += segment_size as usize;
}
result
}
fn main() {
fn e249() -> u64 {
const N: usize = 5_000;
const M: u64 = 10_000_000_000_000_000_u64;
let check_every = (((1u64 << 63) / M) as f64).log2().ceil() as usize;
let primes = eratosthenes_sieve_u32(N);
let mut ways = vec![0u64; primes.iter().sum::<u32>() as usize + 1];
ways[0] = 1;
let mut max: usize = 0;
for (i, &p) in primes.iter().enumerate() {
for j in (0 .. max + 1).rev() {
unsafe {
*ways.get_unchecked_mut(j + p as usize) =
*ways.get_unchecked(j + p as usize) +
*ways.get_unchecked(j);
}
}
if (i + 1) % check_every == 0 {
for w in ways[.. max + 1].iter_mut() {
*w = *w % M;
}
}
max += p as usize;
}
let sieve = SieveBits::new(max);
let mut count = ways[2];
let mut i = 3;
while i <= max {
if sieve.is_prime(i) {
count = (count + ways[i]) % M;
}
i += 2;
}
count
}
assert_eq!(e249(), 9_275_262_564_250_418);
}
Compiled with:
rustc -C opt-level=3 -C target-cpu=native -C panic=abort test.rs
host: x86_64-pc-windows-gnu
Runtime:
f8af59d 2017-12-13: 0.30 seconds
edbd7d2 2017-12-20: 0.53 seconds
Asm from the the 2017-12-13 compiler:
https://gist.github.com/anonymous/5c475819a2b7d0e01c036582c76cbd19
Asm from the the 2017-12-20 compiler:
https://gist.github.com/anonymous/bb984787c802da22b76c182d0f260872