Skip to content

Slower binary, slower compiler, bigger binary since some days #46897

Closed
@leonardo-m

Description

@leonardo-m

In the 2017-12-14 Nightly I've seen a significant (almost 20%) compilation-time increase when I use --emit=metadata on my code. And the successive day (2017-12-15) the compilation got a bit slower still and the run-time of the binary also got about 8% slower. The successive days the situation kept going slightly worse. Extracting parts of my code to show compiler/code performance is not easy, so I've composed this benchmark function (that solves an Euler problem. If this code isn't enough I will write other similar benchmarks):

Source code
use std::mem::size_of;

macro_rules! isqrt { ($e:expr, $t:ident) => ((($e) as f64).sqrt() as $t)  }

struct SieveBits { bits: Vec<usize> }

impl SieveBits {
    const BPC: usize = size_of::<usize>() * 8;

    fn is_bit_set(bits: &[usize], i: usize) -> bool {
        debug_assert!(i < bits.len() * Self::BPC);
        let offset = i / Self::BPC;
        let mask = 1 << (i % Self::BPC);
        unsafe { (*bits.get_unchecked(offset) & mask) != 0 }
    }

    fn reset_bit(bits: &mut [usize], i: usize) {
        debug_assert!(i < bits.len() * Self::BPC);
        let offset = i / Self::BPC;
        let mask = 1 << (i % Self::BPC);
        unsafe { *bits.get_unchecked_mut(offset) &= !mask; }
    }

    fn new(m: usize) -> Self {
        if m < 2 {
            return Self { bits: vec![] };
        }
        let mut primes = vec![std::usize::MAX; (m / 3) / Self::BPC + 1];
        Self::reset_bit(&mut primes, 0);

        let lim = isqrt!(m, usize) + 1;
        let mut i = 5;
        let mut step = 2;
        while i < lim {
            if Self::is_bit_set(&primes, i / 3) {
                let mut j = i * i;
                let mut step2 = step;
                while j < m {
                    Self::reset_bit(&mut primes, j / 3);
                    j += step2 * i;
                    step2 = 6 - step2;
                }
            }
            i += step;
            step = 6 - step;
        }

        Self { bits: primes }
    }

    fn is_prime(&self, i: usize) -> bool {
        if i == 2 || i == 3 { true }
        else if i % 2 == 0 || i % 3 == 0 { false }
        else { Self::is_bit_set(&self.bits, i / 3) }
    }
}


fn eratosthenes_sieve_u32(limit: usize) -> Vec<u32> {
    assert!(limit <= std::u32::MAX as usize);

    #[inline(always)]
    fn fill_u8(data: &mut [u8], value: u8) {
        unsafe {
            std::ptr::write_bytes(data.as_mut_ptr(), value, data.len());
        }
    }

    const L1D_CACHE_SIZE:u32 = 32_768;

    let mut result = vec![];
    if limit < 2 {
        return result;
    } else {
        result.push(2);
    }

    let lsqrt = isqrt!(limit, u32);

    let mut is_prime = vec![1u8; lsqrt as usize + 1];
    let mut i = 2;
    while i * i <= lsqrt {
        unsafe {
            if *is_prime.get_unchecked(i as usize) != 0 {
                let mut j = i * i;
                while j <= lsqrt {
                    *is_prime.get_unchecked_mut(j as usize) = 0;
                    j += i;
                }
            }
        }
        i += 1;
    }

    let segment_size = lsqrt.max(L1D_CACHE_SIZE);
    let mut s: usize = 3;
    let mut n: usize = 3;

    let mut sieve = vec![1u8; segment_size as usize]; // Vector used for sieving.
    let mut primes: Vec<u32> = vec![];
    let mut next: Vec<u32> = vec![];
    let mut low: usize = 0;

    while low <= limit {
        fill_u8(&mut sieve, 1);

        let high = (low + segment_size as usize - 1).min(limit);

        unsafe {
            while s * s <= high {
                if *is_prime.get_unchecked(s) != 0 {
                    primes.push(s as u32);
                    next.push((s * s - low) as u32);
                }
                s += 2;
            }

            for (i, &p) in primes.iter().enumerate() {
                let k = p * 2;
                let mut j = *next.get_unchecked(i);
                while j < segment_size {
                    *sieve.get_unchecked_mut(j as usize) = 0;
                    j += k;
                }
                *next.get_unchecked_mut(i) = j - segment_size;
            }

            while n <= high {
                if *sieve.get_unchecked(n - low) != 0 { // n is a prime.
                    result.push(n as u32);
                }
                n += 2;
            }
        }

        low += segment_size as usize;
    }

    result
}


fn main() {
    fn e249() -> u64 {
        const N: usize = 5_000;
        const M: u64 = 10_000_000_000_000_000_u64;

        let check_every = (((1u64 << 63) / M) as f64).log2().ceil() as usize;

        let primes = eratosthenes_sieve_u32(N);
        let mut ways = vec![0u64; primes.iter().sum::<u32>() as usize + 1];
        ways[0] = 1;
        let mut max: usize = 0;

        for (i, &p) in primes.iter().enumerate() {
            for j in (0 .. max + 1).rev() {
                unsafe {
                    *ways.get_unchecked_mut(j + p as usize) =
                        *ways.get_unchecked(j + p as usize) +
                        *ways.get_unchecked(j);
                }
            }

            if (i + 1) % check_every == 0 {
                for w in ways[.. max + 1].iter_mut() {
                    *w = *w % M;
                }
            }
            max += p as usize;
        }

        let sieve = SieveBits::new(max);
        let mut count = ways[2];
        let mut i = 3;
        while i <= max {
            if sieve.is_prime(i) {
                count = (count + ways[i]) % M;
            }
            i += 2;
        }
        count
    }
    assert_eq!(e249(), 9_275_262_564_250_418);
}

Compiled with:
rustc -C opt-level=3 -C target-cpu=native -C panic=abort test.rs

host: x86_64-pc-windows-gnu

Runtime:
f8af59d 2017-12-13: 0.30 seconds
edbd7d2 2017-12-20: 0.53 seconds

Asm from the the 2017-12-13 compiler:
https://gist.github.com/anonymous/5c475819a2b7d0e01c036582c76cbd19

Asm from the the 2017-12-20 compiler:
https://gist.github.com/anonymous/bb984787c802da22b76c182d0f260872

Metadata

Metadata

Assignees

No one assigned

    Labels

    I-slowIssue: Problems and improvements with respect to performance of generated code.T-compilerRelevant to the compiler team, which will review and decide on the PR/issue.regression-from-stable-to-betaPerformance or correctness regression from stable to beta.regression-from-stable-to-nightlyPerformance or correctness regression from stable to nightly.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions