Skip to content

Remove SSE2 path from rustc_span analyze_source_file #137110

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
262 changes: 7 additions & 255 deletions compiler/rustc_span/src/analyze_source_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,17 @@ mod tests;

/// Finds all newlines, multi-byte characters, and non-narrow characters in a
/// SourceFile.
///
/// This function will use an SSE2 enhanced implementation if hardware support
/// is detected at runtime.
pub(crate) fn analyze_source_file(src: &str) -> (Vec<RelativeBytePos>, Vec<MultiByteChar>) {
let mut lines = vec![RelativeBytePos::from_u32(0)];
let mut multi_byte_chars = vec![];

// Calls the right implementation, depending on hardware support available.
analyze_source_file_dispatch(src, &mut lines, &mut multi_byte_chars);
analyze_source_file_generic(
src,
src.len(),
RelativeBytePos(0),
&mut lines,
&mut multi_byte_chars,
);

// The code above optimistically registers a new line *after* each \n
// it encounters. If that point is already outside the source_file, remove
Expand All @@ -29,256 +31,6 @@ pub(crate) fn analyze_source_file(src: &str) -> (Vec<RelativeBytePos>, Vec<Multi
(lines, multi_byte_chars)
}

#[cfg(bootstrap)]
cfg_match! {
cfg(any(target_arch = "x86", target_arch = "x86_64")) => {
fn analyze_source_file_dispatch(
src: &str,
lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
) {
if is_x86_feature_detected!("sse2") {
unsafe {
analyze_source_file_sse2(src, lines, multi_byte_chars);
}
} else {
analyze_source_file_generic(
src,
src.len(),
RelativeBytePos::from_u32(0),
lines,
multi_byte_chars,
);
}
}

/// Checks 16 byte chunks of text at a time. If the chunk contains
/// something other than printable ASCII characters and newlines, the
/// function falls back to the generic implementation. Otherwise it uses
/// SSE2 intrinsics to quickly find all newlines.
#[target_feature(enable = "sse2")]
unsafe fn analyze_source_file_sse2(
src: &str,
lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
) {
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

const CHUNK_SIZE: usize = 16;

let src_bytes = src.as_bytes();

let chunk_count = src.len() / CHUNK_SIZE;

// This variable keeps track of where we should start decoding a
// chunk. If a multi-byte character spans across chunk boundaries,
// we need to skip that part in the next chunk because we already
// handled it.
let mut intra_chunk_offset = 0;

for chunk_index in 0..chunk_count {
let ptr = src_bytes.as_ptr() as *const __m128i;
// We don't know if the pointer is aligned to 16 bytes, so we
// use `loadu`, which supports unaligned loading.
let chunk = unsafe { _mm_loadu_si128(ptr.add(chunk_index)) };

// For character in the chunk, see if its byte value is < 0, which
// indicates that it's part of a UTF-8 char.
let multibyte_test = unsafe { _mm_cmplt_epi8(chunk, _mm_set1_epi8(0)) };
// Create a bit mask from the comparison results.
let multibyte_mask = unsafe { _mm_movemask_epi8(multibyte_test) };

// If the bit mask is all zero, we only have ASCII chars here:
if multibyte_mask == 0 {
assert!(intra_chunk_offset == 0);

// Check for newlines in the chunk
let newlines_test = unsafe { _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8)) };
let mut newlines_mask = unsafe { _mm_movemask_epi8(newlines_test) };

let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);

while newlines_mask != 0 {
let index = newlines_mask.trailing_zeros();

lines.push(RelativeBytePos(index) + output_offset);

// Clear the bit, so we can find the next one.
newlines_mask &= newlines_mask - 1;
}
} else {
// The slow path.
// There are multibyte chars in here, fallback to generic decoding.
let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
intra_chunk_offset = analyze_source_file_generic(
&src[scan_start..],
CHUNK_SIZE - intra_chunk_offset,
RelativeBytePos::from_usize(scan_start),
lines,
multi_byte_chars,
);
}
}

// There might still be a tail left to analyze
let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
if tail_start < src.len() {
analyze_source_file_generic(
&src[tail_start..],
src.len() - tail_start,
RelativeBytePos::from_usize(tail_start),
lines,
multi_byte_chars,
);
}
}
}
_ => {
// The target (or compiler version) does not support SSE2 ...
fn analyze_source_file_dispatch(
src: &str,
lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
) {
analyze_source_file_generic(
src,
src.len(),
RelativeBytePos::from_u32(0),
lines,
multi_byte_chars,
);
}
}
}

#[cfg(not(bootstrap))]
cfg_match! {
any(target_arch = "x86", target_arch = "x86_64") => {
fn analyze_source_file_dispatch(
src: &str,
lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
) {
if is_x86_feature_detected!("sse2") {
unsafe {
analyze_source_file_sse2(src, lines, multi_byte_chars);
}
} else {
analyze_source_file_generic(
src,
src.len(),
RelativeBytePos::from_u32(0),
lines,
multi_byte_chars,
);
}
}

/// Checks 16 byte chunks of text at a time. If the chunk contains
/// something other than printable ASCII characters and newlines, the
/// function falls back to the generic implementation. Otherwise it uses
/// SSE2 intrinsics to quickly find all newlines.
#[target_feature(enable = "sse2")]
unsafe fn analyze_source_file_sse2(
src: &str,
lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
) {
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

const CHUNK_SIZE: usize = 16;

let src_bytes = src.as_bytes();

let chunk_count = src.len() / CHUNK_SIZE;

// This variable keeps track of where we should start decoding a
// chunk. If a multi-byte character spans across chunk boundaries,
// we need to skip that part in the next chunk because we already
// handled it.
let mut intra_chunk_offset = 0;

for chunk_index in 0..chunk_count {
let ptr = src_bytes.as_ptr() as *const __m128i;
// We don't know if the pointer is aligned to 16 bytes, so we
// use `loadu`, which supports unaligned loading.
let chunk = unsafe { _mm_loadu_si128(ptr.add(chunk_index)) };

// For character in the chunk, see if its byte value is < 0, which
// indicates that it's part of a UTF-8 char.
let multibyte_test = unsafe { _mm_cmplt_epi8(chunk, _mm_set1_epi8(0)) };
// Create a bit mask from the comparison results.
let multibyte_mask = unsafe { _mm_movemask_epi8(multibyte_test) };

// If the bit mask is all zero, we only have ASCII chars here:
if multibyte_mask == 0 {
assert!(intra_chunk_offset == 0);

// Check for newlines in the chunk
let newlines_test = unsafe { _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8)) };
let mut newlines_mask = unsafe { _mm_movemask_epi8(newlines_test) };

let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);

while newlines_mask != 0 {
let index = newlines_mask.trailing_zeros();

lines.push(RelativeBytePos(index) + output_offset);

// Clear the bit, so we can find the next one.
newlines_mask &= newlines_mask - 1;
}
} else {
// The slow path.
// There are multibyte chars in here, fallback to generic decoding.
let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
intra_chunk_offset = analyze_source_file_generic(
&src[scan_start..],
CHUNK_SIZE - intra_chunk_offset,
RelativeBytePos::from_usize(scan_start),
lines,
multi_byte_chars,
);
}
}

// There might still be a tail left to analyze
let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
if tail_start < src.len() {
analyze_source_file_generic(
&src[tail_start..],
src.len() - tail_start,
RelativeBytePos::from_usize(tail_start),
lines,
multi_byte_chars,
);
}
}
}
_ => {
// The target (or compiler version) does not support SSE2 ...
fn analyze_source_file_dispatch(
src: &str,
lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
) {
analyze_source_file_generic(
src,
src.len(),
RelativeBytePos::from_u32(0),
lines,
multi_byte_chars,
);
}
}
}

// `scan_len` determines the number of bytes in `src` to scan. Note that the
// function can read past `scan_len` if a multi-byte character start within the
// range but extends past it. The overflow is returned by the function.
Expand Down
Loading