Skip to content

Simplify const memchr. #101784

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 17, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 23 additions & 25 deletions library/core/src/slice/memchr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// Copyright 2015 Andrew Gallant, bluss and Nicolas Koch

use crate::cmp;
use crate::intrinsics;
use crate::mem;

const LO_USIZE: usize = usize::repeat_u8(0x01);
Expand All @@ -17,53 +16,51 @@ const USIZE_BYTES: usize = mem::size_of::<usize>();
/// bytes where the borrow propagated all the way to the most significant
/// bit."
#[inline]
fn contains_zero_byte(x: usize) -> bool {
const fn contains_zero_byte(x: usize) -> bool {
x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
}

#[cfg(target_pointer_width = "16")]
#[inline]
fn repeat_byte(b: u8) -> usize {
const fn repeat_byte(b: u8) -> usize {
(b as usize) << 8 | b as usize
}

#[cfg(not(target_pointer_width = "16"))]
#[inline]
fn repeat_byte(b: u8) -> usize {
const fn repeat_byte(b: u8) -> usize {
(b as usize) * (usize::MAX / 255)
}

/// Returns the first index matching the byte `x` in `text`.
#[must_use]
#[inline]
pub const fn memchr(x: u8, text: &[u8]) -> Option<usize> {
#[inline]
fn rt_impl(x: u8, text: &[u8]) -> Option<usize> {
// Fast path for small slices
if text.len() < 2 * USIZE_BYTES {
return text.iter().position(|elt| *elt == x);
}

memchr_general_case(x, text)
// Fast path for small slices.
if text.len() < 2 * USIZE_BYTES {
return memchr_naive(x, text);
}

const fn const_impl(x: u8, bytes: &[u8]) -> Option<usize> {
let mut i = 0;
while i < bytes.len() {
if bytes[i] == x {
return Some(i);
}
i += 1;
memchr_aligned(x, text)
}

#[inline]
const fn memchr_naive(x: u8, text: &[u8]) -> Option<usize> {
let mut i = 0;

// FIXME(const-hack): Replace with `text.iter().pos(|c| *c == x)`.
while i < text.len() {
if text[i] == x {
return Some(i);
}

None
i += 1;
}

// SAFETY: The const and runtime versions have identical behavior
unsafe { intrinsics::const_eval_select((x, text), const_impl, rt_impl) }
None
}

fn memchr_general_case(x: u8, text: &[u8]) -> Option<usize> {
const fn memchr_aligned(x: u8, text: &[u8]) -> Option<usize> {
// Scan for a single byte value by reading two `usize` words at a time.
//
// Split `text` in three parts
Expand All @@ -78,7 +75,7 @@ fn memchr_general_case(x: u8, text: &[u8]) -> Option<usize> {

if offset > 0 {
offset = cmp::min(offset, len);
if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
if let Some(index) = memchr_naive(x, &text[..offset]) {
return Some(index);
}
}
Expand All @@ -103,7 +100,8 @@ fn memchr_general_case(x: u8, text: &[u8]) -> Option<usize> {
}

// Find the byte after the point the body loop stopped.
text[offset..].iter().position(|elt| *elt == x).map(|i| offset + i)
// FIXME(const-hack): Use `?` instead.
if let Some(i) = memchr_naive(x, &text[offset..]) { Some(offset + i) } else { None }
}

/// Returns the last index matching the byte `x` in `text`.
Expand Down