Skip to content

Commit 1efec71

Browse files
committed
Process partial chunks at the beginning and remove `unlikely` hints
This should give better latency on short strings and/or on the immediate-fail path.
1 parent a0a5af9 commit 1efec71

File tree

1 file changed

+9
-11
lines changed

1 file changed

+9
-11
lines changed

library/core/src/str/validations.rs

+9-11
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Operations related to UTF-8 validation.
22
33
use super::Utf8Error;
4-
use crate::intrinsics::{const_eval_select, unlikely};
4+
use crate::intrinsics::const_eval_select;
55

66
/// Returns the initial codepoint accumulator for the first byte.
77
/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
@@ -243,7 +243,7 @@ const unsafe fn run_with_error_handling(
243243
) -> Result<(), Utf8Error> {
244244
while i < bytes.len() {
245245
let new_st = next_state(*st, bytes[i]);
246-
if unlikely(new_st & STATE_MASK == ST_ERROR) {
246+
if new_st & STATE_MASK == ST_ERROR {
247247
// SAFETY: Guaranteed by the caller.
248248
let (valid_up_to, error_len) = unsafe { resolve_error_location(*st, bytes, i) };
249249
return Err(Utf8Error { valid_up_to, error_len: Some(error_len) });
@@ -287,7 +287,9 @@ fn run_utf8_validation_rt(bytes: &[u8]) -> Result<(), Utf8Error> {
287287
const { assert!(ASCII_CHUNK_SIZE % MAIN_CHUNK_SIZE == 0) };
288288

289289
let mut st = ST_ACCEPT;
290-
let mut i = 0usize;
290+
let mut i = bytes.len() % MAIN_CHUNK_SIZE;
291+
// SAFETY: Start at initial state ACCEPT.
292+
unsafe { run_with_error_handling(&mut st, &bytes[..i], 0)? };
291293

292294
while i + MAIN_CHUNK_SIZE <= bytes.len() {
293295
// Fast path: if the current state is ACCEPT, we can skip to the next non-ASCII chunk.
@@ -320,20 +322,16 @@ fn run_utf8_validation_rt(bytes: &[u8]) -> Result<(), Utf8Error> {
320322
for &b in chunk {
321323
new_st = next_state(new_st, b);
322324
}
323-
if unlikely(new_st & STATE_MASK == ST_ERROR) {
324-
// Discard the current chunk's erroneous result, and reuse the trailing chunk handling to
325-
// report the error location.
326-
break;
325+
if new_st & STATE_MASK == ST_ERROR {
326+
// SAFETY: `st` is the last state after executing `bytes[..i]` without encountering any error.
327+
return unsafe { run_with_error_handling(&mut st, bytes, i) };
327328
}
328329

329330
st = new_st;
330331
i += MAIN_CHUNK_SIZE;
331332
}
332333

333-
// SAFETY: `st` is the last state after executing `bytes[..i]` without encountering any error.
334-
unsafe { run_with_error_handling(&mut st, bytes, i)? };
335-
336-
if unlikely(st & STATE_MASK != ST_ACCEPT) {
334+
if st & STATE_MASK != ST_ACCEPT {
337335
// SAFETY: Same as above.
338336
let (valid_up_to, _) = unsafe { resolve_error_location(st, bytes, bytes.len()) };
339337
return Err(Utf8Error { valid_up_to, error_len: None });

0 commit comments

Comments
 (0)