Skip to content

Commit cadcd70

Browse files
committed
UTF-8 validation: Add missing if conditional for short input
We need to check that `len` is large enough before entering the fast skip loop; otherwise the loop bound `len - 2 * usize::BYTES` underflows for short input.
1 parent 11e3de3 commit cadcd70

File tree

1 file changed

+16
-13
lines changed

1 file changed

+16
-13
lines changed

src/libcore/str/mod.rs

+16-13
Original file line numberDiff line numberDiff line change
@@ -1158,24 +1158,27 @@ fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
11581158
offset += 1;
11591159
} else {
11601160
// Ascii case, try to skip forward quickly.
1161+
// When the pointer is aligned, read 2 words of data per iteration
1162+
// until we find a word containing a non-ascii byte.
1163+
const BYTES_PER_ITERATION: usize = 2 * usize::BYTES;
11611164
let ptr = v.as_ptr();
11621165
let align = (ptr as usize + offset) & (usize::BYTES - 1);
11631166
if align == 0 {
1164-
// When the pointer is aligned, read 2 words of data per iteration
1165-
// until we find a word containing a non-ascii byte.
1166-
while offset <= len - 2 * usize::BYTES {
1167-
unsafe {
1168-
let u = *(ptr.offset(offset as isize) as *const usize);
1169-
let v = *(ptr.offset((offset + usize::BYTES) as isize) as *const usize);
1170-
1171-
// break if there is a nonascii byte
1172-
let zu = contains_nonascii(u);
1173-
let zv = contains_nonascii(v);
1174-
if zu || zv {
1175-
break;
1167+
if len >= BYTES_PER_ITERATION {
1168+
while offset <= len - BYTES_PER_ITERATION {
1169+
unsafe {
1170+
let u = *(ptr.offset(offset as isize) as *const usize);
1171+
let v = *(ptr.offset((offset + usize::BYTES) as isize) as *const usize);
1172+
1173+
// break if there is a nonascii byte
1174+
let zu = contains_nonascii(u);
1175+
let zv = contains_nonascii(v);
1176+
if zu || zv {
1177+
break;
1178+
}
11761179
}
1180+
offset += BYTES_PER_ITERATION;
11771181
}
1178-
offset += usize::BYTES * 2;
11791182
}
11801183
// step from the point where the wordwise loop stopped
11811184
while offset < len && v[offset] < 128 {

0 commit comments

Comments
 (0)