@@ -1158,24 +1158,27 @@ fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
1158
1158
offset += 1 ;
1159
1159
} else {
1160
1160
// Ascii case, try to skip forward quickly.
1161
+ // When the pointer is aligned, read 2 words of data per iteration
1162
+ // until we find a word containing a non-ascii byte.
1163
+ const BYTES_PER_ITERATION : usize = 2 * usize:: BYTES ;
1161
1164
let ptr = v. as_ptr ( ) ;
1162
1165
let align = ( ptr as usize + offset) & ( usize:: BYTES - 1 ) ;
1163
1166
if align == 0 {
1164
- // When the pointer is aligned, read 2 words of data per iteration
1165
- // until we find a word containing a non-ascii byte.
1166
- while offset <= len - 2 * usize :: BYTES {
1167
- unsafe {
1168
- let u = * ( ptr. offset ( offset as isize ) as * const usize ) ;
1169
- let v = * ( ptr . offset ( ( offset + usize :: BYTES ) as isize ) as * const usize ) ;
1170
-
1171
- // break if there is a nonascii byte
1172
- let zu = contains_nonascii ( u ) ;
1173
- let zv = contains_nonascii ( v ) ;
1174
- if zu || zv {
1175
- break ;
1167
+ if len >= BYTES_PER_ITERATION {
1168
+ while offset <= len - BYTES_PER_ITERATION {
1169
+ unsafe {
1170
+ let u = * ( ptr . offset ( offset as isize ) as * const usize ) ;
1171
+ let v = * ( ptr. offset ( ( offset + usize :: BYTES ) as isize ) as * const usize ) ;
1172
+
1173
+ // break if there is a nonascii byte
1174
+ let zu = contains_nonascii ( u ) ;
1175
+ let zv = contains_nonascii ( v ) ;
1176
+ if zu || zv {
1177
+ break ;
1178
+ }
1176
1179
}
1180
+ offset += BYTES_PER_ITERATION ;
1177
1181
}
1178
- offset += usize:: BYTES * 2 ;
1179
1182
}
1180
1183
// step from the point where the wordwise loop stopped
1181
1184
while offset < len && v[ offset] < 128 {
0 commit comments