1
1
//! Operations related to UTF-8 validation.
2
2
3
3
use super :: Utf8Error ;
4
+ use super :: error:: Utf8ErrorLen ;
4
5
use crate :: intrinsics:: const_eval_select;
5
6
6
7
/// Returns the initial codepoint accumulator for the first byte.
@@ -210,25 +211,26 @@ const fn is_utf8_first_byte(byte: u8) -> bool {
210
211
/// The caller must ensure `bytes[..i]` is a valid UTF-8 prefix and `st` is the DFA state after
211
212
/// executing on `bytes[..i]`.
212
213
#[ inline]
213
- const unsafe fn resolve_error_location ( st : u32 , bytes : & [ u8 ] , i : usize ) -> ( usize , u8 ) {
214
+ const unsafe fn resolve_error_location ( st : u32 , bytes : & [ u8 ] , i : usize ) -> Utf8Error {
// The result pairs `valid_up_to` with an error length that grows with the distance
// between `i` and the first byte of the incomplete sequence.
214
215
// There are two cases:
215
216
// 1. [valid UTF-8..] | *here
216
217
// The previous state must be ACCEPT for case 1, and `valid_up_to = i`.
217
218
// 2. [valid UTF-8..] | valid first byte, [valid continuation byte...], *here
218
219
// `valid_up_to` is at the latest non-continuation byte, which must exist and
219
220
// be in range `(i-3)..i`.
220
- if st & STATE_MASK == ST_ACCEPT {
221
- ( i, 1 )
221
+ let ( valid_up_to , error_len ) = if st & STATE_MASK == ST_ACCEPT {
222
+ ( i, Utf8ErrorLen :: One )
222
223
// SAFETY: A UTF-8 first byte must exist within `(i-3)..i` if we are in an
// intermediate state, so the reads below stay inside `bytes[..i]`.
223
224
// We use a raw pointer here because `get_unchecked` is not a `const fn`.
224
225
} else if is_utf8_first_byte ( unsafe { bytes. as_ptr ( ) . add ( i - 1 ) . read ( ) } ) {
225
- ( i - 1 , 1 )
226
+ ( i - 1 , Utf8ErrorLen :: One )
226
227
// SAFETY: Same as above.
227
228
} else if is_utf8_first_byte ( unsafe { bytes. as_ptr ( ) . add ( i - 2 ) . read ( ) } ) {
228
- ( i - 2 , 2 )
229
+ ( i - 2 , Utf8ErrorLen :: Two )
229
230
} else {
// Neither of the two preceding bytes is a first byte, so per case 2 it must be
// the one at `i - 3`; no read is needed to confirm it.
230
- ( i - 3 , 3 )
231
- }
231
+ ( i - 3 , Utf8ErrorLen :: Three )
232
+ } ;
233
+ Utf8Error { valid_up_to, error_len }
232
234
}
233
235
234
236
// The simpler but slower algorithm to run DFA with error handling.
@@ -245,8 +247,7 @@ const unsafe fn run_with_error_handling(
245
247
let new_st = next_state ( * st, bytes[ i] ) ;
246
248
if new_st & STATE_MASK == ST_ERROR {
247
249
// SAFETY: Guaranteed by the caller.
248
- let ( valid_up_to, error_len) = unsafe { resolve_error_location ( * st, bytes, i) } ;
249
- return Err ( Utf8Error { valid_up_to, error_len : Some ( error_len) } ) ;
250
+ return Err ( unsafe { resolve_error_location ( * st, bytes, i) } ) ;
250
251
}
251
252
* st = new_st;
252
253
i += 1 ;
@@ -256,7 +257,7 @@ const unsafe fn run_with_error_handling(
256
257
257
258
/// Walks through `v` checking that it's a valid UTF-8 sequence,
258
259
/// returning `Ok(())` in that case, or, if it is invalid, `Err(err)`.
259
- #[ inline( always ) ]
260
+ #[ inline]
260
261
#[ rustc_allow_const_fn_unstable( const_eval_select) ] // fallback impl has same behavior
261
262
pub ( super ) const fn run_utf8_validation ( bytes : & [ u8 ] ) -> Result < ( ) , Utf8Error > {
262
263
const_eval_select ( ( bytes, ) , run_utf8_validation_const, run_utf8_validation_rt)
@@ -273,8 +274,9 @@ const fn run_utf8_validation_const(bytes: &[u8]) -> Result<(), Utf8Error> {
273
274
Ok ( ( ) )
274
275
} else {
275
276
// SAFETY: `st` is the last state after execution without encountering any error.
276
- let ( valid_up_to, _) = unsafe { resolve_error_location ( st, bytes, bytes. len ( ) ) } ;
277
- Err ( Utf8Error { valid_up_to, error_len : None } )
277
+ let mut err = unsafe { resolve_error_location ( st, bytes, bytes. len ( ) ) } ;
278
+ err. error_len = Utf8ErrorLen :: Eof ;
279
+ Err ( err)
278
280
}
279
281
}
280
282
}
@@ -333,8 +335,9 @@ fn run_utf8_validation_rt(bytes: &[u8]) -> Result<(), Utf8Error> {
333
335
334
336
if st & STATE_MASK != ST_ACCEPT {
335
337
// SAFETY: Same as above.
336
- let ( valid_up_to, _) = unsafe { resolve_error_location ( st, bytes, bytes. len ( ) ) } ;
337
- return Err ( Utf8Error { valid_up_to, error_len : None } ) ;
338
+ let mut err = unsafe { resolve_error_location ( st, bytes, bytes. len ( ) ) } ;
339
+ err. error_len = Utf8ErrorLen :: Eof ;
340
+ return Err ( err) ;
338
341
}
339
342
340
343
Ok ( ( ) )
0 commit comments