@@ -76,15 +76,14 @@ use iter::MatchIndicesInternal;
76
76
use iter:: SplitInternal ;
77
77
use iter:: { MatchesInternal , SplitNInternal } ;
78
78
79
- use validations:: truncate_to_char_boundary;
80
-
81
79
#[ inline( never) ]
82
80
#[ cold]
83
81
#[ track_caller]
84
82
fn slice_error_fail ( s : & str , begin : usize , end : usize ) -> ! {
85
83
const MAX_DISPLAY_LENGTH : usize = 256 ;
86
- let ( truncated, s_trunc) = truncate_to_char_boundary ( s, MAX_DISPLAY_LENGTH ) ;
87
- let ellipsis = if truncated { "[...]" } else { "" } ;
84
+ let trunc_len = s. floor_char_boundary ( MAX_DISPLAY_LENGTH ) ;
85
+ let s_trunc = & s[ ..trunc_len] ;
86
+ let ellipsis = if trunc_len < s. len ( ) { "[...]" } else { "" } ;
88
87
89
88
// 1. out of bounds
90
89
if begin > s. len ( ) || end > s. len ( ) {
@@ -105,10 +104,7 @@ fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
105
104
// 3. character boundary
106
105
let index = if !s. is_char_boundary ( begin) { begin } else { end } ;
107
106
// find the character
108
- let mut char_start = index;
109
- while !s. is_char_boundary ( char_start) {
110
- char_start -= 1 ;
111
- }
107
+ let char_start = s. floor_char_boundary ( index) ;
112
108
// `char_start` must be less than len and a char boundary
113
109
let ch = s[ char_start..] . chars ( ) . next ( ) . unwrap ( ) ;
114
110
let char_range = char_start..char_start + ch. len_utf8 ( ) ;
@@ -215,8 +211,80 @@ impl str {
215
211
// code on higher opt-levels. See PR #84751 for more details.
216
212
None => index == self . len ( ) ,
217
213
218
- // This is bit magic equivalent to: b < 128 || b >= 192
219
- Some ( & b) => ( b as i8 ) >= -0x40 ,
214
+ Some ( & b) => b. is_utf8_char_boundary ( ) ,
215
+ }
216
+ }
217
+
218
+ /// Finds the closest `x` not exceeding `index` where `is_char_boundary(x)` is `true`.
219
+ ///
220
+ /// This method can help you truncate a string so that it's still valid UTF-8, but doesn't
221
+ /// exceed a given number of bytes. Note that this is done purely at the character level
222
+ /// and can still visually split graphemes, even though the underlying characters aren't
223
+ /// split. For example, the emoji 🧑🔬 (scientist) could be split so that the string only
224
+ /// includes 🧑 (person) instead.
225
+ ///
226
+ /// # Examples
227
+ ///
228
+ /// ```
229
+ /// #![feature(round_char_boundary)]
230
+ /// let s = "❤️🧡💛💚💙💜";
231
+ /// assert_eq!(s.len(), 26);
232
+ /// assert!(!s.is_char_boundary(13));
233
+ ///
234
+ /// let closest = s.floor_char_boundary(13);
235
+ /// assert_eq!(closest, 10);
236
+ /// assert_eq!(&s[..closest], "❤️🧡");
237
+ /// ```
238
+ #[ unstable( feature = "round_char_boundary" , issue = "93743" ) ]
239
+ #[ inline]
240
+ pub fn floor_char_boundary ( & self , index : usize ) -> usize {
241
+ if index >= self . len ( ) {
242
+ self . len ( )
243
+ } else {
244
+ let lower_bound = index. saturating_sub ( 3 ) ;
245
+ let new_index = self . as_bytes ( ) [ lower_bound..=index]
246
+ . iter ( )
247
+ . rposition ( |b| b. is_utf8_char_boundary ( ) ) ;
248
+
249
+ // SAFETY: we know that the character boundary will be within four bytes
250
+ unsafe { lower_bound + new_index. unwrap_unchecked ( ) }
251
+ }
252
+ }
253
+
254
+ /// Finds the closest `x` not below `index` where `is_char_boundary(x)` is `true`.
255
+ ///
256
+ /// This method is the natural complement to [`floor_char_boundary`]. See that method
257
+ /// for more details.
258
+ ///
259
+ /// [`floor_char_boundary`]: str::floor_char_boundary
260
+ ///
261
+ /// # Panics
262
+ ///
263
+ /// Panics if `index > self.len()`.
264
+ ///
265
+ /// # Examples
266
+ ///
267
+ /// ```
268
+ /// #![feature(round_char_boundary)]
269
+ /// let s = "❤️🧡💛💚💙💜";
270
+ /// assert_eq!(s.len(), 26);
271
+ /// assert!(!s.is_char_boundary(13));
272
+ ///
273
+ /// let closest = s.ceil_char_boundary(13);
274
+ /// assert_eq!(closest, 14);
275
+ /// assert_eq!(&s[..closest], "❤️🧡💛");
276
+ /// ```
277
+ #[ unstable( feature = "round_char_boundary" , issue = "93743" ) ]
278
+ #[ inline]
279
+ pub fn ceil_char_boundary ( & self , index : usize ) -> usize {
280
+ if index > self . len ( ) {
281
+ slice_error_fail ( self , index, index)
282
+ } else {
283
+ let upper_bound = Ord :: min ( index + 4 , self . len ( ) ) ;
284
+ self . as_bytes ( ) [ index..upper_bound]
285
+ . iter ( )
286
+ . position ( |b| b. is_utf8_char_boundary ( ) )
287
+ . map_or ( upper_bound, |pos| pos + index)
220
288
}
221
289
}
222
290
0 commit comments