@@ -38,6 +38,24 @@ impl InnerSpan {
38
38
}
39
39
}
40
40
41
/// The location and before/after width of a character whose width has changed from its source
/// code representation.
///
/// The snippet scanner in this file records, for example, `(before, after) = (2, 1)` for a
/// simple escape such as `\n`, `(4, 1)` for a `\xAB` escape, and `(n, 0)` for a backslash-newline
/// continuation together with the whitespace that follows it.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct InnerWidthMapping {
    /// Index of the character in the source
    pub position: usize,
    /// The inner width in characters, i.e. the width as written in the source snippet
    pub before: usize,
    /// The transformed width in characters, i.e. the width after the literal has been processed
    /// (`0` when the source text vanishes entirely)
    pub after: usize,
}

impl InnerWidthMapping {
    /// Builds a mapping for the source text starting at `position` that occupied `before`
    /// characters in the snippet and occupies `after` characters once processed.
    pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
        Self { position, before, after }
    }
}
58
+
41
59
/// The type of format string that we are parsing.
42
60
#[ derive( Copy , Clone , Debug , Eq , PartialEq ) ]
43
61
pub enum ParseMode {
@@ -200,8 +218,8 @@ pub struct Parser<'a> {
200
218
style : Option < usize > ,
201
219
/// Start and end byte offset of every successfully parsed argument
202
220
pub arg_places : Vec < InnerSpan > ,
203
- /// Characters that need to be shifted
204
- skips : Vec < usize > ,
221
+ /// Characters whose length has been changed from their in-code representation
222
+ width_map : Vec < InnerWidthMapping > ,
205
223
/// Span of the last opening brace seen, used for error reporting
206
224
last_opening_brace : Option < InnerSpan > ,
207
225
/// Whether the source string comes from `println!` as opposed to `format!` or `print!`
@@ -224,7 +242,7 @@ impl<'a> Iterator for Parser<'a> {
224
242
'{' => {
225
243
let curr_last_brace = self . last_opening_brace ;
226
244
let byte_pos = self . to_span_index ( pos) ;
227
- let lbrace_end = self . to_span_index ( pos + 1 ) ;
245
+ let lbrace_end = InnerOffset ( byte_pos . 0 + self . to_span_width ( pos ) ) ;
228
246
self . last_opening_brace = Some ( byte_pos. to ( lbrace_end) ) ;
229
247
self . cur . next ( ) ;
230
248
if self . consume ( '{' ) {
@@ -233,12 +251,15 @@ impl<'a> Iterator for Parser<'a> {
233
251
Some ( String ( self . string ( pos + 1 ) ) )
234
252
} else {
235
253
let arg = self . argument ( lbrace_end) ;
236
- if let Some ( rbrace_byte_idx) = self . must_consume ( '}' ) {
237
- let lbrace_inner_offset = self . to_span_index ( pos) ;
238
- let rbrace_inner_offset = self . to_span_index ( rbrace_byte_idx) ;
254
+ if let Some ( rbrace_pos) = self . must_consume ( '}' ) {
239
255
if self . is_literal {
256
+ let lbrace_byte_pos = self . to_span_index ( pos) ;
257
+ let rbrace_byte_pos = self . to_span_index ( rbrace_pos) ;
258
+
259
+ let width = self . to_span_width ( rbrace_pos) ;
260
+
240
261
self . arg_places . push (
241
- lbrace_inner_offset . to ( InnerOffset ( rbrace_inner_offset . 0 + 1 ) ) ,
262
+ lbrace_byte_pos . to ( InnerOffset ( rbrace_byte_pos . 0 + width ) ) ,
242
263
) ;
243
264
}
244
265
} else {
@@ -285,7 +306,7 @@ impl<'a> Parser<'a> {
285
306
append_newline : bool ,
286
307
mode : ParseMode ,
287
308
) -> Parser < ' a > {
288
- let ( skips , is_literal) = find_skips_from_snippet ( snippet, style) ;
309
+ let ( width_map , is_literal) = find_width_map_from_snippet ( snippet, style) ;
289
310
Parser {
290
311
mode,
291
312
input : s,
@@ -294,7 +315,7 @@ impl<'a> Parser<'a> {
294
315
curarg : 0 ,
295
316
style,
296
317
arg_places : vec ! [ ] ,
297
- skips ,
318
+ width_map ,
298
319
last_opening_brace : None ,
299
320
append_newline,
300
321
is_literal,
@@ -367,21 +388,34 @@ impl<'a> Parser<'a> {
367
388
None
368
389
}
369
390
391
+ fn remap_pos ( & self , mut pos : usize ) -> InnerOffset {
392
+ for width in & self . width_map {
393
+ if pos > width. position {
394
+ pos += width. before - width. after ;
395
+ } else if pos == width. position && width. after == 0 {
396
+ pos += width. before ;
397
+ } else {
398
+ break ;
399
+ }
400
+ }
401
+
402
+ InnerOffset ( pos)
403
+ }
404
+
370
405
fn to_span_index ( & self , pos : usize ) -> InnerOffset {
371
- let mut pos = pos;
372
406
// This handles the raw string case, the raw argument is the number of #
373
407
// in r###"..."### (we need to add one because of the `r`).
374
408
let raw = self . style . map_or ( 0 , |raw| raw + 1 ) ;
375
- for skip in & self . skips {
376
- if pos > * skip {
377
- pos += 1 ;
378
- } else if pos == * skip && raw == 0 {
379
- pos += 1 ;
380
- } else {
381
- break ;
382
- }
409
+ let pos = self . remap_pos ( pos) ;
410
+ InnerOffset ( raw + pos. 0 + 1 )
411
+ }
412
+
413
+ fn to_span_width ( & self , pos : usize ) -> usize {
414
+ let pos = self . remap_pos ( pos) ;
415
+ match self . width_map . iter ( ) . find ( |w| w. position == pos. 0 ) {
416
+ Some ( w) => w. before ,
417
+ None => 1 ,
383
418
}
384
- InnerOffset ( raw + pos + 1 )
385
419
}
386
420
387
421
fn span ( & self , start_pos : usize , end_pos : usize ) -> InnerSpan {
@@ -809,10 +843,10 @@ impl<'a> Parser<'a> {
809
843
/// Finds the indices of all characters that have been processed and differ between the actual
810
844
/// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
811
845
/// in order to properly synthesise the intra-string `Span`s for error diagnostics.
812
- fn find_skips_from_snippet (
846
+ fn find_width_map_from_snippet (
813
847
snippet : Option < string:: String > ,
814
848
str_style : Option < usize > ,
815
- ) -> ( Vec < usize > , bool ) {
849
+ ) -> ( Vec < InnerWidthMapping > , bool ) {
816
850
let snippet = match snippet {
817
851
Some ( ref s) if s. starts_with ( '"' ) || s. starts_with ( "r\" " ) || s. starts_with ( "r#" ) => s,
818
852
_ => return ( vec ! [ ] , false ) ,
@@ -825,43 +859,39 @@ fn find_skips_from_snippet(
825
859
let snippet = & snippet[ 1 ..snippet. len ( ) - 1 ] ;
826
860
827
861
let mut s = snippet. char_indices ( ) ;
828
- let mut skips = vec ! [ ] ;
862
+ let mut width_mappings = vec ! [ ] ;
829
863
while let Some ( ( pos, c) ) = s. next ( ) {
830
864
match ( c, s. clone ( ) . next ( ) ) {
831
865
// skip whitespace and empty lines ending in '\\'
832
- ( '\\' , Some ( ( next_pos, '\n' ) ) ) => {
833
- skips. push ( pos) ;
834
- skips. push ( next_pos) ;
866
+ ( '\\' , Some ( ( _, '\n' ) ) ) => {
835
867
let _ = s. next ( ) ;
868
+ let mut width = 2 ;
836
869
837
- while let Some ( ( pos , c) ) = s. clone ( ) . next ( ) {
870
+ while let Some ( ( _ , c) ) = s. clone ( ) . next ( ) {
838
871
if matches ! ( c, ' ' | '\n' | '\t' ) {
839
- skips . push ( pos ) ;
872
+ width += 1 ;
840
873
let _ = s. next ( ) ;
841
874
} else {
842
875
break ;
843
876
}
844
877
}
878
+
879
+ width_mappings. push ( InnerWidthMapping :: new ( pos, width, 0 ) ) ;
845
880
}
846
- ( '\\' , Some ( ( next_pos , 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"' ) ) ) => {
847
- skips . push ( next_pos ) ;
881
+ ( '\\' , Some ( ( _ , 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"' ) ) ) => {
882
+ width_mappings . push ( InnerWidthMapping :: new ( pos , 2 , 1 ) ) ;
848
883
let _ = s. next ( ) ;
849
884
}
850
885
( '\\' , Some ( ( _, 'x' ) ) ) => {
851
- for _ in 0 ..3 {
852
- // consume `\xAB` literal
853
- if let Some ( ( pos, _) ) = s. next ( ) {
854
- skips. push ( pos) ;
855
- } else {
856
- break ;
857
- }
858
- }
886
+ // consume `\xAB` literal
887
+ s. nth ( 2 ) ;
888
+ width_mappings. push ( InnerWidthMapping :: new ( pos, 4 , 1 ) ) ;
859
889
}
860
890
( '\\' , Some ( ( _, 'u' ) ) ) => {
861
- if let Some ( ( pos , _ ) ) = s . next ( ) {
862
- skips . push ( pos ) ;
863
- }
864
- if let Some ( ( next_pos , next_c) ) = s. next ( ) {
891
+ let mut width = 2 ;
892
+ let _ = s . next ( ) ;
893
+
894
+ if let Some ( ( _ , next_c) ) = s. next ( ) {
865
895
if next_c == '{' {
866
896
// consume up to 6 hexadecimal chars
867
897
let digits_len =
@@ -881,31 +911,32 @@ fn find_skips_from_snippet(
881
911
let required_skips = digits_len. saturating_sub ( len_utf8. saturating_sub ( 1 ) ) ;
882
912
883
913
// skip '{' and '}' also
884
- for pos in ( next_pos..) . take ( required_skips + 2 ) {
885
- skips. push ( pos)
886
- }
914
+ width += required_skips + 2 ;
887
915
888
916
s. nth ( digits_len) ;
889
917
} else if next_c. is_digit ( 16 ) {
890
- skips. push ( next_pos) ;
918
+ width += 1 ;
919
+
891
920
// We suggest adding `{` and `}` when appropriate, accept it here as if
892
921
// it were correct
893
922
let mut i = 0 ; // consume up to 6 hexadecimal chars
894
- while let ( Some ( ( next_pos , c) ) , _) = ( s. next ( ) , i < 6 ) {
923
+ while let ( Some ( ( _ , c) ) , _) = ( s. next ( ) , i < 6 ) {
895
924
if c. is_digit ( 16 ) {
896
- skips . push ( next_pos ) ;
925
+ width += 1 ;
897
926
} else {
898
927
break ;
899
928
}
900
929
i += 1 ;
901
930
}
902
931
}
903
932
}
933
+
934
+ width_mappings. push ( InnerWidthMapping :: new ( pos, width, 1 ) ) ;
904
935
}
905
936
_ => { }
906
937
}
907
938
}
908
- ( skips , true )
939
+ ( width_mappings , true )
909
940
}
910
941
911
942
#[ cfg( test) ]
0 commit comments