@@ -4,7 +4,7 @@ use diagnostics::make_unclosed_delims_error;
4
4
use rustc_ast:: ast:: { self , AttrStyle } ;
5
5
use rustc_ast:: token:: { self , CommentKind , Delimiter , IdentIsRaw , Token , TokenKind } ;
6
6
use rustc_ast:: tokenstream:: TokenStream ;
7
- use rustc_ast:: util:: unicode:: contains_text_flow_control_chars;
7
+ use rustc_ast:: util:: unicode:: { TEXT_FLOW_CONTROL_CHARS , contains_text_flow_control_chars} ;
8
8
use rustc_errors:: codes:: * ;
9
9
use rustc_errors:: { Applicability , Diag , DiagCtxtHandle , StashKey } ;
10
10
use rustc_lexer:: {
@@ -14,7 +14,7 @@ use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode}
14
14
use rustc_session:: lint:: BuiltinLintDiag ;
15
15
use rustc_session:: lint:: builtin:: {
16
16
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX , RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX ,
17
- TEXT_DIRECTION_CODEPOINT_IN_COMMENT ,
17
+ TEXT_DIRECTION_CODEPOINT_IN_COMMENT , TEXT_DIRECTION_CODEPOINT_IN_LITERAL ,
18
18
} ;
19
19
use rustc_session:: parse:: ParseSess ;
20
20
use rustc_span:: { BytePos , Pos , Span , Symbol , sym} ;
@@ -174,6 +174,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
174
174
// Opening delimiter of the length 3 is not included into the symbol.
175
175
let content_start = start + BytePos ( 3 ) ;
176
176
let content = self . str_from ( content_start) ;
177
+ self . lint_doc_comment_unicode_text_flow ( start, content) ;
177
178
self . cook_doc_comment ( content_start, content, CommentKind :: Line , doc_style)
178
179
}
179
180
rustc_lexer:: TokenKind :: BlockComment { doc_style, terminated } => {
@@ -193,6 +194,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
193
194
let content_start = start + BytePos ( 3 ) ;
194
195
let content_end = self . pos - BytePos ( if terminated { 2 } else { 0 } ) ;
195
196
let content = self . str_from_to ( content_start, content_end) ;
197
+ self . lint_doc_comment_unicode_text_flow ( start, content) ;
196
198
self . cook_doc_comment ( content_start, content, CommentKind :: Block , doc_style)
197
199
}
198
200
rustc_lexer:: TokenKind :: Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } => {
@@ -287,6 +289,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
287
289
} else {
288
290
None
289
291
} ;
292
+ self . lint_literal_unicode_text_flow ( symbol, kind, self . mk_sp ( start, self . pos ) , "literal" ) ;
290
293
token:: Literal ( token:: Lit { kind, symbol, suffix } )
291
294
}
292
295
rustc_lexer:: TokenKind :: Lifetime { starts_with_number } => {
@@ -481,6 +484,88 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
481
484
}
482
485
}
483
486
487
+ fn lint_doc_comment_unicode_text_flow ( & mut self , start : BytePos , content : & str ) {
488
+ if contains_text_flow_control_chars ( content) {
489
+ self . report_text_direction_codepoint (
490
+ content,
491
+ self . mk_sp ( start, self . pos ) ,
492
+ 0 ,
493
+ false ,
494
+ "doc comment" ,
495
+ ) ;
496
+ }
497
+ }
498
+
499
+ fn lint_literal_unicode_text_flow (
500
+ & mut self ,
501
+ text : Symbol ,
502
+ lit_kind : token:: LitKind ,
503
+ span : Span ,
504
+ label : & ' static str ,
505
+ ) {
506
+ if !contains_text_flow_control_chars ( text. as_str ( ) ) {
507
+ return ;
508
+ }
509
+ let ( padding, point_at_inner_spans) = match lit_kind {
510
+ // account for `"` or `'`
511
+ token:: LitKind :: Str | token:: LitKind :: Char => ( 1 , true ) ,
512
+ // account for `c"`
513
+ token:: LitKind :: CStr => ( 2 , true ) ,
514
+ // account for `r###"`
515
+ token:: LitKind :: StrRaw ( n) => ( n as u32 + 2 , true ) ,
516
+ // account for `cr###"`
517
+ token:: LitKind :: CStrRaw ( n) => ( n as u32 + 3 , true ) ,
518
+ // suppress bad literals.
519
+ token:: LitKind :: Err ( _) => return ,
520
+ // Be conservative just in case new literals do support these.
521
+ _ => ( 0 , false ) ,
522
+ } ;
523
+ self . report_text_direction_codepoint (
524
+ text. as_str ( ) ,
525
+ span,
526
+ padding,
527
+ point_at_inner_spans,
528
+ label,
529
+ ) ;
530
+ }
531
+
532
+ fn report_text_direction_codepoint (
533
+ & self ,
534
+ text : & str ,
535
+ span : Span ,
536
+ padding : u32 ,
537
+ point_at_inner_spans : bool ,
538
+ label : & str ,
539
+ ) {
540
+ // Obtain the `Span`s for each of the forbidden chars.
541
+ let spans: Vec < _ > = text
542
+ . char_indices ( )
543
+ . filter_map ( |( i, c) | {
544
+ TEXT_FLOW_CONTROL_CHARS . contains ( & c) . then ( || {
545
+ let lo = span. lo ( ) + BytePos ( i as u32 + padding) ;
546
+ ( c, span. with_lo ( lo) . with_hi ( lo + BytePos ( c. len_utf8 ( ) as u32 ) ) )
547
+ } )
548
+ } )
549
+ . collect ( ) ;
550
+
551
+ let count = spans. len ( ) ;
552
+ let labels = point_at_inner_spans. then_some ( spans. clone ( ) ) ;
553
+
554
+ self . psess . buffer_lint (
555
+ TEXT_DIRECTION_CODEPOINT_IN_LITERAL ,
556
+ span,
557
+ ast:: CRATE_NODE_ID ,
558
+ BuiltinLintDiag :: HiddenUnicodeCodepoints {
559
+ label : label. to_string ( ) ,
560
+ count,
561
+ span_label : span,
562
+ labels,
563
+ escape : point_at_inner_spans && !spans. is_empty ( ) ,
564
+ spans,
565
+ } ,
566
+ ) ;
567
+ }
568
+
484
569
fn validate_frontmatter (
485
570
& self ,
486
571
start : BytePos ,
0 commit comments