Skip to content

Commit 7ee6319

Browse files
authored
[Parse] [Sema] Update confusables diagnostic to mention the character names as well (#33105)
* [Parser] Update 'Confusables.def' file to include confusable and base character names
* [Parser] Add a new utility method to return the names of the confusable and base characters for a given confusable codepoint
* [Parser] Update diagnostic for confusable character during lexing to mention confusable and base character names
* [Sema] If there is just a single confusable character, emit a tailored diagnostic that also mentions the character names
* [Diagnostics] Add new diagnostic messages to the localization file
* [Test] Update confusables test
* [Utils] Update unicode confusables txt file and update script to regenerate confusables def file
* [Parse] Regenerate 'Confusables.def' using updated script
* [Utils] Adjust generate_confusables script based on review feedback
  Fix a mistake with name mapping. Updated header comment. Fix a couple of linting issues.
* [Parse] Regenerate 'Confusables.def' file once again after script changes
* [Parse] Add the newline after end of 'getConfusableAndBaseCodepointNames' method
* [Test] Update diagnostic message in 'Syntax/Parser/diags.swift'
1 parent 0f86137 commit 7ee6319

File tree

12 files changed

+570
-271
lines changed

12 files changed

+570
-271
lines changed

include/swift/AST/DiagnosticsParse.def

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,8 @@ ERROR(lex_single_quote_string,none,
112112
ERROR(lex_invalid_curly_quote,none,
113113
"unicode curly quote found, replace with '\"'", ())
114114
NOTE(lex_confusable_character,none,
115-
"unicode character '%0' looks similar to '%1'; did you mean to use '%1'?",
116-
(StringRef, StringRef))
115+
"unicode character '%0' (%1) looks similar to '%2' (%3); did you mean to use '%2' (%3)?",
116+
(StringRef, StringRef, StringRef, StringRef))
117117
WARNING(lex_nonbreaking_space,none,
118118
"non-breaking space (U+00A0) used instead of regular space", ())
119119

include/swift/AST/DiagnosticsSema.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -832,6 +832,9 @@ NOTE(confusable_character,none,
832832
"%select{identifier|operator}0 '%1' contains possibly confused characters; "
833833
"did you mean to use '%2'?",
834834
(bool, StringRef, StringRef))
835+
NOTE(single_confusable_character,none,
836+
"%select{identifier|operator}0 '%1' (%2) looks similar to '%3' (%4); did you mean '%3' (%4)?",
837+
(bool, StringRef, StringRef, StringRef, StringRef))
835838
ERROR(cannot_find_type_in_scope,none,
836839
"cannot find type %0 in scope", (DeclNameRef))
837840
ERROR(cannot_find_type_in_scope_did_you_mean,none,

include/swift/Parse/Confusables.def

Lines changed: 122 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -2,127 +2,135 @@
22
//
33
// This source file is part of the Swift.org open source project
44
//
5-
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
5+
// Copyright (c) 2020 Apple Inc. and the Swift project authors
66
// Licensed under Apache License v2.0 with Runtime Library Exception
77
//
88
// See https://swift.org/LICENSE.txt for license information
99
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13-
// CONFUSABLE(CONFUSABLE_POINT, BASEPOINT)
13+
////////////////////////////////////////////////////////////////////////////////
14+
// WARNING: This file is manually generated from
15+
// utils/UnicodeData/confusables.txt and should not be directly modified.
16+
// Run utils/generate_confusables.py to regenerate this file.
17+
////////////////////////////////////////////////////////////////////////////////
1418

15-
CONFUSABLE(0x2010, 0x2d)
16-
CONFUSABLE(0x2011, 0x2d)
17-
CONFUSABLE(0x2012, 0x2d)
18-
CONFUSABLE(0x2013, 0x2d)
19-
CONFUSABLE(0xfe58, 0x2d)
20-
CONFUSABLE(0x6d4, 0x2d)
21-
CONFUSABLE(0x2043, 0x2d)
22-
CONFUSABLE(0x2d7, 0x2d)
23-
CONFUSABLE(0x2212, 0x2d)
24-
CONFUSABLE(0x2796, 0x2d)
25-
CONFUSABLE(0x2cba, 0x2d)
26-
CONFUSABLE(0x60d, 0x2c)
27-
CONFUSABLE(0x66b, 0x2c)
28-
CONFUSABLE(0x201a, 0x2c)
29-
CONFUSABLE(0xb8, 0x2c)
30-
CONFUSABLE(0xa4f9, 0x2c)
31-
CONFUSABLE(0x903, 0x3a)
32-
CONFUSABLE(0xa83, 0x3a)
33-
CONFUSABLE(0xff1a, 0x3a)
34-
CONFUSABLE(0x589, 0x3a)
35-
CONFUSABLE(0x703, 0x3a)
36-
CONFUSABLE(0x704, 0x3a)
37-
CONFUSABLE(0x16ec, 0x3a)
38-
CONFUSABLE(0xfe30, 0x3a)
39-
CONFUSABLE(0x1803, 0x3a)
40-
CONFUSABLE(0x1809, 0x3a)
41-
CONFUSABLE(0x205a, 0x3a)
42-
CONFUSABLE(0x5c3, 0x3a)
43-
CONFUSABLE(0x2f8, 0x3a)
44-
CONFUSABLE(0xa789, 0x3a)
45-
CONFUSABLE(0x2236, 0x3a)
46-
CONFUSABLE(0x2d0, 0x3a)
47-
CONFUSABLE(0xa4fd, 0x3a)
48-
CONFUSABLE(0xff01, 0x21)
49-
CONFUSABLE(0x1c3, 0x21)
50-
CONFUSABLE(0x2d51, 0x21)
51-
CONFUSABLE(0x294, 0x3f)
52-
CONFUSABLE(0x241, 0x3f)
53-
CONFUSABLE(0x97d, 0x3f)
54-
CONFUSABLE(0x13ae, 0x3f)
55-
CONFUSABLE(0xa6eb, 0x3f)
56-
CONFUSABLE(0x1d16d, 0x2e)
57-
CONFUSABLE(0x2024, 0x2e)
58-
CONFUSABLE(0x701, 0x2e)
59-
CONFUSABLE(0x702, 0x2e)
60-
CONFUSABLE(0xa60e, 0x2e)
61-
CONFUSABLE(0x10a50, 0x2e)
62-
CONFUSABLE(0x660, 0x2e)
63-
CONFUSABLE(0x6f0, 0x2e)
64-
CONFUSABLE(0xa4f8, 0x2e)
65-
CONFUSABLE(0xff3b, 0x28)
66-
CONFUSABLE(0x2768, 0x28)
67-
CONFUSABLE(0x2772, 0x28)
68-
CONFUSABLE(0x3014, 0x28)
69-
CONFUSABLE(0xfd3e, 0x28)
70-
CONFUSABLE(0xff3d, 0x29)
71-
CONFUSABLE(0x2769, 0x29)
72-
CONFUSABLE(0x2773, 0x29)
73-
CONFUSABLE(0x3015, 0x29)
74-
CONFUSABLE(0xfd3f, 0x29)
75-
CONFUSABLE(0x2774, 0x7b)
76-
CONFUSABLE(0x1d114, 0x7b)
77-
CONFUSABLE(0x2775, 0x7d)
78-
CONFUSABLE(0x204e, 0x2a)
79-
CONFUSABLE(0x66d, 0x2a)
80-
CONFUSABLE(0x2217, 0x2a)
81-
CONFUSABLE(0x1031f, 0x2a)
82-
CONFUSABLE(0x1735, 0x2f)
83-
CONFUSABLE(0x2041, 0x2f)
84-
CONFUSABLE(0x2215, 0x2f)
85-
CONFUSABLE(0x2044, 0x2f)
86-
CONFUSABLE(0x2571, 0x2f)
87-
CONFUSABLE(0x27cb, 0x2f)
88-
CONFUSABLE(0x29f8, 0x2f)
89-
CONFUSABLE(0x1d23a, 0x2f)
90-
CONFUSABLE(0x31d3, 0x2f)
91-
CONFUSABLE(0x3033, 0x2f)
92-
CONFUSABLE(0x2cc6, 0x2f)
93-
CONFUSABLE(0x30ce, 0x2f)
94-
CONFUSABLE(0x4e3f, 0x2f)
95-
CONFUSABLE(0x2f03, 0x2f)
96-
CONFUSABLE(0xff3c, 0x5c)
97-
CONFUSABLE(0xfe68, 0x5c)
98-
CONFUSABLE(0x2216, 0x5c)
99-
CONFUSABLE(0x27cd, 0x5c)
100-
CONFUSABLE(0x29f5, 0x5c)
101-
CONFUSABLE(0x29f9, 0x5c)
102-
CONFUSABLE(0x1d20f, 0x5c)
103-
CONFUSABLE(0x1d23b, 0x5c)
104-
CONFUSABLE(0x31d4, 0x5c)
105-
CONFUSABLE(0x4e36, 0x5c)
106-
CONFUSABLE(0x2f02, 0x5c)
107-
CONFUSABLE(0xa778, 0x26)
108-
CONFUSABLE(0x16ed, 0x2b)
109-
CONFUSABLE(0x2795, 0x2b)
110-
CONFUSABLE(0x1029b, 0x2b)
111-
CONFUSABLE(0x2039, 0x3c)
112-
CONFUSABLE(0x276e, 0x3c)
113-
CONFUSABLE(0x2c2, 0x3c)
114-
CONFUSABLE(0x1d236, 0x3c)
115-
CONFUSABLE(0x1438, 0x3c)
116-
CONFUSABLE(0x16b2, 0x3c)
117-
CONFUSABLE(0x1400, 0x3d)
118-
CONFUSABLE(0x2e40, 0x3d)
119-
CONFUSABLE(0x30a0, 0x3d)
120-
CONFUSABLE(0xa4ff, 0x3d)
121-
CONFUSABLE(0x203a, 0x3e)
122-
CONFUSABLE(0x276f, 0x3e)
123-
CONFUSABLE(0x2c3, 0x3e)
124-
CONFUSABLE(0x1d237, 0x3e)
125-
CONFUSABLE(0x1433, 0x3e)
126-
CONFUSABLE(0x037e, 0x3b)
19+
20+
// CONFUSABLE(CONFUSABLE_POINT, CONFUSABLE_NAME, BASE_POINT, BASE_NAME)
21+
22+
CONFUSABLE(0x2010, "Hyphen", 0x2d, "Hyphen Minus")
23+
CONFUSABLE(0x2011, "Non-Breaking Hyphen", 0x2d, "Hyphen Minus")
24+
CONFUSABLE(0x2012, "Figure Dash", 0x2d, "Hyphen Minus")
25+
CONFUSABLE(0x2013, "En Dash", 0x2d, "Hyphen Minus")
26+
CONFUSABLE(0xfe58, "Small Em Dash", 0x2d, "Hyphen Minus")
27+
CONFUSABLE(0x6d4, "Arabic Full Stop", 0x2d, "Hyphen Minus")
28+
CONFUSABLE(0x2043, "Hyphen Bullet", 0x2d, "Hyphen Minus")
29+
CONFUSABLE(0x2d7, "Modifier Letter Minus Sign", 0x2d, "Hyphen Minus")
30+
CONFUSABLE(0x2212, "Minus Sign", 0x2d, "Hyphen Minus")
31+
CONFUSABLE(0x2796, "Heavy Minus Sign", 0x2d, "Hyphen Minus")
32+
CONFUSABLE(0x2cba, "Coptic Capital Letter Dialect-P Ni", 0x2d, "Hyphen Minus")
33+
CONFUSABLE(0x60d, "Arabic Date Separator", 0x2c, "Comma")
34+
CONFUSABLE(0x66b, "Arabic Decimal Separator", 0x2c, "Comma")
35+
CONFUSABLE(0x201a, "Single Low-9 Quotation Mark", 0x2c, "Comma")
36+
CONFUSABLE(0xb8, "Cedilla", 0x2c, "Comma")
37+
CONFUSABLE(0xa4f9, "Lisu Letter Tone Na Po", 0x2c, "Comma")
38+
CONFUSABLE(0x37e, "Greek Question Mark", 0x3b, "Semicolon")
39+
CONFUSABLE(0x903, "Devanagari Sign Visarga", 0x3a, "Colon")
40+
CONFUSABLE(0xa83, "Gujarati Sign Visarga", 0x3a, "Colon")
41+
CONFUSABLE(0xff1a, "Fullwidth Colon", 0x3a, "Colon")
42+
CONFUSABLE(0x589, "Armenian Full Stop", 0x3a, "Colon")
43+
CONFUSABLE(0x703, "Syriac Supralinear Colon", 0x3a, "Colon")
44+
CONFUSABLE(0x704, "Syriac Sublinear Colon", 0x3a, "Colon")
45+
CONFUSABLE(0x16ec, "Runic Multiple Punctuation", 0x3a, "Colon")
46+
CONFUSABLE(0xfe30, "Presentation Form For Vertical Two Dot Leader", 0x3a, "Colon")
47+
CONFUSABLE(0x1803, "Mongolian Full Stop", 0x3a, "Colon")
48+
CONFUSABLE(0x1809, "Mongolian Manchu Full Stop", 0x3a, "Colon")
49+
CONFUSABLE(0x205a, "Two Dot Punctuation", 0x3a, "Colon")
50+
CONFUSABLE(0x5c3, "Hebrew Punctuation Sof Pasuq", 0x3a, "Colon")
51+
CONFUSABLE(0x2f8, "Modifier Letter Raised Colon", 0x3a, "Colon")
52+
CONFUSABLE(0xa789, "Modifier Letter Colon", 0x3a, "Colon")
53+
CONFUSABLE(0x2236, "Ratio", 0x3a, "Colon")
54+
CONFUSABLE(0x2d0, "Modifier Letter Triangular Colon", 0x3a, "Colon")
55+
CONFUSABLE(0xa4fd, "Lisu Letter Tone Mya Jeu", 0x3a, "Colon")
56+
CONFUSABLE(0xff01, "Fullwidth Exclamation Mark", 0x21, "Exclamation Mark")
57+
CONFUSABLE(0x1c3, "Latin Letter Retroflex Click", 0x21, "Exclamation Mark")
58+
CONFUSABLE(0x2d51, "Tifinagh Letter Tuareg Yang", 0x21, "Exclamation Mark")
59+
CONFUSABLE(0x294, "Latin Letter Glottal Stop", 0x3f, "Question Mark")
60+
CONFUSABLE(0x241, "Latin Capital Letter Glottal Stop", 0x3f, "Question Mark")
61+
CONFUSABLE(0x97d, "Devanagari Letter Glottal Stop", 0x3f, "Question Mark")
62+
CONFUSABLE(0x13ae, "Cherokee Letter He", 0x3f, "Question Mark")
63+
CONFUSABLE(0xa6eb, "Bamum Letter Ntuu", 0x3f, "Question Mark")
64+
CONFUSABLE(0x1d16d, "Musical Symbol Combining Augmentation Dot", 0x2e, "Full Stop")
65+
CONFUSABLE(0x2024, "One Dot Leader", 0x2e, "Full Stop")
66+
CONFUSABLE(0x701, "Syriac Supralinear Full Stop", 0x2e, "Full Stop")
67+
CONFUSABLE(0x702, "Syriac Sublinear Full Stop", 0x2e, "Full Stop")
68+
CONFUSABLE(0xa60e, "Vai Full Stop", 0x2e, "Full Stop")
69+
CONFUSABLE(0x10a50, "Kharoshthi Punctuation Dot", 0x2e, "Full Stop")
70+
CONFUSABLE(0x660, "Arabic-Indic Digit Zero", 0x2e, "Full Stop")
71+
CONFUSABLE(0x6f0, "Extended Arabic-Indic Digit Zero", 0x2e, "Full Stop")
72+
CONFUSABLE(0xa4f8, "Lisu Letter Tone Mya Ti", 0x2e, "Full Stop")
73+
CONFUSABLE(0xff3b, "Fullwidth Left Square Bracket", 0x28, "Left Parenthesis")
74+
CONFUSABLE(0x2768, "Medium Left Parenthesis Ornament", 0x28, "Left Parenthesis")
75+
CONFUSABLE(0x2772, "Light Left Tortoise Shell Bracket Ornament", 0x28, "Left Parenthesis")
76+
CONFUSABLE(0x3014, "Left Tortoise Shell Bracket", 0x28, "Left Parenthesis")
77+
CONFUSABLE(0xfd3e, "Ornate Left Parenthesis", 0x28, "Left Parenthesis")
78+
CONFUSABLE(0xff3d, "Fullwidth Right Square Bracket", 0x29, "Right Parenthesis")
79+
CONFUSABLE(0x2769, "Medium Right Parenthesis Ornament", 0x29, "Right Parenthesis")
80+
CONFUSABLE(0x2773, "Light Right Tortoise Shell Bracket Ornament", 0x29, "Right Parenthesis")
81+
CONFUSABLE(0x3015, "Right Tortoise Shell Bracket", 0x29, "Right Parenthesis")
82+
CONFUSABLE(0xfd3f, "Ornate Right Parenthesis", 0x29, "Right Parenthesis")
83+
CONFUSABLE(0x2774, "Medium Left Curly Bracket Ornament", 0x7b, "Left Curly Bracket")
84+
CONFUSABLE(0x1d114, "Musical Symbol Brace", 0x7b, "Left Curly Bracket")
85+
CONFUSABLE(0x2775, "Medium Right Curly Bracket Ornament", 0x7d, "Right Curly Bracket")
86+
CONFUSABLE(0x204e, "Low Asterisk", 0x2a, "Asterisk")
87+
CONFUSABLE(0x66d, "Arabic Five Pointed Star", 0x2a, "Asterisk")
88+
CONFUSABLE(0x2217, "Asterisk Operator", 0x2a, "Asterisk")
89+
CONFUSABLE(0x1031f, "Old Italic Letter Ess", 0x2a, "Asterisk")
90+
CONFUSABLE(0x1735, "Philippine Single Punctuation", 0x2f, "Forward Slash")
91+
CONFUSABLE(0x2041, "Caret Insertion Point", 0x2f, "Forward Slash")
92+
CONFUSABLE(0x2215, "Division Slash", 0x2f, "Forward Slash")
93+
CONFUSABLE(0x2044, "Fraction Slash", 0x2f, "Forward Slash")
94+
CONFUSABLE(0x2571, "Box Drawings Light Diagonal Upper Right To Lower Left", 0x2f, "Forward Slash")
95+
CONFUSABLE(0x27cb, "Mathematical Rising Diagonal", 0x2f, "Forward Slash")
96+
CONFUSABLE(0x29f8, "Big Solidus", 0x2f, "Forward Slash")
97+
CONFUSABLE(0x1d23a, "Greek Instrumental Notation Symbol-47", 0x2f, "Forward Slash")
98+
CONFUSABLE(0x31d3, "Cjk Stroke Sp", 0x2f, "Forward Slash")
99+
CONFUSABLE(0x3033, "Vertical Kana Repeat Mark Upper Half", 0x2f, "Forward Slash")
100+
CONFUSABLE(0x2cc6, "Coptic Capital Letter Old Coptic Esh", 0x2f, "Forward Slash")
101+
CONFUSABLE(0x30ce, "Katakana Letter No", 0x2f, "Forward Slash")
102+
CONFUSABLE(0x4e3f, "Cjk Unified Ideograph-4E3F", 0x2f, "Forward Slash")
103+
CONFUSABLE(0x2f03, "Kangxi Radical Slash", 0x2f, "Forward Slash")
104+
CONFUSABLE(0xff3c, "Fullwidth Reverse Solidus", 0x5c, "Back Slash")
105+
CONFUSABLE(0xfe68, "Small Reverse Solidus", 0x5c, "Back Slash")
106+
CONFUSABLE(0x2216, "Set Minus", 0x5c, "Back Slash")
107+
CONFUSABLE(0x27cd, "Mathematical Falling Diagonal", 0x5c, "Back Slash")
108+
CONFUSABLE(0x29f5, "Reverse Solidus Operator", 0x5c, "Back Slash")
109+
CONFUSABLE(0x29f9, "Big Reverse Solidus", 0x5c, "Back Slash")
110+
CONFUSABLE(0x1d20f, "Greek Vocal Notation Symbol-16", 0x5c, "Back Slash")
111+
CONFUSABLE(0x1d23b, "Greek Instrumental Notation Symbol-48", 0x5c, "Back Slash")
112+
CONFUSABLE(0x31d4, "Cjk Stroke D", 0x5c, "Back Slash")
113+
CONFUSABLE(0x4e36, "Cjk Unified Ideograph-4E36", 0x5c, "Back Slash")
114+
CONFUSABLE(0x2f02, "Kangxi Radical Dot", 0x5c, "Back Slash")
115+
CONFUSABLE(0xa778, "Latin Small Letter Um", 0x26, "Ampersand")
116+
CONFUSABLE(0x16ed, "Runic Cross Punctuation", 0x2b, "Plus Sign")
117+
CONFUSABLE(0x2795, "Heavy Plus Sign", 0x2b, "Plus Sign")
118+
CONFUSABLE(0x1029b, "Lycian Letter H", 0x2b, "Plus Sign")
119+
CONFUSABLE(0x2039, "Single Left-Pointing Angle Quotation Mark", 0x3c, "Less Than Sign")
120+
CONFUSABLE(0x276e, "Heavy Left-Pointing Angle Quotation Mark Ornament", 0x3c, "Less Than Sign")
121+
CONFUSABLE(0x2c2, "Modifier Letter Left Arrowhead", 0x3c, "Less Than Sign")
122+
CONFUSABLE(0x1d236, "Greek Instrumental Notation Symbol-40", 0x3c, "Less Than Sign")
123+
CONFUSABLE(0x1438, "Canadian Syllabics Pa", 0x3c, "Less Than Sign")
124+
CONFUSABLE(0x16b2, "Runic Letter Kauna", 0x3c, "Less Than Sign")
125+
CONFUSABLE(0x1400, "Canadian Syllabics Hyphen", 0x3d, "Equals Sign")
126+
CONFUSABLE(0x2e40, "Double Hyphen", 0x3d, "Equals Sign")
127+
CONFUSABLE(0x30a0, "Katakana-Hiragana Double Hyphen", 0x3d, "Equals Sign")
128+
CONFUSABLE(0xa4ff, "Lisu Punctuation Full Stop", 0x3d, "Equals Sign")
129+
CONFUSABLE(0x203a, "Single Right-Pointing Angle Quotation Mark", 0x3e, "Greater Than Sign")
130+
CONFUSABLE(0x276f, "Heavy Right-Pointing Angle Quotation Mark Ornament", 0x3e, "Greater Than Sign")
131+
CONFUSABLE(0x2c3, "Modifier Letter Right Arrowhead", 0x3e, "Greater Than Sign")
132+
CONFUSABLE(0x1d237, "Greek Instrumental Notation Symbol-42", 0x3e, "Greater Than Sign")
133+
CONFUSABLE(0x1433, "Canadian Syllabics Po", 0x3e, "Greater Than Sign")
134+
CONFUSABLE(0x16f3f, "Miao Letter Archaic Zza", 0x3e, "Greater Than Sign")
127135

128136
#undef CONFUSABLE

include/swift/Parse/Confusables.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#ifndef SWIFT_CONFUSABLES_H
1414
#define SWIFT_CONFUSABLES_H
1515

16+
#include "llvm/ADT/StringRef.h"
1617
#include <stdint.h>
1718

1819
namespace swift {
@@ -21,6 +22,12 @@ namespace confusable {
2122
/// specification table of confusable characters and maps to punctuation,
2223
/// and returns either the expected ASCII character or 0.
2324
char tryConvertConfusableCharacterToASCII(uint32_t codepoint);
25+
26+
/// Given a Unicode codepoint which is previously determined to be confusable,
27+
/// return the name of the confusable character and the name of the base
28+
/// character.
29+
std::pair<llvm::StringRef, llvm::StringRef>
30+
getConfusableAndBaseCodepointNames(uint32_t codepoint);
2431
}
2532
}
2633

lib/Parse/Confusables.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,22 @@
1414

1515
char swift::confusable::tryConvertConfusableCharacterToASCII(uint32_t codepoint) {
1616
switch (codepoint) {
17-
#define CONFUSABLE(CONFUSABLE_POINT, BASEPOINT) \
18-
case CONFUSABLE_POINT: return BASEPOINT;
17+
#define CONFUSABLE(CONFUSABLE_POINT, CONFUSABLE_NAME, BASE_POINT, BASE_NAME) \
18+
case CONFUSABLE_POINT: \
19+
return BASE_POINT;
1920
#include "swift/Parse/Confusables.def"
2021
default: return 0;
2122
}
2223
}
24+
25+
std::pair<llvm::StringRef, llvm::StringRef>
26+
swift::confusable::getConfusableAndBaseCodepointNames(uint32_t codepoint) {
27+
switch (codepoint) {
28+
#define CONFUSABLE(CONFUSABLE_POINT, CONFUSABLE_NAME, BASE_POINT, BASE_NAME) \
29+
case CONFUSABLE_POINT: \
30+
return std::make_pair(CONFUSABLE_NAME, BASE_NAME);
31+
#include "swift/Parse/Confusables.def"
32+
default:
33+
return std::make_pair("", "");
34+
}
35+
}

lib/Parse/Lexer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2097,8 +2097,9 @@ bool Lexer::lexUnknown(bool EmitDiagnosticsIfToken) {
20972097
EncodeToUTF8(Codepoint, ConfusedChar);
20982098
llvm::SmallString<1> ExpectedChar;
20992099
ExpectedChar += ExpectedCodepoint;
2100+
auto charNames = confusable::getConfusableAndBaseCodepointNames(Codepoint);
21002101
diagnose(CurPtr - 1, diag::lex_confusable_character, ConfusedChar,
2101-
ExpectedChar)
2102+
charNames.first, ExpectedChar, charNames.second)
21022103
.fixItReplaceChars(getSourceLoc(CurPtr - 1), getSourceLoc(Tmp),
21032104
ExpectedChar);
21042105
}

lib/Sema/TypeCheckConstraints.cpp

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,8 @@ Expr *TypeChecker::resolveDeclRefExpr(UnresolvedDeclRefExpr *UDRE,
525525
llvm::SmallString<64> expectedIdentifier;
526526
bool isConfused = false;
527527
uint32_t codepoint;
528+
uint32_t firstConfusableCodepoint = 0;
529+
int totalCodepoints = 0;
528530
int offset = 0;
529531
while ((codepoint = validateUTF8CharacterAndAdvance(buffer,
530532
buffer +
@@ -533,12 +535,17 @@ Expr *TypeChecker::resolveDeclRefExpr(UnresolvedDeclRefExpr *UDRE,
533535
int length = (buffer - simpleName.get()) - offset;
534536
if (auto expectedCodepoint =
535537
confusable::tryConvertConfusableCharacterToASCII(codepoint)) {
538+
if (firstConfusableCodepoint == 0) {
539+
firstConfusableCodepoint = codepoint;
540+
}
536541
isConfused = true;
537542
expectedIdentifier += expectedCodepoint;
538543
} else {
539544
expectedIdentifier += (char)codepoint;
540545
}
541546

547+
totalCodepoints++;
548+
542549
offset += length;
543550
}
544551

@@ -580,11 +587,21 @@ Expr *TypeChecker::resolveDeclRefExpr(UnresolvedDeclRefExpr *UDRE,
580587
} else {
581588
emitBasicError();
582589

583-
Context.Diags
584-
.diagnose(Loc, diag::confusable_character,
585-
UDRE->getName().isOperator(), simpleName.str(),
586-
expectedIdentifier)
587-
.fixItReplace(Loc, expectedIdentifier);
590+
if (totalCodepoints == 1) {
591+
auto charNames = confusable::getConfusableAndBaseCodepointNames(
592+
firstConfusableCodepoint);
593+
Context.Diags
594+
.diagnose(Loc, diag::single_confusable_character,
595+
UDRE->getName().isOperator(), simpleName.str(),
596+
charNames.first, expectedIdentifier, charNames.second)
597+
.fixItReplace(Loc, expectedIdentifier);
598+
} else {
599+
Context.Diags
600+
.diagnose(Loc, diag::confusable_character,
601+
UDRE->getName().isOperator(), simpleName.str(),
602+
expectedIdentifier)
603+
.fixItReplace(Loc, expectedIdentifier);
604+
}
588605
}
589606

590607
// TODO: consider recovering from here. We may want some way to suppress

0 commit comments

Comments
 (0)