Skip to content

Commit a2c1d8a

Browse files
committed
Do not consider # an emoji in the lexer
Fix #109746.
1 parent 2fb0e8d commit a2c1d8a

File tree

1 file changed

+6
-3
lines changed
  • compiler/rustc_lexer/src

1 file changed

+6
-3
lines changed

compiler/rustc_lexer/src/lib.rs

+6 -3
Original file line number | Diff line number | Diff line change
@@ -638,7 +638,7 @@ impl Cursor<'_> {
638 638
|| self.first().is_digit(10)
639 639
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
640 640
// 5.0, but Unicode is already newer than this.
641-
|| unic_emoji_char::is_emoji(self.first())
641+
|| !self.first().is_ascii() && unic_emoji_char::is_emoji(self.first())
642 642
};
643 643

644 644
if !can_be_a_lifetime {
@@ -658,7 +658,7 @@ impl Cursor<'_> {
658 658

659 659
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
660 660
// 5.0, but Unicode is already newer than this.
661-
if unic_emoji_char::is_emoji(self.first()) {
661+
if !self.first().is_ascii() && unic_emoji_char::is_emoji(self.first()) {
662 662
contains_emoji = true;
663 663
} else {
664 664
// Skip the literal contents.
@@ -671,7 +671,10 @@ impl Cursor<'_> {
671 671
true
672 672
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
673 673
// 5.0, but Unicode is already newer than this.
674-
} else if unic_emoji_char::is_emoji(c) {
674+
// `#` ends an identifier, but is counted as an emoji because of
675+
// https://github.com/open-i18n/rust-unic/issues/280. These can be common on macros, so
676+
// we need to handle them properly. (#109746)
677+
} else if !c.is_ascii() && unic_emoji_char::is_emoji(c) {
675 678
contains_emoji = true;
676 679
true
677 680
} else {

0 commit comments

Comments
 (0)