Skip to content

Commit d5e5da6

Browse files
committed
syntax: fix 'C' alias bug
This re-generates the Unicode table for property name aliases after fixing a bug in property name canonicalization. Namely, the 'isc' alias of the 'ISO_Comment' property was being canonicalized to 'c', which is actually an alias of the 'Other' general category. This happened because the canonicalization procedure ignores 'is' prefixes, as permitted by UTS#18, so 'isc' was reduced to 'c'. Fixes #466.
1 parent f7ea409 commit d5e5da6

File tree

3 files changed

+22
-7
lines changed

3 files changed

+22
-7
lines changed

regex-syntax/src/hir/translate.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1897,6 +1897,12 @@ mod tests {
18971897
assert_eq!(
18981898
t(r"\p{gc=Separator}"),
18991899
hir_uclass_query(ClassQuery::Binary("Z")));
1900+
assert_eq!(
1901+
t(r"\p{Other}"),
1902+
hir_uclass_query(ClassQuery::Binary("Other")));
1903+
assert_eq!(
1904+
t(r"\pC"),
1905+
hir_uclass_query(ClassQuery::Binary("Other")));
19001906

19011907
assert_eq!(
19021908
t(r"\PZ"),

regex-syntax/src/unicode.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ impl<'a> ClassQuery<'a> {
217217
/// Like ClassQuery, but its parameters have been canonicalized. This also
218218
/// differentiates binary properties from flattened general categories and
219219
/// scripts.
220-
#[derive(Debug)]
220+
#[derive(Debug, Eq, PartialEq)]
221221
enum CanonicalClassQuery {
222222
/// The canonical binary property name.
223223
Binary(&'static str),
@@ -459,4 +459,14 @@ mod tests {
459459

460460
assert!(!contains_simple_case_mapping('☃', '☃'));
461461
}
462+
463+
#[test]
464+
fn regression_466() {
465+
use super::{CanonicalClassQuery, ClassQuery};
466+
467+
let q = ClassQuery::OneLetter('C');
468+
assert_eq!(
469+
q.canonicalize().unwrap(),
470+
CanonicalClassQuery::GeneralCategory("Other"));
471+
}
462472
}

regex-syntax/src/unicode_tables/property_names.rs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
1515
("bidipairedbrackettype", "Bidi_Paired_Bracket_Type"), ("blk", "Block"),
1616
("block", "Block"), ("bmg", "Bidi_Mirroring_Glyph"),
1717
("bpb", "Bidi_Paired_Bracket"), ("bpt", "Bidi_Paired_Bracket_Type"),
18-
("c", "ISO_Comment"),
1918
("canonicalcombiningclass", "Canonical_Combining_Class"),
2019
("cased", "Cased"), ("casefolding", "Case_Folding"),
2120
("caseignorable", "Case_Ignorable"), ("ccc", "Canonical_Combining_Class"),
@@ -71,11 +70,11 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
7170
("indicpositionalcategory", "Indic_Positional_Category"),
7271
("indicsyllabiccategory", "Indic_Syllabic_Category"),
7372
("inpc", "Indic_Positional_Category"), ("insc", "Indic_Syllabic_Category"),
74-
("jamoshortname", "Jamo_Short_Name"), ("jg", "Joining_Group"),
75-
("joinc", "Join_Control"), ("joincontrol", "Join_Control"),
76-
("joininggroup", "Joining_Group"), ("joiningtype", "Joining_Type"),
77-
("jsn", "Jamo_Short_Name"), ("jt", "Joining_Type"),
78-
("kaccountingnumeric", "kAccountingNumeric"),
73+
("isc", "ISO_Comment"), ("jamoshortname", "Jamo_Short_Name"),
74+
("jg", "Joining_Group"), ("joinc", "Join_Control"),
75+
("joincontrol", "Join_Control"), ("joininggroup", "Joining_Group"),
76+
("joiningtype", "Joining_Type"), ("jsn", "Jamo_Short_Name"),
77+
("jt", "Joining_Type"), ("kaccountingnumeric", "kAccountingNumeric"),
7978
("kcompatibilityvariant", "kCompatibilityVariant"), ("kiicore", "kIICore"),
8079
("kirggsource", "kIRG_GSource"), ("kirghsource", "kIRG_HSource"),
8180
("kirgjsource", "kIRG_JSource"), ("kirgkpsource", "kIRG_KPSource"),

0 commit comments

Comments
 (0)