Skip to content

Commit ade8f01

Browse files
authored
Merge pull request #373 from hamishknight/case-in-prop
2 parents e748aea + 925f51b commit ade8f01

File tree

5 files changed

+20
-5
lines changed

5 files changed

+20
-5
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
.DS_Store
22

3+
# The current toolchain is dumping files in the package root, rude
4+
*.emit-module.*
5+
36
# Xcode
47
#
58
# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore

Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ extension Source {
3232
static private func classifyGeneralCategory(
3333
_ str: String
3434
) -> Unicode.ExtendedGeneralCategory? {
35-
// This uses the aliases defined in
36-
// https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt.
35+
// This uses the aliases defined in https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt.
36+
// Additionally, uses the `L& = Lc` alias defined by PCRE.
3737
withNormalizedForms(str) { str in
3838
switch str {
3939
case "c", "other": return .other
@@ -43,7 +43,7 @@ extension Source {
4343
case "co", "privateuse": return .privateUse
4444
case "cs", "surrogate": return .surrogate
4545
case "l", "letter": return .letter
46-
case "lc", "casedletter": return .casedLetter
46+
case "lc", "l&", "casedletter": return .casedLetter
4747
case "ll", "lowercaseletter": return .lowercaseLetter
4848
case "lm", "modifierletter": return .modifierLetter
4949
case "lo", "otherletter": return .otherLetter

Sources/_StringProcessing/ConsumerInterface.swift

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -691,8 +691,9 @@ extension Unicode.ExtendedGeneralCategory {
691691
])
692692

693693
case .casedLetter:
694-
throw Unsupported(
695-
"TODO: cased letter? not the property?")
694+
return consumeScalarGCs([
695+
.uppercaseLetter, .lowercaseLetter, .titlecaseLetter
696+
])
696697

697698
case .control:
698699
return consumeScalarGC(.control)

Tests/RegexTests/MatchTests.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,14 @@ extension RegexTests {
693693
firstMatchTest(#"\p{gc=L}"#, input: "123abcXYZ", match: "a")
694694
firstMatchTest(#"\p{Lu}"#, input: "123abcXYZ", match: "X")
695695

696+
// U+0374 GREEK NUMERAL SIGN (Lm)
697+
// U+00AA FEMININE ORDINAL INDICATOR (Lo) -- both are letters, but neither is a cased letter (Lu/Ll/Lt).
698+
firstMatchTest(#"\p{L}"#, input: "\u{0374}\u{00AA}123abcXYZ", match: "\u{0374}")
699+
firstMatchTest(#"\p{Lc}"#, input: "\u{0374}\u{00AA}123abcXYZ", match: "a")
700+
firstMatchTest(#"\p{Lc}"#, input: "\u{0374}\u{00AA}123XYZ", match: "X")
701+
firstMatchTest(#"\p{L&}"#, input: "\u{0374}\u{00AA}123abcXYZ", match: "a")
702+
firstMatchTest(#"\p{L&}"#, input: "\u{0374}\u{00AA}123XYZ", match: "X")
703+
696704
firstMatchTest(
697705
#"\P{Cc}"#, input: "\n\n\nXYZ", match: "X")
698706
firstMatchTest(

Tests/RegexTests/ParseTests.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,9 @@ extension RegexTests {
11561156
#"\p{C}+"#,
11571157
oneOrMore(of: prop(.generalCategory(.other))))
11581158

1159+
// `L&` is PCRE's alias for `Lc` (cased letter).
1160+
parseTest(#"\p{L&}"#, prop(.generalCategory(.casedLetter)))
1161+
11591162
// UAX44-LM3 means all of the below are equivalent.
11601163
let lowercaseLetter = prop(.generalCategory(.lowercaseLetter))
11611164
parseTest(#"\p{ll}"#, lowercaseLetter)

0 commit comments

Comments
 (0)