Skip to content

Commit 571c34c

Browse files
committed
Parse Java character properties
These correspond to various `is`-prefixed accessors on `java.lang.Character`. For now, parse them, but mark them unsupported.
1 parent 0c5d625 commit 571c34c

File tree

5 files changed

+40
-3
lines changed

5 files changed

+40
-3
lines changed

Sources/_RegexParser/Regex/AST/Atom.swift

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,21 +450,46 @@ extension AST.Atom.CharacterProperty {
450450
/// Some special properties implemented by PCRE and Oniguruma.
451451
case pcreSpecial(PCRESpecialCategory)
452452

453+
/// Some special properties implemented by Java.
454+
case javaSpecial(JavaSpecial)
455+
453456
/// Distinguishes the lowercase, uppercase and titlecase variants.
/// NOTE(review): the case that carries a `MapKind` is outside this diff
/// hunk — confirm how it is consumed before relying on this description.
public enum MapKind: Hashable {
454457
case lowercase
455458
case uppercase
456459
case titlecase
457460
}
458461
}
459462

460-
// TODO: erm, separate out or fold into something? splat it in?
461463
/// Names of the special PCRE properties carried by `.pcreSpecial`.
///
/// The property classifier resolves a name with
/// `PCRESpecialCategory(rawValue:)`, so each raw value is the spelling that
/// is recognized. Matching is not implemented yet: the consumer throws
/// `Unsupported` for these.
public enum PCRESpecialCategory: String, Hashable {
462464
case alphanumeric = "Xan"
463465
case posixSpace = "Xps"
464466
case perlSpace = "Xsp"
465467
case universallyNamed = "Xuc"
466468
case perlWord = "Xwd"
467469
}
470+
471+
/// Special Java properties that correspond to methods on
472+
/// `java.lang.Character`, with the `java` prefix replaced by `is`
/// (for example, `javaLowerCase` names `Character.isLowerCase`).
///
/// The property classifier resolves a name with `JavaSpecial(rawValue:)`,
/// so each raw value is the name as written inside `\p{...}`.
/// These properties are currently parse-only: the validator reports
/// "Java property" as unsupported and the consumer throws `Unsupported`.
473+
public enum JavaSpecial: String, Hashable, CaseIterable {
474+
case alphabetic = "javaAlphabetic"
475+
case defined = "javaDefined"
476+
case digit = "javaDigit"
477+
case identifierIgnorable = "javaIdentifierIgnorable"
478+
case ideographic = "javaIdeographic"
479+
case isoControl = "javaISOControl"
480+
case javaIdentifierPart = "javaJavaIdentifierPart" // not a typo: the doubled "Java" comes from `isJavaIdentifierPart`
481+
case javaIdentifierStart = "javaJavaIdentifierStart" // not a typo: the doubled "Java" comes from `isJavaIdentifierStart`
482+
case javaLetter = "javaLetter"
483+
case javaLetterOrDigit = "javaLetterOrDigit"
484+
case lowerCase = "javaLowerCase"
485+
case mirrored = "javaMirrored"
486+
case spaceChar = "javaSpaceChar"
487+
case titleCase = "javaTitleCase"
488+
case unicodeIdentifierPart = "javaUnicodeIdentifierPart"
489+
case unicodeIdentifierStart = "javaUnicodeIdentifierStart"
490+
case upperCase = "javaUpperCase"
491+
case whitespace = "javaWhitespace"
492+
}
468493
}
469494

470495
extension AST.Atom {

Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -764,12 +764,15 @@ extension Source {
764764
return .block(block)
765765
}
766766

767-
// PCRE special properties.
768-
// TODO: Normalize?
767+
// Special properties from other engines.
769768
typealias PCRESpecial = AST.Atom.CharacterProperty.PCRESpecialCategory
770769
if let pcreSpecial = PCRESpecial(rawValue: value) {
771770
return .pcreSpecial(pcreSpecial)
772771
}
772+
typealias JavaSpecial = AST.Atom.CharacterProperty.JavaSpecial
773+
if let javaSpecial = JavaSpecial(rawValue: value) {
774+
return .javaSpecial(javaSpecial)
775+
}
773776

774777
// TODO: This should be versioned, and do we want a more lax behavior for
775778
// the runtime?

Sources/_RegexParser/Regex/Parse/Sema.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,8 @@ extension RegexValidator {
175175
throw error(.unsupported("PCRE property"), at: loc)
176176
case .block:
177177
throw error(.unsupported("Unicode block property"), at: loc)
178+
case .javaSpecial:
179+
throw error(.unsupported("Java property"), at: loc)
178180
}
179181
}
180182

Sources/_StringProcessing/ConsumerInterface.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,9 @@ extension AST.Atom.CharacterProperty {
520520

521521
case .pcreSpecial(let s):
522522
throw Unsupported("TODO: map PCRE special: \(s)")
523+
524+
case .javaSpecial(let s):
525+
throw Unsupported("TODO: map Java special: \(s)")
523526
}
524527
}()
525528

Tests/RegexTests/ParseTests.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1385,6 +1385,10 @@ extension RegexTests {
13851385
parseTest(#"\p{is\#(b.rawValue)}"#, prop(.binary(b, value: true)), throwsError: .unchecked)
13861386
}
13871387

1388+
for j in AST.Atom.CharacterProperty.JavaSpecial.allCases {
1389+
parseTest(#"\p{\#(j.rawValue)}"#, prop(.javaSpecial(j)), throwsError: .unsupported)
1390+
}
1391+
13881392
// Try prefixing each block property with "in" to make sure we don't stomp
13891393
// on any other property shorthands.
13901394
for b in Unicode.Block.allCases {

0 commit comments

Comments
 (0)