Skip to content

Commit 4d04019

Browse files
authored
Merge pull request #440 from hamishknight/chunk-loader
Additional character property parsing
2 parents a936e9e + 05f73db commit 4d04019

File tree

7 files changed

+1106
-347
lines changed

7 files changed

+1106
-347
lines changed

Sources/_RegexParser/Regex/AST/Atom.swift

Lines changed: 31 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -441,28 +441,55 @@ extension AST.Atom.CharacterProperty {
441441

442442
/// Character age, as per UnicodeScalar.Properties.age.
443443
case age(major: Int, minor: Int)
444-
444+
445+
/// A block property.
446+
case block(Unicode.Block)
447+
445448
case posix(Unicode.POSIXProperty)
446449

447450
/// Some special properties implemented by PCRE and Oniguruma.
448451
case pcreSpecial(PCRESpecialCategory)
449-
case onigurumaSpecial(OnigurumaSpecialProperty)
450-
452+
453+
/// Some special properties implemented by Java.
454+
case javaSpecial(JavaSpecial)
455+
451456
public enum MapKind: Hashable {
452457
case lowercase
453458
case uppercase
454459
case titlecase
455460
}
456461
}
457462

458-
// TODO: erm, separate out or fold into something? splat it in?
459463
public enum PCRESpecialCategory: String, Hashable {
460464
case alphanumeric = "Xan"
461465
case posixSpace = "Xps"
462466
case perlSpace = "Xsp"
463467
case universallyNamed = "Xuc"
464468
case perlWord = "Xwd"
465469
}
470+
471+
/// Special Java properties that correspond to methods on
472+
/// `java.lang.Character`, with the `java` prefix replaced by `is`.
473+
public enum JavaSpecial: String, Hashable, CaseIterable {
474+
case alphabetic = "javaAlphabetic"
475+
case defined = "javaDefined"
476+
case digit = "javaDigit"
477+
case identifierIgnorable = "javaIdentifierIgnorable"
478+
case ideographic = "javaIdeographic"
479+
case isoControl = "javaISOControl"
480+
case javaIdentifierPart = "javaJavaIdentifierPart" // not a typo, that's actually the name
481+
case javaIdentifierStart = "javaJavaIdentifierStart" // not a typo, that's actually the name
482+
case javaLetter = "javaLetter"
483+
case javaLetterOrDigit = "javaLetterOrDigit"
484+
case lowerCase = "javaLowerCase"
485+
case mirrored = "javaMirrored"
486+
case spaceChar = "javaSpaceChar"
487+
case titleCase = "javaTitleCase"
488+
case unicodeIdentifierPart = "javaUnicodeIdentifierPart"
489+
case unicodeIdentifierStart = "javaUnicodeIdentifierStart"
490+
case upperCase = "javaUpperCase"
491+
case whitespace = "javaWhitespace"
492+
}
466493
}
467494

468495
extension AST.Atom {

Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift

Lines changed: 355 additions & 6 deletions
Large diffs are not rendered by default.

Sources/_RegexParser/Regex/Parse/Diagnostics.swift

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,7 @@ enum ParseError: Error, Hashable {
6262
case unknownProperty(key: String?, value: String)
6363
case unrecognizedScript(String)
6464
case unrecognizedCategory(String)
65+
case unrecognizedBlock(String)
6566
case invalidAge(String)
6667
case invalidNumericValue(String)
6768
case unrecognizedNumericType(String)
@@ -195,6 +196,8 @@ extension ParseError: CustomStringConvertible {
195196
return "unrecognized script '\(value)'"
196197
case .unrecognizedCategory(let value):
197198
return "unrecognized category '\(value)'"
199+
case .unrecognizedBlock(let value):
200+
return "unrecognized block '\(value)'"
198201
case .unrecognizedNumericType(let value):
199202
return "unrecognized numeric type '\(value)'"
200203
case .invalidAge(let value):

Sources/_RegexParser/Regex/Parse/Sema.swift

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -173,8 +173,10 @@ extension RegexValidator {
173173
break
174174
case .pcreSpecial:
175175
throw error(.unsupported("PCRE property"), at: loc)
176-
case .onigurumaSpecial:
176+
case .block:
177177
throw error(.unsupported("Unicode block property"), at: loc)
178+
case .javaSpecial:
179+
throw error(.unsupported("Java property"), at: loc)
178180
}
179181
}
180182

0 commit comments

Comments
 (0)