Skip to content

Commit e01e43d

Browse files
authored
Remove the unsupported anyScalar case (#650)
We decided not to support the `anyScalar` character class, which would match a single Unicode scalar regardless of matching mode. However, its representation was still included in the various character class types in the regex engine, leading to unreachable code and unclear requirements when changing or adding new code. This change removes that representation where possible. The `DSLTree.Atom.CharacterClass` enum is left unchanged, since it is marked `@_spi(RegexBuilder) public`. Any use of its `anyUnicodeScalar` case is handled with a `fatalError("Unsupported")`, and that case isn't produced on any code path.
1 parent a7ba701 commit e01e43d

File tree

7 files changed

+6
-30
lines changed

7 files changed

+6
-30
lines changed

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -702,9 +702,6 @@ fileprivate extension Compiler.ByteCodeGen {
702702
case .characterClass(let cc):
703703
// Custom character class that consumes a single grapheme
704704
let model = cc.asRuntimeModel(options)
705-
guard model.consumesSingleGrapheme else {
706-
return false
707-
}
708705
builder.buildQuantify(
709706
model: model,
710707
kind,

Sources/_StringProcessing/Engine/MEBuiltins.swift

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,6 @@ extension String {
193193
switch (isScalarSemantics, cc) {
194194
case (_, .anyGrapheme):
195195
next = index(after: currentPosition)
196-
case (_, .anyScalar):
197-
next = unicodeScalars.index(after: currentPosition)
198196
case (true, _):
199197
next = unicodeScalars.index(after: currentPosition)
200198
case (false, _):
@@ -204,12 +202,6 @@ extension String {
204202
switch cc {
205203
case .any, .anyGrapheme:
206204
matched = true
207-
case .anyScalar:
208-
if isScalarSemantics {
209-
matched = true
210-
} else {
211-
matched = isOnGraphemeClusterBoundary(next)
212-
}
213205
case .digit:
214206
if isScalarSemantics {
215207
matched = scalar.properties.numericType != nil && asciiCheck

Sources/_StringProcessing/PrintAsPattern.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -760,8 +760,6 @@ extension DSLTree.Atom.CharacterClass {
760760
switch self {
761761
case .anyGrapheme:
762762
return ".anyGraphemeCluster"
763-
case .anyUnicodeScalar:
764-
return ".anyUnicodeScalar"
765763
case .digit:
766764
return ".digit"
767765
case .notDigit:
@@ -786,6 +784,8 @@ extension DSLTree.Atom.CharacterClass {
786784
return ".whitespace"
787785
case .notWhitespace:
788786
return ".whitespace.inverted"
787+
case .anyUnicodeScalar:
788+
fatalError("Unsupported")
789789
}
790790
}
791791
}

Sources/_StringProcessing/Regex/ASTConversion.swift

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,6 @@ extension AST.Atom.EscapedBuiltin {
183183
case .wordCharacter: return .word
184184
case .notWordCharacter: return .notWord
185185
case .graphemeCluster: return .anyGrapheme
186-
case .trueAnychar: return .anyUnicodeScalar
187186
default: return nil
188187
}
189188
}

Sources/_StringProcessing/Regex/DSLTree.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,6 @@ extension DSLTree.Atom.CharacterClass {
260260
public var inverted: DSLTree.Atom.CharacterClass? {
261261
switch self {
262262
case .anyGrapheme: return nil
263-
case .anyUnicodeScalar: return nil
264263
case .digit: return .notDigit
265264
case .notDigit: return .digit
266265
case .word: return .notWord
@@ -273,6 +272,8 @@ extension DSLTree.Atom.CharacterClass {
273272
case .notVerticalWhitespace: return .verticalWhitespace
274273
case .whitespace: return .notWhitespace
275274
case .notWhitespace: return .whitespace
275+
case .anyUnicodeScalar:
276+
fatalError("Unsupported")
276277
}
277278
}
278279
}

Sources/_StringProcessing/Unicode/ASCII.swift

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,7 @@ extension String {
134134

135135
// TODO: bitvectors
136136
switch cc {
137-
case .any, .anyGrapheme, .anyScalar:
138-
// TODO: should any scalar not consume CR-LF in scalar semantic mode?
137+
case .any, .anyGrapheme:
139138
return (next, true)
140139

141140
case .digit:

Sources/_StringProcessing/_CharacterClassModel.swift

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,6 @@ struct _CharacterClassModel: Hashable {
4545
case any = 0
4646
/// Any grapheme cluster
4747
case anyGrapheme
48-
/// Any Unicode scalar
49-
case anyScalar
5048
/// Character.isDigit
5149
case digit
5250
/// Horizontal whitespace: `[:blank:]`, i.e
@@ -90,15 +88,6 @@ struct _CharacterClassModel: Hashable {
9088
}
9189
}
9290

93-
extension _CharacterClassModel {
94-
var consumesSingleGrapheme: Bool {
95-
switch self.cc {
96-
case .anyScalar: return false
97-
default: return true
98-
}
99-
}
100-
}
101-
10291
extension _CharacterClassModel.Representation {
10392
/// Returns true if this CharacterClass should be matched by strict ascii under the given options
10493
func isStrictAscii(options: MatchingOptions) -> Bool {
@@ -119,7 +108,6 @@ extension _CharacterClassModel.Representation: CustomStringConvertible {
119108
switch self {
120109
case .any: return "<any>"
121110
case .anyGrapheme: return "<any grapheme>"
122-
case .anyScalar: return "<any scalar>"
123111
case .digit: return "<digit>"
124112
case .horizontalWhitespace: return "<horizontal whitespace>"
125113
case .newlineSequence: return "<newline sequence>"
@@ -185,7 +173,7 @@ extension DSLTree.Atom.CharacterClass {
185173
case .anyGrapheme:
186174
cc = .anyGrapheme
187175
case .anyUnicodeScalar:
188-
cc = .anyScalar
176+
fatalError("Unsupported")
189177
}
190178
return _CharacterClassModel(cc: cc, options: options, isInverted: inverted)
191179
}

0 commit comments

Comments
 (0)