Skip to content

Commit f64d020

Browse files
committed
Throw RegexCompilationError for invalid character class bounds
Make sure we throw the right error for ranges that are invalid in grapheme mode, but are valid in scalar mode.
1 parent addf750 commit f64d020

File tree

3 files changed

+77
-5
lines changed

3 files changed

+77
-5
lines changed

Sources/_StringProcessing/Compiler.swift

Lines changed: 27 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -42,19 +42,43 @@ class Compiler {
4242
}
4343
}
4444

45+
/// A wrapper that lets a metatype (`Any.Type`) be used where `Hashable`
/// is required.
///
/// Two wrappers are equal exactly when they wrap the same metatype, and the
/// hash is derived from that same metatype identity, so `==` and `hash(into:)`
/// stay consistent with each other.
struct AnyHashableType: CustomStringConvertible, Hashable {
  /// The wrapped metatype.
  var ty: Any.Type

  /// Wraps `ty`.
  init(_ ty: Any.Type) {
    self.ty = ty
  }

  /// The printed name of the wrapped type.
  var description: String {
    String(describing: ty)
  }

  /// Identity of the wrapped metatype; the single source of truth for both
  /// equality and hashing.
  private var identity: ObjectIdentifier {
    ObjectIdentifier(ty)
  }

  static func == (lhs: Self, rhs: Self) -> Bool {
    lhs.identity == rhs.identity
  }

  func hash(into hasher: inout Hasher) {
    hasher.combine(identity)
  }
}
60+
4561
// An error produced when compiling a regular expression.
46-
enum RegexCompilationError: Error, CustomStringConvertible {
62+
enum RegexCompilationError: Error, Hashable, CustomStringConvertible {
4763
// TODO: Source location?
4864
case uncapturedReference
65+
case incorrectOutputType(incorrect: AnyHashableType, correct: AnyHashableType)
66+
case invalidCharacterClassRangeOperand(Character)
67+
68+
/// Convenience factory that builds the `.incorrectOutputType` case directly
/// from raw metatypes, wrapping each one in `AnyHashableType`.
///
/// - Parameters:
///   - incorrect: The type that was requested.
///   - correct: The type that should have been requested.
static func incorrectOutputType(
  incorrect: Any.Type, correct: Any.Type
) -> Self {
  // The arguments below are `AnyHashableType` values, so this resolves to the
  // enum case rather than recursing into this factory.
  let wrappedIncorrect = AnyHashableType(incorrect)
  let wrappedCorrect = AnyHashableType(correct)
  return .incorrectOutputType(
    incorrect: wrappedIncorrect, correct: wrappedCorrect)
}
4973

50-
case incorrectOutputType(incorrect: Any.Type, correct: Any.Type)
51-
5274
/// A human-readable message describing this compilation error.
var description: String {
  let message: String
  switch self {
  case .uncapturedReference:
    message = "Found a reference used before it captured any match."
  case let .incorrectOutputType(actual, expected):
    message = "Cast to incorrect type 'Regex<\(actual)>', expected 'Regex<\(expected)>'"
  case let .invalidCharacterClassRangeOperand(bound):
    message = "'\(bound)' is an invalid bound for character class range"
  }
  return message
}
6084
}

Sources/_StringProcessing/ConsumerInterface.swift

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -362,12 +362,20 @@ extension DSLTree.CustomCharacterClass.Member {
362362
}
363363
return c
364364
case let .range(low, high):
365-
guard let lhs = low.literalCharacterValue?.singleScalar, lhs.isNFC else {
365+
guard let lhsChar = low.literalCharacterValue else {
366366
throw Unsupported("\(low) in range")
367367
}
368-
guard let rhs = high.literalCharacterValue?.singleScalar, rhs.isNFC else {
368+
guard let rhsChar = high.literalCharacterValue else {
369369
throw Unsupported("\(high) in range")
370370
}
371+
372+
// We must have NFC single scalar bounds.
373+
guard let lhs = lhsChar.singleScalar, lhs.isNFC else {
374+
throw RegexCompilationError.invalidCharacterClassRangeOperand(lhsChar)
375+
}
376+
guard let rhs = rhsChar.singleScalar, rhs.isNFC else {
377+
throw RegexCompilationError.invalidCharacterClassRangeOperand(rhsChar)
378+
}
371379
guard lhs <= rhs else {
372380
throw Unsupported("Invalid range \(low)-\(high)")
373381
}

Tests/RegexTests/CompileTests.swift

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111

1212
@testable import _RegexParser
1313
@testable import _StringProcessing
14+
import TestSupport
1415

1516
import XCTest
1617

@@ -168,6 +169,45 @@ extension RegexTests {
168169
}
169170
}
170171

172+
/// Asserts that compiling `regex` via `_compileRegex` throws exactly `error`.
///
/// Records a test failure when compilation succeeds, when a different
/// `RegexCompilationError` is thrown, or when an error of another type is
/// thrown.
///
/// - Parameters:
///   - regex: The regex source to compile.
///   - error: The compilation error that is expected to be thrown.
///   - file: Source file used to attribute failures; defaults to the caller.
///   - line: Source line used to attribute failures; defaults to the caller.
private func testCompileError(
  _ regex: String, _ error: RegexCompilationError,
  file: StaticString = #file, line: UInt = #line
) {
  do {
    _ = try _compileRegex(regex)
    XCTFail("Expected compile error", file: file, line: line)
  } catch let err as RegexCompilationError {
    XCTAssertEqual(err, error, file: file, line: line)
  } catch {
    // Inside this clause `error` is the implicitly bound caught error (it
    // shadows the parameter). Report it so the failure says what was thrown.
    XCTFail("Unknown compile error: \(error)", file: file, line: line)
  }
}
185+
186+
/// Checks that character class ranges whose lower bound is not a single
/// scalar are rejected with `invalidCharacterClassRangeOperand`, naming the
/// offending bound.
func testInvalidScalarCoalescing() throws {
  guard ensureNewStdlib() else { return }

  // Non-single-scalar bounds. Each entry records its own source line so a
  // failure is still attributed to the individual case.
  let cases: [(pattern: String, bound: Character, line: UInt)] = [
    (#"[a\u{302}-✅]"#, "a\u{302}", #line),
    (#"[e\u{301}-\u{302}]"#, "e\u{301}", #line),
    (#"[\u{73}\u{323}\u{307}-\u{1E00}]"#, "\u{73}\u{323}\u{307}", #line),
    (#"[a\u{315}\u{301}-\u{302}]"#, "a\u{315}\u{301}", #line),
    (#"[a-z1e\u{301}-\u{302}\u{E1}3-59]"#, "e\u{301}", #line),
    (#"[[e\u{301}-\u{302}]&&e\u{303}]"#, "e\u{301}", #line),
  ]

  for testCase in cases {
    testCompileError(
      testCase.pattern,
      .invalidCharacterClassRangeOperand(testCase.bound),
      line: testCase.line)
  }
}
210+
171211
func testCompileQuantification() throws {
172212

173213
// NOTE: While we might change how we compile

0 commit comments

Comments
 (0)