Skip to content

Commit 6a1f6e9

Browse files
committed
Fix compile tests
1 parent 76667a2 commit 6a1f6e9

File tree

1 file changed

+216
-96
lines changed

1 file changed

+216
-96
lines changed

Tests/RegexTests/CompileTests.swift

Lines changed: 216 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,125 @@
1414

1515
import XCTest
1616

17+
/// A test-only classification of compiled instructions.
///
/// Cases mirror the opcodes handled by `DecodedInstr.decode(_:)`, except that
/// `match`, `matchScalar` and `matchBitset` are further split into the
/// variants that `decode(_:)` selects from the instruction payload.
enum DecodedInstr {
  // `decode(_:)` traps on an invalid opcode instead of returning this case.
  case invalid

  // Control flow and save points.
  case moveImmediate, branch, condBranchZeroElseDecrement
  case save, saveAddress, splitSaving
  case clear, clearThrough
  case accept, fail

  // Input consumption.
  case advance

  // Variants of the `match` opcode.
  case match, matchCaseInsensitive

  // Variants of the `matchScalar` opcode.
  case matchScalar, matchScalarCaseInsensitiveUnchecked
  case matchScalarCaseInsensitive, matchScalarUnchecked

  // Variants of the `matchBitset` opcode.
  case matchBitsetScalar, matchBitset

  // Custom consumers, assertions and matchers.
  case consumeBy, assertBy, matchBy

  // Captures and backreferences.
  case backreference
  case beginCapture, endCapture
  case transformCapture, captureValue

  // Built-in assertions and character classes.
  case builtinAssertion, builtinCharacterClass
}
49+
50+
extension DecodedInstr {
  /// Decodes `instruction` by looking at its opcode and payload, expanding
  /// `match`, `matchScalar` and `matchBitset` into their payload-selected
  /// variants. Every other opcode maps to the case of the same name.
  ///
  /// Must stay in sync with Processor.cycle.
  ///
  /// - Parameter instruction: The compiled instruction to classify.
  /// - Returns: The decoded variant for `instruction`.
  /// - Note: Traps with `fatalError` when the opcode is `.invalid`.
  static func decode(_ instruction: Instruction) -> DecodedInstr {
    let (opcode, payload) = instruction.destructure

    switch opcode {
    case .invalid:
      fatalError("Invalid program")
    case .moveImmediate:
      return .moveImmediate
    case .branch:
      return .branch
    case .condBranchZeroElseDecrement:
      return .condBranchZeroElseDecrement
    case .save:
      return .save
    case .saveAddress:
      return .saveAddress
    case .splitSaving:
      return .splitSaving
    case .clear:
      return .clear
    case .clearThrough:
      return .clearThrough
    case .accept:
      return .accept
    case .fail:
      return .fail
    case .advance:
      return .advance
    case .match:
      // The first payload element selects the case-insensitive variant.
      let (isCaseInsensitive, _) = payload.elementPayload
      return isCaseInsensitive ? .matchCaseInsensitive : .match
    case .matchScalar:
      // Two payload flags select one of four scalar-match variants.
      let (_, caseInsensitive, boundaryCheck) = payload.scalarPayload
      switch (caseInsensitive, boundaryCheck) {
      case (true, true):
        return .matchScalarCaseInsensitive
      case (true, false):
        return .matchScalarCaseInsensitiveUnchecked
      case (false, true):
        return .matchScalar
      case (false, false):
        return .matchScalarUnchecked
      }
    case .matchBitset:
      // The first payload element selects the scalar variant.
      let (isScalar, _) = payload.bitsetPayload
      return isScalar ? .matchBitsetScalar : .matchBitset
    case .consumeBy:
      // Written as an implicit member expression, like every other branch.
      return .consumeBy
    case .assertBy:
      return .assertBy
    case .matchBy:
      return .matchBy
    case .backreference:
      return .backreference
    case .beginCapture:
      return .beginCapture
    case .endCapture:
      return .endCapture
    case .transformCapture:
      return .transformCapture
    case .captureValue:
      return .captureValue
    case .builtinAssertion:
      return .builtinAssertion
    case .builtinCharacterClass:
      return .builtinCharacterClass
    }
  }
}
135+
17136
extension RegexTests {
18137

19138
private func testCompilationEquivalence(
@@ -147,20 +266,21 @@ extension RegexTests {
147266
for regex: String,
148267
syntax: SyntaxOptions = .traditional,
149268
semanticLevel: RegexSemanticLevel? = nil,
150-
contains targets: Set<Instruction.OpCode> = [],
151-
doesNotContain invalid: Set<Instruction.OpCode> = [],
269+
contains targets: Set<DecodedInstr> = [],
270+
doesNotContain invalid: Set<DecodedInstr> = [],
152271
file: StaticString = #file,
153272
line: UInt = #line
154273
) {
155274
do {
156275
let prog = try _compileRegex(regex, syntax, semanticLevel)
157-
var found: Set<Instruction.OpCode> = []
276+
var found: Set<DecodedInstr> = []
158277
for inst in prog.engine.instructions {
159-
found.insert(inst.opcode)
278+
let decoded = DecodedInstr.decode(inst)
279+
found.insert(decoded)
160280

161-
if invalid.contains(inst.opcode) {
281+
if invalid.contains(decoded) {
162282
XCTFail(
163-
"Compiled regex '\(regex)' contains incorrect opcode \(inst.opcode)",
283+
"Compiled regex '\(regex)' contains incorrect opcode \(decoded)",
164284
file: file,
165285
line: line)
166286
return
@@ -181,94 +301,94 @@ extension RegexTests {
181301
}
182302
}
183303

184-
// func testBitsetCompile() {
185-
// expectProgram(
186-
// for: "[abc]",
187-
// contains: [.matchBitset],
188-
// doesNotContain: [.consumeBy, .matchBitsetScalar])
189-
// expectProgram(
190-
// for: "[abc]",
191-
// semanticLevel: .unicodeScalar,
192-
// contains: [.matchBitsetScalar],
193-
// doesNotContain: [.matchBitset, .consumeBy])
194-
// }
195-
//
196-
// func testScalarOptimizeCompilation() {
197-
// // all ascii quoted literal -> elide boundary checks
198-
// expectProgram(
199-
// for: "abcd",
200-
// contains: [.matchScalar, .matchScalarUnchecked],
201-
// doesNotContain: [.match, .matchSequence, .consumeBy])
202-
// // ascii character -> matchScalar with boundary check
203-
// expectProgram(
204-
// for: "a",
205-
// contains: [.matchScalar],
206-
// doesNotContain: [.match, .matchSequence, .consumeBy, .matchScalarUnchecked])
207-
// // quoted literal is not all ascii -> match scalar when possible, always do boundary checks
208-
// expectProgram(
209-
// for: "aaa\u{301}",
210-
// contains: [.match, .matchScalar],
211-
// doesNotContain: [.consumeBy, .matchScalarUnchecked])
212-
// // scalar mode -> always emit match scalar without boundary checks
213-
// expectProgram(
214-
// for: "abcd",
215-
// semanticLevel: .unicodeScalar,
216-
// contains: [.matchScalarUnchecked],
217-
// doesNotContain: [.match, .matchSequence, .consumeBy, .matchScalar])
218-
// expectProgram(
219-
// for: "a",
220-
// semanticLevel: .unicodeScalar,
221-
// contains: [.matchScalarUnchecked],
222-
// doesNotContain: [.match, .matchSequence, .consumeBy, .matchScalar])
223-
// expectProgram(
224-
// for: "aaa\u{301}",
225-
// semanticLevel: .unicodeScalar,
226-
// contains: [.matchScalarUnchecked],
227-
// doesNotContain: [.match, .matchSequence, .consumeBy, .matchScalar])
228-
// }
229-
//
230-
// func testCaseInsensitivityCompilation() {
231-
// // quoted literal is all ascii -> match scalar case insensitive and skip
232-
// // boundary checks
233-
// expectProgram(
234-
// for: "(?i)abcd",
235-
// contains: [.matchScalarCaseInsensitiveUnchecked, .matchScalarCaseInsensitive],
236-
// doesNotContain: [.match, .matchCaseInsensitive, .matchScalar, .matchScalarUnchecked])
237-
// // quoted literal is all non-cased ascii -> emit match scalar instructions
238-
// expectProgram(
239-
// for: "(?i)&&&&",
240-
// contains: [.matchScalar, .matchScalarUnchecked],
241-
// doesNotContain: [.match, .matchCaseInsensitive,
242-
// .matchScalarCaseInsensitive, .matchScalarCaseInsensitiveUnchecked])
243-
// // quoted literal is not all ascii -> match scalar case insensitive when
244-
// // possible, match character case insensitive when needed, always perform
245-
// // boundary check
246-
// expectProgram(
247-
// for: "(?i)abcd\u{301}",
248-
// contains: [.matchCaseInsensitive, .matchScalarCaseInsensitive],
249-
// doesNotContain: [.matchScalarCaseInsensitiveUnchecked, .match, .matchScalar])
250-
// // same as before but contains ascii non cased characters -> emit matchScalar for them
251-
// expectProgram(
252-
// for: "(?i)abcd\u{301};.'!",
253-
// contains: [.matchCaseInsensitive, .matchScalarCaseInsensitive, .matchScalar],
254-
// doesNotContain: [.matchScalarCaseInsensitiveUnchecked, .match])
255-
// // contains non-ascii non-cased characters -> emit match
256-
// expectProgram(
257-
// for: "(?i)abcd\u{301};.'!💖",
258-
// contains: [.matchCaseInsensitive, .matchScalarCaseInsensitive, .matchScalar, .match],
259-
// doesNotContain: [.matchScalarCaseInsensitiveUnchecked])
260-
//
261-
// // scalar mode -> emit unchecked scalar match only, emit case insensitive
262-
// // only if the scalar is cased
263-
// expectProgram(
264-
// for: "(?i);.'!💖",
265-
// semanticLevel: .unicodeScalar,
266-
// contains: [.matchScalarUnchecked],
267-
// doesNotContain: [.matchScalarCaseInsensitiveUnchecked])
268-
// expectProgram(
269-
// for: "(?i)abcdé",
270-
// semanticLevel: .unicodeScalar,
271-
// contains: [.matchScalarCaseInsensitiveUnchecked],
272-
// doesNotContain: [.matchScalarUnchecked])
273-
// }
304+
/// Checks which bitset instruction a small custom character class compiles
/// to, with and without unicode-scalar semantics.
func testBitsetCompile() {
  let pattern = "[abc]"

  // Default semantic level: expect the non-scalar bitset instruction.
  expectProgram(
    for: pattern,
    contains: [.matchBitset],
    doesNotContain: [.matchBitsetScalar, .consumeBy])

  // Unicode-scalar semantics: expect the scalar bitset instruction instead.
  expectProgram(
    for: pattern,
    semanticLevel: .unicodeScalar,
    contains: [.matchBitsetScalar],
    doesNotContain: [.consumeBy, .matchBitset])
}
315+
316+
/// Checks which scalar-match variants are emitted for quoted literals, with
/// and without unicode-scalar semantics.
func testScalarOptimizeCompilation() {
  // Expectations shared by all three unicode-scalar cases at the bottom:
  // scalar mode always emits match scalar without boundary checks.
  let scalarModeRequired: Set<DecodedInstr> = [.matchScalarUnchecked]
  let scalarModeForbidden: Set<DecodedInstr> = [.matchScalar, .match, .consumeBy]

  // An all-ASCII quoted literal elides boundary checks.
  expectProgram(
    for: "abcd",
    contains: [.matchScalarUnchecked, .matchScalar],
    doesNotContain: [.consumeBy, .match])

  // A single ASCII character uses matchScalar with a boundary check.
  expectProgram(
    for: "a",
    contains: [.matchScalar],
    doesNotContain: [.matchScalarUnchecked, .consumeBy, .match])

  // A quoted literal that is not all ASCII matches scalars when possible
  // and always performs boundary checks.
  expectProgram(
    for: "aaa\u{301}",
    contains: [.matchScalar, .match],
    doesNotContain: [.matchScalarUnchecked, .consumeBy])

  // The same three patterns under unicode-scalar semantics.
  expectProgram(
    for: "abcd",
    semanticLevel: .unicodeScalar,
    contains: scalarModeRequired,
    doesNotContain: scalarModeForbidden)
  expectProgram(
    for: "a",
    semanticLevel: .unicodeScalar,
    contains: scalarModeRequired,
    doesNotContain: scalarModeForbidden)
  expectProgram(
    for: "aaa\u{301}",
    semanticLevel: .unicodeScalar,
    contains: scalarModeRequired,
    doesNotContain: scalarModeForbidden)
}
349+
350+
/// Checks which match variants are emitted for quoted literals compiled
/// under the `(?i)` case-insensitive option.
func testCaseInsensitivityCompilation() {
  // All-ASCII quoted literal: case-insensitive scalar matches, with
  // boundary checks skipped.
  expectProgram(
    for: "(?i)abcd",
    contains: [
      .matchScalarCaseInsensitive,
      .matchScalarCaseInsensitiveUnchecked,
    ],
    doesNotContain: [
      .match,
      .matchCaseInsensitive,
      .matchScalar,
      .matchScalarUnchecked,
    ])

  // All non-cased ASCII: plain scalar match instructions are emitted.
  expectProgram(
    for: "(?i)&&&&",
    contains: [
      .matchScalar,
      .matchScalarUnchecked,
    ],
    doesNotContain: [
      .match,
      .matchCaseInsensitive,
      .matchScalarCaseInsensitive,
      .matchScalarCaseInsensitiveUnchecked,
    ])

  // Not all ASCII: case-insensitive scalar match when possible,
  // case-insensitive character match when needed, and the boundary check
  // is always performed.
  expectProgram(
    for: "(?i)abcd\u{301}",
    contains: [
      .matchCaseInsensitive,
      .matchScalarCaseInsensitive,
    ],
    doesNotContain: [
      .matchScalarCaseInsensitiveUnchecked,
      .match,
      .matchScalar,
    ])

  // As above, plus non-cased ASCII characters, which get matchScalar.
  expectProgram(
    for: "(?i)abcd\u{301};.'!",
    contains: [
      .matchCaseInsensitive,
      .matchScalarCaseInsensitive,
      .matchScalar,
    ],
    doesNotContain: [
      .matchScalarCaseInsensitiveUnchecked,
      .match,
    ])

  // Non-ASCII non-cased characters additionally produce match.
  expectProgram(
    for: "(?i)abcd\u{301};.'!💖",
    contains: [
      .matchCaseInsensitive,
      .matchScalarCaseInsensitive,
      .matchScalar,
      .match,
    ],
    doesNotContain: [
      .matchScalarCaseInsensitiveUnchecked,
    ])

  // Scalar mode: only unchecked scalar matches are emitted, and the
  // case-insensitive form is used only if the scalar is cased.
  expectProgram(
    for: "(?i);.'!💖",
    semanticLevel: .unicodeScalar,
    contains: [.matchScalarUnchecked],
    doesNotContain: [.matchScalarCaseInsensitiveUnchecked])
  expectProgram(
    for: "(?i)abcdé",
    semanticLevel: .unicodeScalar,
    contains: [.matchScalarCaseInsensitiveUnchecked],
    doesNotContain: [.matchScalarUnchecked])
}
274394
}

0 commit comments

Comments
 (0)