Skip to content

Commit 1dc7755

Browse files
milsemanrctcwyvrn
authored andcommitted
[benchmark] Simplify and add more benchmarks (swiftlang#501)
* [benchmark] Add no-capture version of grapheme breaking exercise * [benchmark] Add cross-engine benchmark helpers * [benchmark] Hangul Syllable finding benchmark
1 parent 7f5bffd commit 1dc7755

9 files changed

+1603
-151
lines changed

Sources/RegexBenchmark/Benchmark.swift

Lines changed: 80 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ public protocol RegexBenchmark {
99
public struct Benchmark: RegexBenchmark {
1010
public let name: String
1111
let regex: Regex<Substring>
12-
let ty: MatchType
12+
let type: MatchType
1313
let target: String
1414

1515
public enum MatchType {
@@ -19,7 +19,7 @@ public struct Benchmark: RegexBenchmark {
1919
}
2020

2121
public func run() {
22-
switch ty {
22+
switch type {
2323
case .whole: blackHole(target.wholeMatch(of: regex))
2424
case .allMatches: blackHole(target.matches(of: regex))
2525
case .first: blackHole(target.firstMatch(of: regex))
@@ -30,21 +30,21 @@ public struct Benchmark: RegexBenchmark {
3030
public struct NSBenchmark: RegexBenchmark {
3131
public let name: String
3232
let regex: NSRegularExpression
33-
let ty: NSMatchType
33+
let type: NSMatchType
3434
let target: String
3535

3636
var range: NSRange {
3737
NSRange(target.startIndex..<target.endIndex, in: target)
3838
}
3939

4040
public enum NSMatchType {
41-
case all
41+
case allMatches
4242
case first
4343
}
4444

4545
public func run() {
46-
switch ty {
47-
case .all: blackHole(regex.matches(in: target, range: range))
46+
switch type {
47+
case .allMatches: blackHole(regex.matches(in: target, range: range))
4848
case .first: blackHole(regex.firstMatch(in: target, range: range))
4949
}
5050
}
@@ -110,6 +110,80 @@ public struct BenchmarkRunner {
110110
}
111111
}
112112

113+
/// A benchmark meant to be run across multiple engines.
///
/// The same pattern string is compiled both as a Swift `Regex` and as an
/// `NSRegularExpression`, and one benchmark per engine and match mode is
/// registered with a `BenchmarkRunner`.
struct CrossBenchmark {
  /// The base name of the benchmark
  var baseName: String

  /// The string to compile in different engines
  var regex: String

  /// The text to search
  var input: String

  // TODO: var output, for validation

  /// Whether this is whole string matching or a searching benchmark
  ///
  /// TODO: Probably better to have a whole-line vs search anywhere, maybe
  /// accommodate multi-line matching, etc.
  var isWhole: Bool = false

  /// Compiles `regex` for each engine and registers the resulting benchmarks.
  ///
  /// When `isWhole` is true, two benchmarks are registered:
  /// `<baseName>Whole` (Swift, `.whole`) and `<baseName>Whole_NS`
  /// (NSRegularExpression, `.first` over a `^…$`-anchored pattern).
  /// Otherwise four are registered: `<baseName>First`, `<baseName>All`,
  /// `<baseName>First_NS`, and `<baseName>All_NS`.
  ///
  /// Traps (via `try!`) if `regex` fails to compile in either engine.
  ///
  /// - Parameter runner: The runner that receives the benchmarks.
  func register(_ runner: inout BenchmarkRunner) {
    let swiftRegex = try! Regex(regex, as: Substring.self)

    // The NS benchmark for whole matching is run as a `.first` match, so the
    // pattern itself is anchored at both ends.
    let nsPattern = isWhole ? "^" + regex + "$" : regex
    let nsRegex = try! NSRegularExpression(pattern: nsPattern)

    if isWhole {
      runner.register(
        Benchmark(
          name: baseName + "Whole",
          regex: swiftRegex,
          type: .whole,
          target: input))
      runner.register(
        NSBenchmark(
          name: baseName + "Whole_NS",
          regex: nsRegex,
          type: .first,
          target: input))
    } else {
      runner.register(
        Benchmark(
          name: baseName + "First",
          regex: swiftRegex,
          type: .first,
          target: input))
      runner.register(
        Benchmark(
          name: baseName + "All",
          regex: swiftRegex,
          type: .allMatches,
          target: input))
      runner.register(
        NSBenchmark(
          name: baseName + "First_NS",
          regex: nsRegex,
          type: .first,
          target: input))
      runner.register(
        NSBenchmark(
          name: baseName + "All_NS",
          regex: nsRegex,
          type: .allMatches,
          target: input))
    }
  }
}
184+
185+
// TODO: Capture-containing benchmarks
186+
113187
// nom nom nom, consume the argument
114188
@inline(never)
115189
public func blackHole<T>(_ x: T) {

Sources/RegexBenchmark/CLI.swift

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@ struct Runner: ParsableCommand {
1414
func makeRunner() -> BenchmarkRunner {
1515
var benchmark = BenchmarkRunner("RegexBench", samples)
1616
benchmark.addReluctantQuant()
17-
benchmark.addBacktracking()
1817
benchmark.addCSS()
19-
benchmark.addFirstMatch()
18+
benchmark.addNotFound()
19+
benchmark.addGraphemeBreak()
20+
benchmark.addHangulSyllable()
2021
return benchmark
2122
}
2223
mutating func run() throws {

Sources/RegexBenchmark/Inputs/GraphemeBreakData.swift

Lines changed: 1452 additions & 0 deletions
Large diffs are not rendered by default.

Sources/RegexBenchmark/Suite/Backtracking.swift

Lines changed: 0 additions & 45 deletions
This file was deleted.

Sources/RegexBenchmark/Suite/CssRegex.swift

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,12 @@ import _StringProcessing
33

44
extension BenchmarkRunner {
55
mutating func addCSS() {
6-
let r = "--([a-zA-Z0-9_-]+)\\s*:\\s*(.*?):"
7-
8-
let cssRegex = Benchmark(
9-
name: "cssRegex",
10-
regex: try! Regex(r),
11-
ty: .allMatches,
12-
target: Inputs.swiftOrgCSS
13-
)
6+
let r = #"--([a-zA-Z0-9_-]+)\s*:\s*(.*?):"#
147

15-
let cssRegexNS = NSBenchmark(
16-
name: "cssRegexNS",
17-
regex: try! NSRegularExpression(pattern: r),
18-
ty: .all,
19-
target: Inputs.swiftOrgCSS
20-
)
21-
register(cssRegex)
22-
register(cssRegexNS)
8+
// FIXME: Why do `first` and `all` have the same running time?
9+
10+
let css = CrossBenchmark(
11+
baseName: "css", regex: r, input: Inputs.swiftOrgCSS)
12+
css.register(&self)
2313
}
2414
}

Sources/RegexBenchmark/Suite/FirstMatch.swift

Lines changed: 0 additions & 49 deletions
This file was deleted.
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import _StringProcessing
2+
import RegexBuilder
3+
4+
import Foundation
5+
6+
extension BenchmarkRunner {
7+
mutating func addGraphemeBreak() {
8+
let input = Inputs.graphemeBreakData
9+
let regex = #"(?:[0-9A-F]+)(?:\.\.(?:[0-9A-F]+))?\s+;\s+(?:\w+).*"#
10+
11+
let benchmark = CrossBenchmark(
12+
baseName: "GraphemeBreakNoCap", regex: regex, input: input)
13+
benchmark.register(&self)
14+
}
15+
16+
mutating func addHangulSyllable() {
17+
let input = Inputs.graphemeBreakData
18+
let regex = #"HANGUL SYLLABLE [A-Z]+(?:\.\.HANGUL SYLLABLE [A-Z]+)?"#
19+
20+
let benchmark = CrossBenchmark(
21+
baseName: "HangulSyllable", regex: regex, input: input)
22+
benchmark.register(&self)
23+
}
24+
}
25+
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import _StringProcessing
2+
import Foundation
3+
4+
extension BenchmarkRunner {
  /// Registers benchmarks whose pattern never matches the input.
  ///
  /// The input is 100,000 spaces. Two patterns are searched for: a plain
  /// `a`, and `^ +a`, which is anchored at the start and must consume the
  /// run of spaces before failing to find `a`.
  mutating func addNotFound() {
    let input = String(repeating: " ", count: 100_000)

    let notFound = CrossBenchmark(
      baseName: "notFound", regex: "a", input: input)
    notFound.register(&self)

    // Use a distinct base name: sharing "notFound" would register the
    // anchored variants under the same benchmark names as the ones above.
    let anchoredNotFound = CrossBenchmark(
      baseName: "anchoredNotFound", regex: "^ +a", input: input)
    anchoredNotFound.register(&self)
  }
}

Sources/RegexBenchmark/Suite/ReluctantQuant.swift

Lines changed: 21 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,40 +3,28 @@ import RegexBuilder
33

44
extension BenchmarkRunner {
55
mutating func addReluctantQuant() {
6-
let size = 500000
7-
let s = String(repeating: "a", count: size)
8-
9-
let reluctantQuant = Benchmark(
10-
name: "ReluctantQuant",
11-
regex: Regex {
12-
OneOrMore(.any, .reluctant)
13-
},
14-
ty: .whole,
15-
target: s
16-
)
6+
let size = 100_000
7+
let input = String(repeating: "a", count: size)
178

18-
let eagarQuantWithTerminal = Benchmark(
19-
name: "EagarQuantWithTerminal",
20-
regex: Regex {
21-
OneOrMore(.any, .eager)
22-
";"
23-
},
24-
ty: .whole,
25-
target: s + ";"
26-
)
9+
let reluctantQuant = CrossBenchmark(
10+
baseName: "ReluctantQuant",
11+
regex: #".*?"#,
12+
input: input,
13+
isWhole: true)
14+
reluctantQuant.register(&self)
2715

28-
let reluctantQuantWithTerminal = Benchmark(
29-
name: "ReluctantQuantWithTerminal",
30-
regex: Regex {
31-
OneOrMore(.any, .reluctant)
32-
";"
33-
},
34-
ty: .whole,
35-
target: s + ";"
36-
)
37-
38-
register(reluctantQuant)
39-
register(reluctantQuantWithTerminal)
40-
register(eagarQuantWithTerminal)
16+
let eagarQuantWithTerminal = CrossBenchmark(
17+
baseName: "EagarQuantWithTerminal",
18+
regex: #".*;"#,
19+
input: input + ";",
20+
isWhole: true)
21+
eagarQuantWithTerminal.register(&self)
22+
23+
let reluctantQuantWithTerminal = CrossBenchmark(
24+
baseName: "ReluctantQuantWithTerminal",
25+
regex: #".*?;"#,
26+
input: input + ";",
27+
isWhole: true)
28+
reluctantQuantWithTerminal.register(&self)
4129
}
4230
}

0 commit comments

Comments
 (0)