Skip to content

Commit 93b569d

Browse files
rctcwyvrn and milseman authored
Add new benchmarks and benchmarker functionality (try 2) (#509)
* [benchmark] Add no-capture version of grapheme breaking exercise
* [benchmark] Add cross-engine benchmark helpers
* [benchmark] Hangul Syllable finding benchmark
* Add debug mode
* Fix typo in css regex
* Add HTML benchmark
* Add email regex benchmarks
* Add save/compare functionality to the benchmarker
* Clean up compare and add cli flags
* Make fixes
* oops, remove some leftover code
* Fix linux build issue + add cli option for specifying compare file
* Add benchmarks

Co-authored-by: Michael Ilseman <[email protected]>
1 parent e3e186f commit 93b569d

14 files changed

+3106
-92
lines changed

Sources/RegexBenchmark/Benchmark.swift

Lines changed: 3 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@ import Foundation
44
/// A single benchmark that a `BenchmarkRunner` can register, time, and debug.
public protocol RegexBenchmark {
55
/// Name printed next to the benchmark's timing and used as its key in the collected results.
var name: String { get }
66
/// Executes the benchmark workload once; the runner calls this once per timed sample.
func run()
7+
/// Debug hook that `BenchmarkRunner.debug()` invokes after measuring the benchmark.
func debug()
78
}
89

910
public struct Benchmark: RegexBenchmark {
1011
public let name: String
11-
let regex: Regex<Substring>
12+
let regex: Regex<AnyRegexOutput>
1213
let type: MatchType
1314
let target: String
1415

@@ -50,66 +51,6 @@ public struct NSBenchmark: RegexBenchmark {
5051
}
5152
}
5253

53-
public struct BenchmarkRunner {
54-
// Register instances of Benchmark and run them
55-
let suiteName: String
56-
var suite: [any RegexBenchmark]
57-
let samples: Int
58-
59-
public init(_ suiteName: String) {
60-
self.suiteName = suiteName
61-
self.suite = []
62-
self.samples = 20
63-
}
64-
65-
public init(_ suiteName: String, _ n: Int) {
66-
self.suiteName = suiteName
67-
self.suite = []
68-
self.samples = n
69-
}
70-
71-
public mutating func register(_ new: some RegexBenchmark) {
72-
suite.append(new)
73-
}
74-
75-
func measure(benchmark: some RegexBenchmark) -> Time {
76-
var times: [Time] = []
77-
78-
// initial run to make sure the regex has been compiled
79-
benchmark.run()
80-
81-
// fixme: use suspendingclock?
82-
for _ in 0..<samples {
83-
let start = Tick.now
84-
benchmark.run()
85-
let end = Tick.now
86-
let time = end.elapsedTime(since: start)
87-
times.append(time)
88-
}
89-
// todo: compute stdev and warn if it's too large
90-
91-
// return median time
92-
times.sort()
93-
return times[samples/2]
94-
}
95-
96-
public func run() {
97-
print("Running")
98-
for b in suite {
99-
print("- \(b.name) \(measure(benchmark: b))")
100-
}
101-
}
102-
103-
public func profile() {
104-
print("Starting")
105-
for b in suite {
106-
print("- \(b.name)")
107-
b.run()
108-
print("- done")
109-
}
110-
}
111-
}
112-
11354
/// A benchmark meant to be run across multiple engines
11455
struct CrossBenchmark {
11556
/// The base name of the benchmark
@@ -130,7 +71,7 @@ struct CrossBenchmark {
13071
var isWhole: Bool = false
13172

13273
func register(_ runner: inout BenchmarkRunner) {
133-
let swiftRegex = try! Regex(regex, as: Substring.self)
74+
let swiftRegex = try! Regex(regex)
13475

13576
let nsPattern = isWhole ? "^" + regex + "$" : regex
13677
let nsRegex: NSRegularExpression
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// This file has lines generated by createBenchmark.py
2+
// Do not remove the start of registration or end of registration markers
3+
4+
extension BenchmarkRunner {
5+
/// Builds the "RegexBench" runner and registers every benchmark group listed
/// between the registration markers.
///
/// - Parameters:
///   - samples: Number of timed samples per benchmark, forwarded to the runner's initializer.
///   - outputPath: Results folder path, forwarded to the runner's initializer.
/// - Returns: The runner with all listed groups registered.
public static func makeRunner(
6+
_ samples: Int,
7+
_ outputPath: String
8+
) -> BenchmarkRunner {
9+
// NOTE(review): the file header says createBenchmark.py generates lines in this
// file; presumably it inserts `benchmark.add…()` calls between the two marker
// comments, so the markers and the local name `benchmark` should stay as they
// are. Confirm against the script.
var benchmark = BenchmarkRunner("RegexBench", samples, outputPath)
10+
// -- start of registrations --
11+
benchmark.addReluctantQuant()
12+
benchmark.addCSS()
13+
benchmark.addNotFound()
14+
benchmark.addGraphemeBreak()
15+
benchmark.addHangulSyllable()
16+
benchmark.addHTML()
17+
benchmark.addEmail()
18+
benchmark.addCustomCharacterClasses()
19+
// -- end of registrations --
20+
return benchmark
21+
}
22+
}
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
import Foundation
2+
3+
/// Registers benchmarks, times them, and accumulates each benchmark's median
/// time in `results`.
public struct BenchmarkRunner {
  /// Name of the suite this runner represents.
  let suiteName: String
  /// Registered benchmarks, in registration order.
  var suite: [any RegexBenchmark] = []

  /// Number of timed runs per benchmark; always at least 1.
  let samples: Int
  /// Median time per benchmark name, filled in by `measure(benchmark:)`.
  var results: SuiteResult = SuiteResult()

  // Outputting
  /// Time at which the runner was created.
  let startTime = Date()
  /// Path of the folder used for result files.
  let outputPath: String

  /// Creates an empty runner.
  ///
  /// - Parameters:
  ///   - suiteName: Name of the suite.
  ///   - n: Number of timed samples per benchmark. Values below 1 are raised
  ///     to 1.
  ///   - outputPath: Path of the folder used for result files.
  public init(_ suiteName: String, _ n: Int, _ outputPath: String) {
    self.suiteName = suiteName
    // A count of 0 leaves no sample to take a median of, and a negative count
    // makes `0..<samples` trap, so clamp here once.
    self.samples = max(n, 1)
    self.outputPath = outputPath
  }

  /// Appends `new` to the suite.
  public mutating func register(_ new: some RegexBenchmark) {
    suite.append(new)
  }

  /// Times `benchmark` over `samples` runs and records the median.
  ///
  /// - Parameter benchmark: The benchmark to run.
  /// - Returns: The median of the sampled times, which is also stored in
  ///   `results` under the benchmark's name.
  mutating func measure(benchmark: some RegexBenchmark) -> Time {
    // initial run to make sure the regex has been compiled
    // todo: measure compile times, or at least how much this first run
    // differs from the later ones
    benchmark.run()

    // fixme: use suspendingclock?
    var times: [Time] = []
    times.reserveCapacity(samples)
    for _ in 0..<samples {
      let start = Tick.now
      benchmark.run()
      let end = Tick.now
      times.append(end.elapsedTime(since: start))
    }
    // todo: compute stdev and warn if it's too large

    // The median is the middle element of the sorted samples; index by the
    // number of samples actually collected.
    times.sort()
    let median = times[times.count / 2]
    results.add(name: benchmark.name, time: median)
    return median
  }

  /// Measures every registered benchmark and prints its name and median time.
  public mutating func run() {
    print("Running")
    for b in suite {
      print("- \(b.name) \(measure(benchmark: b))")
    }
  }

  /// Runs every registered benchmark exactly once, without timing it.
  public func profile() {
    print("Starting")
    for b in suite {
      print("- \(b.name)")
      b.run()
      print("- done")
    }
  }

  /// Measures every registered benchmark, then calls its `debug()` hook.
  public mutating func debug() {
    print("Debugging")
    print("========================")
    for b in suite {
      print("- \(b.name) \(measure(benchmark: b))")
      b.debug()
      print("========================")
    }
  }
}
75+
76+
extension BenchmarkRunner {
77+
78+
#if _runtime(_ObjC)
/// ISO 8601 style used to format result-file timestamps and to parse them back.
var dateStyle: Date.ISO8601FormatStyle {
  return Date.ISO8601FormatStyle()
}

/// Renders `date` with `dateStyle`.
func format(_ date: Date) -> String {
  date.formatted(dateStyle)
}
#else
/// Renders `date` through its `description`; this branch is compiled when the
/// Objective-C runtime is not available.
func format(_ date: Date) -> String {
  date.description
}
#endif
89+
90+
/// The output folder as a file URL. When nothing exists at that path the
/// folder is created, including intermediate directories.
var outputFolderUrl: URL {
  let folder = URL(fileURLWithPath: outputPath, isDirectory: true)
  let fileManager = FileManager.default
  let alreadyPresent = fileManager.fileExists(atPath: folder.path)
  if alreadyPresent {
    return folder
  }
  // A failure to create the folder is treated as fatal.
  try! fileManager.createDirectory(atPath: folder.path, withIntermediateDirectories: true)
  return folder
}
97+
98+
/// Writes the collected results to `<formatted start time>-result.json` inside
/// the output folder.
///
/// - Throws: Any error raised by `SuiteResult.save(to:)`.
public func save() throws {
  let fileName = format(startTime) + "-result.json"
  let destination = outputFolderUrl.appendingPathComponent(fileName)
  print("Saving result to \(destination.path)")
  try results.save(to: destination)
}
104+
105+
/// Loads the most recent saved result from the output folder.
///
/// - Returns: The file name of the chosen result file and its decoded contents.
/// - Throws: `NoSavedResults` when the folder holds no usable result file, any
///   error from listing the folder, and (in the fallback branch) any error from
///   decoding the chosen file.
func fetchLatestResult() throws -> (String, SuiteResult) {
  /// Raised instead of trapping on an out-of-range index when there is nothing
  /// to compare against.
  struct NoSavedResults: Error, CustomStringConvertible {
    let folder: String
    var description: String { "No saved benchmark results found in \(folder)" }
  }

  let resultSuffix = "-result.json"
  let folder = outputFolderUrl
  let candidates = try FileManager.default.contentsOfDirectory(
    at: folder,
    includingPropertiesForKeys: nil
  )

  #if _runtime(_ObjC)
  // Key each readable result by the timestamp encoded in its file name.
  var pastResults: [Date: (String, SuiteResult)] = [:]
  for resultFile in candidates {
    do {
      let dateString = resultFile.lastPathComponent.replacingOccurrences(
        of: resultSuffix,
        with: "")
      let date = try dateStyle.parse(dateString)
      let result = try SuiteResult.load(from: resultFile)
      pastResults[date] = (resultFile.lastPathComponent, result)
    } catch {
      print("Warning: Found invalid result file \(resultFile.lastPathComponent) in results directory, skipping")
    }
  }

  guard let latest = pastResults.max(by: { $0.key < $1.key }) else {
    throw NoSavedResults(folder: folder.path)
  }
  return latest.value
  #else
  // corelibs-foundation lacks Date.FormatStyle entirely, so the dates cannot be
  // parsed here; pick the latest file by name instead. The directory listing
  // comes back in no particular order, so it has to be sorted explicitly, and
  // files that are not result files are ignored.
  // NOTE(review): this relies on the file-name timestamps sorting
  // chronologically as text - confirm for the `Date.description` format.
  let resultFiles = candidates
    .filter { $0.lastPathComponent.hasSuffix(resultSuffix) }
    .sorted { $0.lastPathComponent < $1.lastPathComponent }
  guard let resultFile = resultFiles.last else {
    throw NoSavedResults(folder: folder.path)
  }
  let pastResult = try SuiteResult.load(from: resultFile)
  return (resultFile.lastPathComponent, pastResult)
  #endif
}
140+
141+
/// Prints how the current results differ from a previously saved result file.
///
/// - Parameter against: Path of the result file to compare with. When `nil`,
///   the result returned by `fetchLatestResult()` is used.
/// - Throws: Any error from loading the comparison file or from
///   `fetchLatestResult()`.
public func compare(against: String?) throws {
  let compareFile: String
  let compareResult: SuiteResult

  if let compareFilePath = against {
    let compareFileURL = URL(fileURLWithPath: compareFilePath)
    compareResult = try SuiteResult.load(from: compareFileURL)
    compareFile = compareFileURL.lastPathComponent
  } else {
    (compareFile, compareResult) = try fetchLatestResult()
  }

  let diff = results.compare(with: compareResult)
  let regressions = diff.filter { (_, change) in change.seconds > 0 }
  let improvements = diff.filter { (_, change) in change.seconds < 0 }

  // Prints one line per changed benchmark: name, new time, old time, the
  // difference, and the difference as a percentage of the old time.
  // Entries are sorted by name because Dictionary iteration order is not
  // stable between runs, which made two reports hard to compare by eye.
  func report(_ changes: [String: Time]) {
    for (name, change) in changes.sorted(by: { $0.key < $1.key }) {
      guard
        let oldVal = compareResult.results[name],
        let newVal = results.results[name]
      else { continue }
      let percentage = change.seconds / oldVal.seconds
      print("- \(name)\t\t\(newVal)\t\(oldVal)\t\(change)\t\((percentage * 100).rounded())%")
    }
  }

  print("Comparing against benchmark result file \(compareFile)")
  print("=== Regressions ====================================================")
  report(regressions)
  print("=== Improvements ====================================================")
  report(improvements)
}
173+
}
174+
175+
/// Benchmark timings keyed by benchmark name.
struct SuiteResult {
  var results: [String: Time] = [:]

  /// Stores `time` under `name`, replacing any earlier entry for that name.
  public mutating func add(name: String, time: Time) {
    results[name] = time
  }

  /// Computes the per-benchmark time difference against another suite.
  ///
  /// - Parameter other: The suite to compare with.
  /// - Returns: For every name present in both suites, this suite's time minus
  ///   `other`'s time, kept only when the change exceeds 0.5% of `other`'s time
  ///   in either direction.
  public func compare(with other: SuiteResult) -> [String: Time] {
    var changes: [String: Time] = [:]
    for (name, time) in results {
      guard let baseline = other.results[name] else { continue }
      let delta = time - baseline
      let percentChange = 100 * delta.seconds / baseline.seconds
      if abs(percentChange) > 0.5 {
        changes[name] = delta
      }
    }
    return changes
  }
}
195+
196+
extension SuiteResult: Codable {
  /// Encodes the suite as JSON and writes it atomically to `url`.
  ///
  /// - Throws: Encoding errors and file-write errors.
  public func save(to url: URL) throws {
    let json = try JSONEncoder().encode(self)
    try json.write(to: url, options: .atomic)
  }

  /// Reads the file at `url` and decodes its JSON contents.
  ///
  /// - Throws: File-read errors and decoding errors.
  public static func load(from url: URL) throws -> SuiteResult {
    let contents = try Data(contentsOf: url)
    return try JSONDecoder().decode(SuiteResult.self, from: contents)
  }
}

Sources/RegexBenchmark/CLI.swift

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,30 +5,46 @@ struct Runner: ParsableCommand {
55
@Argument(help: "Names of benchmarks to run")
66
var specificBenchmarks: [String] = []
77

8-
@Option(help: "Run only once for profiling purposes")
8+
@Flag(help: "Run only once for profiling purposes")
99
var profile = false
1010

1111
@Option(help: "How many samples to collect for each benchmark")
1212
var samples = 20
13-
14-
func makeRunner() -> BenchmarkRunner {
15-
var benchmark = BenchmarkRunner("RegexBench", samples)
16-
benchmark.addReluctantQuant()
17-
benchmark.addCSS()
18-
benchmark.addNotFound()
19-
benchmark.addGraphemeBreak()
20-
benchmark.addHangulSyllable()
21-
return benchmark
22-
}
13+
14+
@Flag(help: "Debug benchmark regexes")
15+
var debug = false
16+
17+
@Option(help: "Output folder")
18+
var outputPath = "./results/"
19+
20+
@Flag(help: "Should the results be saved")
21+
var save = false
22+
23+
@Flag(help: "Compare this result with the latest saved result")
24+
var compare = false
25+
26+
@Option(help: "The result file to compare against, if this flag is not set it will compare against the most recent result file")
27+
var compareFile: String?
28+
2329
/// Builds the runner, narrows it to the requested benchmarks, then profiles,
/// debugs, or measures them.
///
/// In the plain measuring mode the results are optionally compared against a
/// saved result and optionally saved, in that order.
///
/// - Throws: `ValidationError` when both `profile` and `debug` are set, plus
///   any error from comparing or saving results.
mutating func run() throws {
  var runner = BenchmarkRunner.makeRunner(samples, outputPath)

  // todo: regex based filter
  if !specificBenchmarks.isEmpty {
    runner.suite = runner.suite.filter { b in specificBenchmarks.contains(b.name) }
  }

  switch (profile, debug) {
  case (true, true):
    // Throwing makes the command exit with a failure status; printing and
    // returning reported success to the caller.
    throw ValidationError("Cannot run both profile and debug")
  case (true, false):
    runner.profile()
  case (false, true):
    runner.debug()
  case (false, false):
    runner.run()
    if compare {
      try runner.compare(against: compareFile)
    }
    if save {
      try runner.save()
    }
  }
}
3450
}

0 commit comments

Comments
 (0)