swiftlang · rctcwyvrn · Jun 22, 2022 · Jun 19, 2022 · Jun 19, 2022 · Jun 19, 2022
diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift
@@ -4,11 +4,12 @@ import Foundation
+/// A named benchmark that a `BenchmarkRunner` can register, time, and debug.
 public protocol RegexBenchmark {
+  /// Name printed next to each result and used as the key for recorded timings.
   var name: String { get }
+  /// Executes the benchmark once; the runner times individual calls to this.
   func run()
+  /// Called by `BenchmarkRunner.debug()` right after the benchmark is measured.
+  func debug()
 }
 
 public struct Benchmark: RegexBenchmark {
   public let name: String
-  let regex: Regex<Substring>
+  let regex: Regex<AnyRegexOutput>
   let type: MatchType
   let target: String
 
@@ -50,66 +51,6 @@ public struct NSBenchmark: RegexBenchmark {
   }
 }
 
-public struct BenchmarkRunner {
-  // Register instances of Benchmark and run them
-  let suiteName: String
-  var suite: [any RegexBenchmark]
-  let samples: Int
-
-  public init(_ suiteName: String) {
-    self.suiteName = suiteName
-    self.suite = []
-    self.samples = 20
-  }
-
-  public init(_ suiteName: String, _ n: Int) {
-    self.suiteName = suiteName
-    self.suite = []
-    self.samples = n
-  }
-
-  public mutating func register(_ new: some RegexBenchmark) {
-    suite.append(new)
-  }
-
-  func measure(benchmark: some RegexBenchmark) -> Time {
-    var times: [Time] = []
-
-    // initial run to make sure the regex has been compiled
-    benchmark.run()
-
-    // fixme: use suspendingclock?
-    for _ in 0..<samples {
-      let start = Tick.now
-      benchmark.run()
-      let end = Tick.now
-      let time = end.elapsedTime(since: start)
-      times.append(time)
-    }
-    // todo: compute stdev and warn if it's too large
-
-    // return median time
-    times.sort()
-    return times[samples/2]
-  }
-
-  public func run() {
-    print("Running")
-    for b in suite {
-      print("- \(b.name) \(measure(benchmark: b))")
-    }
-  }
-
-  public func profile() {
-    print("Starting")
-    for b in suite {
-      print("- \(b.name)")
-      b.run()
-      print("- done")
-    }
-  }
-}
-
 /// A benchmark meant to be ran across multiple engines
 struct CrossBenchmark {
   /// The base name of the benchmark
@@ -130,7 +71,7 @@ struct CrossBenchmark {
   var isWhole: Bool = false
 
   func register(_ runner: inout BenchmarkRunner) {
-    let swiftRegex = try! Regex(regex, as: Substring.self)
+    let swiftRegex = try! Regex(regex)
 
     let nsPattern = isWhole ? "^" + regex + "$" : regex
     let nsRegex: NSRegularExpression

diff --git a/Sources/RegexBenchmark/BenchmarkRegistration.swift b/Sources/RegexBenchmark/BenchmarkRegistration.swift
@@ -0,0 +1,22 @@
+// This file has lines generated by createBenchmark.py
+// Do not remove the start of registration or end of registration markers
+
+extension BenchmarkRunner {
+  /// Builds the "RegexBench" runner and invokes every `add…` method listed
+  /// between the registration markers below.
+  ///
+  /// The `add…` methods are defined elsewhere; presumably each one registers
+  /// one or more benchmarks on the runner.
+  ///
+  /// - Parameters:
+  ///   - samples: Forwarded to `BenchmarkRunner.init` as the sample count.
+  ///   - outputPath: Forwarded to `BenchmarkRunner.init` as the output path.
+  /// - Returns: The runner after all listed `add…` calls have been applied.
+  public static func makeRunner(
+    _ samples: Int,
+    _ outputPath: String
+  ) -> BenchmarkRunner {
+    var benchmark = BenchmarkRunner("RegexBench", samples, outputPath)
+    // The marker comments below delimit the region edited by
+    // createBenchmark.py; keep both markers in place.
+    // -- start of registrations --
+    benchmark.addReluctantQuant()
+    benchmark.addCSS()
+    benchmark.addNotFound()
+    benchmark.addGraphemeBreak()
+    benchmark.addHangulSyllable()
+    benchmark.addHTML()
+    benchmark.addEmail()
+    benchmark.addCustomCharacterClasses()
+    // -- end of registrations --
+    return benchmark
+  }
+}
diff --git a/Sources/RegexBenchmark/BenchmarkRunner.swift b/Sources/RegexBenchmark/BenchmarkRunner.swift
@@ -0,0 +1,208 @@
+import Foundation
+
+/// Registers benchmarks, times them, and accumulates their median timings.
+public struct BenchmarkRunner {
+  /// Name of the suite this runner represents.
+  let suiteName: String
+  /// Registered benchmarks, executed in registration order.
+  var suite: [any RegexBenchmark] = []
+
+  /// Number of timed runs collected per benchmark; always greater than zero.
+  let samples: Int
+  /// Median time of every benchmark measured so far, keyed by benchmark name.
+  var results: SuiteResult = SuiteResult()
+
+  // Outputting
+  /// Time at which this runner was created.
+  let startTime = Date()
+  /// Path of the folder that result files are written to and read from.
+  let outputPath: String
+
+  /// Creates a runner with no registered benchmarks.
+  ///
+  /// - Parameters:
+  ///   - suiteName: Name of the suite.
+  ///   - n: Number of timed samples to collect per benchmark.
+  ///   - outputPath: Folder used for saved result files.
+  /// - Precondition: `n` must be greater than zero.
+  public init(_ suiteName: String, _ n: Int, _ outputPath: String) {
+    // `measure` iterates `0..<samples` and then picks the median sample.
+    // A negative count traps on the range and a zero count traps on the
+    // subscript, so fail early with a message that names the real problem.
+    precondition(n > 0, "BenchmarkRunner needs at least one sample, got \(n)")
+    self.suiteName = suiteName
+    self.samples = n
+    self.outputPath = outputPath
+  }
+
+  /// Appends `new` to the list of benchmarks to run.
+  public mutating func register(_ new: some RegexBenchmark) {
+    suite.append(new)
+  }
+
+  /// Runs `benchmark` once untimed, then `samples` more times while timing
+  /// each run, and records the median under the benchmark's name.
+  ///
+  /// - Returns: The median of the collected times (the upper median when
+  ///   the number of samples is even).
+  mutating func measure(benchmark: some RegexBenchmark) -> Time {
+    var times: [Time] = []
+    times.reserveCapacity(samples)
+
+    // initial run to make sure the regex has been compiled
+    // todo: measure compile times, or at least how much this first run
+    //       differs from the later ones
+    benchmark.run()
+
+    // fixme: use suspendingclock?
+    for _ in 0..<samples {
+      let start = Tick.now
+      benchmark.run()
+      let end = Tick.now
+      times.append(end.elapsedTime(since: start))
+    }
+    // todo: compute stdev and warn if it's too large
+
+    // return median time; indexing by the collected count keeps the
+    // subscript tied to what was actually measured
+    times.sort()
+    let median = times[times.count / 2]
+    results.add(name: benchmark.name, time: median)
+    return median
+  }
+
+  /// Measures every registered benchmark and prints its median time.
+  public mutating func run() {
+    print("Running")
+    for b in suite {
+      print("- \(b.name) \(measure(benchmark: b))")
+    }
+  }
+
+  /// Runs every registered benchmark exactly once without timing it.
+  public func profile() {
+    print("Starting")
+    for b in suite {
+      print("- \(b.name)")
+      b.run()
+      print("- done")
+    }
+  }
+
+  /// Measures every registered benchmark, prints its median time, and then
+  /// calls the benchmark's own `debug()`.
+  public mutating func debug() {
+    print("Debugging")
+    print("========================")
+    for b in suite {
+      print("- \(b.name) \(measure(benchmark: b))")
+      b.debug()
+      print("========================")
+    }
+  }
+}
+
+extension BenchmarkRunner {
+
+  /// Thrown by `fetchLatestResult()` when the output folder contains no
+  /// result file that can be used for a comparison.
+  struct NoSavedResultError: Error, CustomStringConvertible {
+    /// Path of the folder that was searched.
+    let folder: String
+
+    var description: String {
+      "No saved benchmark results found in \(folder)"
+    }
+  }
+
+  /// Suffix appended to the formatted start time to name a result file.
+  private static let resultFileSuffix = "-result.json"
+
+#if _runtime(_ObjC)
+  var dateStyle: Date.ISO8601FormatStyle { Date.ISO8601FormatStyle() }
+
+  /// Formats `date` with `dateStyle` for use in a result file name.
+  func format(_ date: Date) -> String {
+    return dateStyle.format(date)
+  }
+#else
+  /// Formats `date` via its `description` for use in a result file name.
+  func format(_ date: Date) -> String {
+    return date.description
+  }
+#endif
+
+  /// Returns the output folder URL, creating the folder (and any missing
+  /// intermediate directories) first; creation failures are thrown.
+  private func ensureOutputFolder() throws -> URL {
+    let url = URL(fileURLWithPath: outputPath, isDirectory: true)
+    if !FileManager.default.fileExists(atPath: url.path) {
+      try FileManager.default.createDirectory(atPath: url.path, withIntermediateDirectories: true)
+    }
+    return url
+  }
+
+  /// The output folder URL; the folder is created on access if missing.
+  ///
+  /// Kept non-throwing so existing callers keep compiling. The throwing
+  /// methods in this extension use `ensureOutputFolder()` instead, so a
+  /// folder that cannot be created surfaces as an error rather than a crash.
+  var outputFolderUrl: URL {
+    try! ensureOutputFolder()
+  }
+
+  /// Writes the accumulated results to
+  /// `<output folder>/<formatted start time>-result.json`.
+  ///
+  /// - Throws: Any error from creating the folder or writing the file.
+  public func save() throws {
+    let now = format(startTime)
+    let resultJsonUrl = try ensureOutputFolder()
+      .appendingPathComponent(now + Self.resultFileSuffix)
+    print("Saving result to \(resultJsonUrl.path)")
+    try results.save(to: resultJsonUrl)
+  }
+
+  /// Loads the most recent saved result from the output folder.
+  ///
+  /// - Returns: The result file's name and its decoded contents.
+  /// - Throws: `NoSavedResultError` when the folder holds no usable result
+  ///   file, or any error from listing the folder or reading the file.
+  func fetchLatestResult() throws -> (String, SuiteResult) {
+    let folder = try ensureOutputFolder()
+    let candidates = try FileManager.default.contentsOfDirectory(
+      at: folder,
+      includingPropertiesForKeys: nil
+    )
+#if _runtime(_ObjC)
+    // Track only the newest valid file; unparsable or undecodable files are
+    // reported and skipped.
+    var newest: (date: Date, entry: (String, SuiteResult))?
+    for resultFile in candidates {
+      let fileName = resultFile.lastPathComponent
+      do {
+        let dateString = fileName.replacingOccurrences(
+          of: Self.resultFileSuffix,
+          with: "")
+        let date = try dateStyle.parse(dateString)
+        let result = try SuiteResult.load(from: resultFile)
+        if newest.map({ date > $0.date }) ?? true {
+          newest = (date: date, entry: (fileName, result))
+        }
+      } catch {
+        print("Warning: Found invalid result file \(fileName) in results directory, skipping")
+      }
+    }
+
+    // An empty folder (e.g. the very first run) is an error, not a crash.
+    guard let latest = newest else {
+      throw NoSavedResultError(folder: folder.path)
+    }
+    return latest.entry
+#else
+    // corelibs-foundation lacks Date.FormatStyle entirely, so we don't have
+    // any way of parsing the dates. Fall back to picking the result file
+    // whose name sorts last. The directory listing order is undefined, so
+    // the sort has to be explicit.
+    let resultFiles = candidates
+      .filter { $0.lastPathComponent.hasSuffix(Self.resultFileSuffix) }
+      .sorted { $0.lastPathComponent < $1.lastPathComponent }
+    guard let resultFile = resultFiles.last else {
+      throw NoSavedResultError(folder: folder.path)
+    }
+    let pastResult = try SuiteResult.load(from: resultFile)
+    return (resultFile.lastPathComponent, pastResult)
+#endif
+  }
+
+  /// Prints one line per entry of `changes`, ordered by benchmark name so
+  /// the report is stable from run to run.
+  private func printChanges(_ changes: [String: Time], baseline: SuiteResult) {
+    for (name, change) in changes.sorted(by: { $0.key < $1.key }) {
+      // Entries come from `SuiteResult.compare`, which only reports names
+      // present on both sides; skip defensively instead of force-unwrapping.
+      guard
+        let oldVal = baseline.results[name],
+        let newVal = results.results[name]
+      else { continue }
+      let percentage = change.seconds / oldVal.seconds
+      print("- \(name)\t\t\(newVal)\t\(oldVal)\t\(change)\t\((percentage * 100).rounded())%")
+    }
+  }
+
+  /// Prints the regressions and improvements of the current results relative
+  /// to a saved result.
+  ///
+  /// - Parameter against: Path of the result file to compare with, or `nil`
+  ///   to compare with the latest result in the output folder.
+  /// - Throws: Any error from locating or loading the comparison file.
+  public func compare(against: String?) throws {
+    let compareFile: String
+    let compareResult: SuiteResult
+
+    if let compareFilePath = against {
+      let compareFileURL = URL(fileURLWithPath: compareFilePath)
+      compareResult = try SuiteResult.load(from: compareFileURL)
+      compareFile = compareFileURL.lastPathComponent
+    } else {
+      (compareFile, compareResult) = try fetchLatestResult()
+    }
+
+    // A positive difference means the current run took longer.
+    let diff = results.compare(with: compareResult)
+    let regressions = diff.filter { $0.value.seconds > 0 }
+    let improvements = diff.filter { $0.value.seconds < 0 }
+
+    print("Comparing against benchmark result file \(compareFile)")
+    print("=== Regressions ====================================================")
+    printChanges(regressions, baseline: compareResult)
+    print("=== Improvements ====================================================")
+    printChanges(improvements, baseline: compareResult)
+  }
+}
+
+/// Timings collected for a suite, keyed by benchmark name.
+struct SuiteResult {
+  var results: [String: Time] = [:]
+
+  /// Stores `time` under `name`, replacing any earlier entry for that name.
+  public mutating func add(name: String, time: Time) {
+    results[name] = time
+  }
+
+  /// Computes `self - other` for every name present in both results.
+  ///
+  /// - Returns: The differences whose magnitude is more than 0.5% of the
+  ///   corresponding time in `other`, keyed by name.
+  public func compare(with other: SuiteResult) -> [String: Time] {
+    return results.reduce(into: [String: Time]()) { changes, entry in
+      // Names missing from `other` have nothing to be compared against.
+      guard let baseline = other.results[entry.key] else { return }
+      let delta = entry.value - baseline
+      let percentChange = 100 * delta.seconds / baseline.seconds
+      if abs(percentChange) > 0.5 {
+        changes[entry.key] = delta
+      }
+    }
+  }
+}
+
+extension SuiteResult: Codable {
+  /// Encodes this result as JSON and writes it atomically to `url`.
+  ///
+  /// - Throws: Any encoding or file-writing error.
+  public func save(to url: URL) throws {
+    try JSONEncoder()
+      .encode(self)
+      .write(to: url, options: .atomic)
+  }
+
+  /// Reads the file at `url` and decodes it as a JSON `SuiteResult`.
+  ///
+  /// - Throws: Any file-reading or decoding error.
+  public static func load(from url: URL) throws -> SuiteResult {
+    return try JSONDecoder().decode(SuiteResult.self, from: Data(contentsOf: url))
+  }
+}
diff --git a/Sources/RegexBenchmark/CLI.swift b/Sources/RegexBenchmark/CLI.swift
@@ -5,30 +5,46 @@ struct Runner: ParsableCommand {
   @Argument(help: "Names of benchmarks to run")
   var specificBenchmarks: [String] = []
 
-  @Option(help: "Run only once for profiling purposes")
+  @Flag(help: "Run only once for profiling purposes")
   var profile = false
 
   @Option(help: "How many samples to collect for each benchmark")
   var samples = 20
-
-  func makeRunner() -> BenchmarkRunner {
-    var benchmark = BenchmarkRunner("RegexBench", samples)
-    benchmark.addReluctantQuant()
-    benchmark.addCSS()
-    benchmark.addNotFound()
-    benchmark.addGraphemeBreak()
-    benchmark.addHangulSyllable()
-    return benchmark
-  }
+
+  @Flag(help: "Debug benchmark regexes")
+  var debug = false
+
+  @Option(help: "Output folder")
+  var outputPath = "./results/"
+
+  @Flag(help: "Should the results be saved")
+  var save = false
+
+  @Flag(help: "Compare this result with the latest saved result")
+  var compare = false
+
+  @Option(help: "The result file to compare against, if this flag is not set it will compare against the most recent result file")
+  var compareFile: String?
+
   mutating func run() throws {
-    var runner = makeRunner()
+    var runner = BenchmarkRunner.makeRunner(samples, outputPath)
+
+    // todo: regex based filter 
     if !self.specificBenchmarks.isEmpty {
       runner.suite = runner.suite.filter { b in specificBenchmarks.contains(b.name) }
     }
-    if profile {
-      runner.profile()
-    } else {
+    switch (profile, debug) {
+    case (true, true): print("Cannot run both profile and debug")
+    case (true, false): runner.profile()
+    case (false, true): runner.debug()
+    case (false, false):
       runner.run()
+      if compare {
+        try runner.compare(against: compareFile)
+      }
+      if save {
+        try runner.save()
+      }
     }
   }
 }