Skip to content

[benchmark] Simplify and add more benchmarks #501

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 80 additions & 6 deletions Sources/RegexBenchmark/Benchmark.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ public protocol RegexBenchmark {
public struct Benchmark: RegexBenchmark {
public let name: String
let regex: Regex<Substring>
let ty: MatchType
let type: MatchType
let target: String

public enum MatchType {
Expand All @@ -19,7 +19,7 @@ public struct Benchmark: RegexBenchmark {
}

public func run() {
switch ty {
switch type {
case .whole: blackHole(target.wholeMatch(of: regex))
case .allMatches: blackHole(target.matches(of: regex))
case .first: blackHole(target.firstMatch(of: regex))
Expand All @@ -30,21 +30,21 @@ public struct Benchmark: RegexBenchmark {
public struct NSBenchmark: RegexBenchmark {
public let name: String
let regex: NSRegularExpression
let ty: NSMatchType
let type: NSMatchType
let target: String

var range: NSRange {
NSRange(target.startIndex..<target.endIndex, in: target)
}

public enum NSMatchType {
case all
case allMatches
case first
}

public func run() {
switch ty {
case .all: blackHole(regex.matches(in: target, range: range))
switch type {
case .allMatches: blackHole(regex.matches(in: target, range: range))
case .first: blackHole(regex.firstMatch(in: target, range: range))
}
}
Expand Down Expand Up @@ -110,6 +110,80 @@ public struct BenchmarkRunner {
}
}

/// A benchmark meant to be run across multiple engines.
///
/// One `CrossBenchmark` describes a single pattern/input pair; `register(_:)`
/// expands it into one benchmark per engine and per matching mode.
struct CrossBenchmark {
  /// The base name of the benchmark. Mode and engine suffixes (for example
  /// `First`, `All_NS`) are appended to it on registration.
  var baseName: String

  /// The pattern string to compile in the different engines.
  var regex: String

  /// The text to search.
  var input: String

  // TODO: var output, for validation

  /// Whether this is a whole-string matching or a searching benchmark.
  ///
  /// TODO: Probably better to have a whole-line vs search anywhere, maybe
  /// accommodate multi-line matching, etc.
  var isWhole: Bool = false

  /// Compiles `regex` for both engines and registers the resulting
  /// benchmarks with `runner`.
  ///
  /// - When `isWhole` is `true`: registers `<baseName>Whole` and
  ///   `<baseName>Whole_NS`.
  /// - Otherwise: registers `<baseName>First`, `<baseName>All`,
  ///   `<baseName>First_NS` and `<baseName>All_NS`.
  ///
  /// The pattern is compiled with `try!`, so a pattern that either engine
  /// rejects traps here instead of being silently skipped.
  ///
  /// - Parameter runner: The runner that receives the benchmarks.
  func register(_ runner: inout BenchmarkRunner) {
    let swiftRegex = try! Regex(regex, as: Substring.self)

    // The NS side of a whole-match benchmark is registered as a `.first`
    // search, so whole-string matching is emulated by anchoring the pattern.
    // The non-capturing group keeps the anchors applied to the entire
    // pattern even when it contains a top-level alternation: without it,
    // "a|b" would become "^a|b$", i.e. "(^a)|(b$)".
    let nsPattern = isWhole ? "^(?:" + regex + ")$" : regex
    let nsRegex = try! NSRegularExpression(pattern: nsPattern)

    if isWhole {
      runner.register(
        Benchmark(
          name: baseName + "Whole",
          regex: swiftRegex,
          type: .whole,
          target: input))
      runner.register(
        NSBenchmark(
          name: baseName + "Whole_NS",
          regex: nsRegex,
          type: .first,
          target: input))
    } else {
      runner.register(
        Benchmark(
          name: baseName + "First",
          regex: swiftRegex,
          type: .first,
          target: input))
      runner.register(
        Benchmark(
          name: baseName + "All",
          regex: swiftRegex,
          type: .allMatches,
          target: input))
      runner.register(
        NSBenchmark(
          name: baseName + "First_NS",
          regex: nsRegex,
          type: .first,
          target: input))
      runner.register(
        NSBenchmark(
          name: baseName + "All_NS",
          regex: nsRegex,
          type: .allMatches,
          target: input))
    }
  }
}

// TODO: Capture-containing benchmarks

// nom nom nom, consume the argument
@inline(never)
public func blackHole<T>(_ x: T) {
Expand Down
5 changes: 3 additions & 2 deletions Sources/RegexBenchmark/CLI.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ struct Runner: ParsableCommand {
func makeRunner() -> BenchmarkRunner {
var benchmark = BenchmarkRunner("RegexBench", samples)
benchmark.addReluctantQuant()
benchmark.addBacktracking()
benchmark.addCSS()
benchmark.addFirstMatch()
benchmark.addNotFound()
benchmark.addGraphemeBreak()
benchmark.addHangulSyllable()
return benchmark
}
mutating func run() throws {
Expand Down
1,452 changes: 1,452 additions & 0 deletions Sources/RegexBenchmark/Inputs/GraphemeBreakData.swift

Large diffs are not rendered by default.

45 changes: 0 additions & 45 deletions Sources/RegexBenchmark/Suite/Backtracking.swift

This file was deleted.

22 changes: 6 additions & 16 deletions Sources/RegexBenchmark/Suite/CssRegex.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,12 @@ import _StringProcessing

extension BenchmarkRunner {
mutating func addCSS() {
let r = "--([a-zA-Z0-9_-]+)\\s*:\\s*(.*?):"

let cssRegex = Benchmark(
name: "cssRegex",
regex: try! Regex(r),
ty: .allMatches,
target: Inputs.swiftOrgCSS
)
let r = #"--([a-zA-Z0-9_-]+)\s*:\s*(.*?):"#
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`first` and `all` have the same running time because the regex isn't matching anything; there should be a semicolon at the end, not a colon

let r = #"--([a-zA-Z0-9_-]+)\s*:\s*(.*?);"#


let cssRegexNS = NSBenchmark(
name: "cssRegexNS",
regex: try! NSRegularExpression(pattern: r),
ty: .all,
target: Inputs.swiftOrgCSS
)
register(cssRegex)
register(cssRegexNS)
// FIXME: Why do `first` and `all` have the same running time?

let css = CrossBenchmark(
baseName: "css", regex: r, input: Inputs.swiftOrgCSS)
css.register(&self)
}
}
49 changes: 0 additions & 49 deletions Sources/RegexBenchmark/Suite/FirstMatch.swift

This file was deleted.

25 changes: 25 additions & 0 deletions Sources/RegexBenchmark/Suite/GraphemeBreak.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import _StringProcessing
import RegexBuilder

import Foundation

extension BenchmarkRunner {
  /// Registers the cross-engine "GraphemeBreakNoCap" benchmarks, which run a
  /// capture-free pattern over `Inputs.graphemeBreakData` in search mode.
  mutating func addGraphemeBreak() {
    let pattern = #"(?:[0-9A-F]+)(?:\.\.(?:[0-9A-F]+))?\s+;\s+(?:\w+).*"#

    CrossBenchmark(
      baseName: "GraphemeBreakNoCap",
      regex: pattern,
      input: Inputs.graphemeBreakData
    ).register(&self)
  }

  /// Registers the cross-engine "HangulSyllable" benchmarks, which search
  /// `Inputs.graphemeBreakData` for `HANGUL SYLLABLE` names, optionally
  /// followed by a `..HANGUL SYLLABLE` range end.
  mutating func addHangulSyllable() {
    let pattern = #"HANGUL SYLLABLE [A-Z]+(?:\.\.HANGUL SYLLABLE [A-Z]+)?"#

    CrossBenchmark(
      baseName: "HangulSyllable",
      regex: pattern,
      input: Inputs.graphemeBreakData
    ).register(&self)
  }
}

16 changes: 16 additions & 0 deletions Sources/RegexBenchmark/Suite/NotFound.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import _StringProcessing
import Foundation

extension BenchmarkRunner {
  /// Registers searching benchmarks whose pattern never matches the input.
  ///
  /// Two cross-engine benchmarks are registered, each under its own base
  /// name so that their results stay distinguishable:
  /// - `notFound`: the single-character pattern `a`.
  /// - `anchoredNotFound`: the start-anchored pattern `^ +a`.
  mutating func addNotFound() {
    // The input consists solely of spaces, so neither pattern (both of which
    // require an "a") can match anywhere in it.
    let input = String(repeating: " ", count: 100_000)

    let notFound = CrossBenchmark(
      baseName: "notFound", regex: "a", input: input)
    notFound.register(&self)

    // A distinct base name is required here: reusing "notFound" would
    // register a second set of benchmarks with the exact same names as the
    // unanchored variant above.
    let anchoredNotFound = CrossBenchmark(
      baseName: "anchoredNotFound", regex: "^ +a", input: input)
    anchoredNotFound.register(&self)
  }
}
54 changes: 21 additions & 33 deletions Sources/RegexBenchmark/Suite/ReluctantQuant.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,28 @@ import RegexBuilder

extension BenchmarkRunner {
mutating func addReluctantQuant() {
let size = 500000
let s = String(repeating: "a", count: size)

let reluctantQuant = Benchmark(
name: "ReluctantQuant",
regex: Regex {
OneOrMore(.any, .reluctant)
},
ty: .whole,
target: s
)
let size = 100_000
let input = String(repeating: "a", count: size)

let eagarQuantWithTerminal = Benchmark(
name: "EagarQuantWithTerminal",
regex: Regex {
OneOrMore(.any, .eager)
";"
},
ty: .whole,
target: s + ";"
)
let reluctantQuant = CrossBenchmark(
baseName: "ReluctantQuant",
regex: #".*?"#,
input: input,
isWhole: true)
reluctantQuant.register(&self)

let reluctantQuantWithTerminal = Benchmark(
name: "ReluctantQuantWithTerminal",
regex: Regex {
OneOrMore(.any, .reluctant)
";"
},
ty: .whole,
target: s + ";"
)

register(reluctantQuant)
register(reluctantQuantWithTerminal)
register(eagarQuantWithTerminal)
let eagarQuantWithTerminal = CrossBenchmark(
baseName: "EagarQuantWithTerminal",
regex: #".*;"#,
input: input + ";",
isWhole: true)
eagarQuantWithTerminal.register(&self)

let reluctantQuantWithTerminal = CrossBenchmark(
baseName: "ReluctantQuantWithTerminal",
regex: #".*?;"#,
input: input + ";",
isWhole: true)
reluctantQuantWithTerminal.register(&self)
}
}