Skip to content

[Integration] main (96fb215) -> swift/main #589

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Jul 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
9212b43
Validate optimizations when a match fails
hamishknight Jul 7, 2022
33856e7
Merge pull request #559 from hamishknight/validate-test
hamishknight Jul 7, 2022
33acdeb
Break out of quantification loop if there is no forward progress (#560)
rctcwyvrn Jul 11, 2022
7752047
Optimize matching to match on scalar values when possible (#525)
rctcwyvrn Jul 12, 2022
8f93498
Rip out unused _CharacterClassModel API
hamishknight Jul 14, 2022
297a69d
Remove _CharacterClassModel conformance to RegexComponent
hamishknight Jul 14, 2022
7d5e86d
Internalize `_CharacterClassModel`
hamishknight Jul 14, 2022
99e5e51
Merge pull request #578 from hamishknight/internalize-character-model
hamishknight Jul 14, 2022
d5010fb
Fix `CharacterClass.newlineSequence`
hamishknight Jul 14, 2022
446bfd4
Rename `any` -> `dot`
hamishknight Jul 14, 2022
efe90d1
Re-introduce `DSLTree.Atom.any`
hamishknight Jul 14, 2022
8f8c7d0
Fix `CharacterClass.any`
hamishknight Jul 14, 2022
657351e
Rename `startOfLine`/`endOfLine` -> `caretAnchor`/`dollarAnchor`
hamishknight Jul 14, 2022
21ca2fb
Move AssertionKind onto the DSL
hamishknight Jul 14, 2022
210bfa3
Fix `Anchor.startOfLine` and `Anchor.endOfLine`
hamishknight Jul 14, 2022
f111a57
Add some tests for `CharacterClass.anyGraphemeCluster`
hamishknight Jul 14, 2022
9a545a0
Add some tests for `CharacterClass.horizontalWhitespace`
hamishknight Jul 14, 2022
9456c54
Implement `CharacterClass.anyNonNewline`
hamishknight Jul 14, 2022
9bcb72f
Rename various APIs
Azoy Jul 13, 2022
33566dc
Merge pull request #575 from Azoy/various-tidbits
Azoy Jul 14, 2022
9f1f309
Move options from RegexComponent to Regex
Azoy Jul 13, 2022
991d90c
Merge pull request #576 from Azoy/options-regex
Azoy Jul 14, 2022
1f2ae04
Merge pull request #580 from hamishknight/character-work
hamishknight Jul 15, 2022
96fb215
Benchmarker improvements and more benchmarks (#581)
rctcwyvrn Jul 15, 2022
3a2a785
Merge branch 'main' into main-merge
hamishknight Jul 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 89 additions & 24 deletions Sources/RegexBenchmark/Benchmark.swift
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
import _StringProcessing
import Foundation

public protocol RegexBenchmark {
protocol RegexBenchmark {
var name: String { get }
func run()
func debug()
}

public struct Benchmark: RegexBenchmark {
public let name: String
struct Benchmark: RegexBenchmark {
let name: String
let regex: Regex<AnyRegexOutput>
let type: MatchType
let target: String

public enum MatchType {
enum MatchType {
case whole
case first
case allMatches
}

public func run() {
func run() {
switch type {
case .whole: blackHole(target.wholeMatch(of: regex))
case .allMatches: blackHole(target.matches(of: regex))
Expand All @@ -28,8 +28,8 @@ public struct Benchmark: RegexBenchmark {
}
}

public struct NSBenchmark: RegexBenchmark {
public let name: String
struct NSBenchmark: RegexBenchmark {
let name: String
let regex: NSRegularExpression
let type: NSMatchType
let target: String
Expand All @@ -38,19 +38,55 @@ public struct NSBenchmark: RegexBenchmark {
NSRange(target.startIndex..<target.endIndex, in: target)
}

public enum NSMatchType {
enum NSMatchType {
case allMatches
case first
}

public func run() {
func run() {
switch type {
case .allMatches: blackHole(regex.matches(in: target, range: range))
case .first: blackHole(regex.firstMatch(in: target, range: range))
}
}
}

/// Benchmarks a Swift `Regex` by attempting a whole-string match against
/// every string in `targets`.
struct InputListBenchmark: RegexBenchmark {
  /// Identifier required by `RegexBenchmark`.
  let name: String
  /// The regex matched against each target.
  let regex: Regex<AnyRegexOutput>
  /// The input strings, visited in order on every run.
  let targets: [String]

  /// Makes one pass over `targets`, handing each match result to `blackHole`.
  func run() {
    for input in targets {
      let result = input.wholeMatch(of: regex)
      blackHole(result)
    }
  }
}

/// Benchmarks `NSRegularExpression` by attempting an anchored match against
/// every string in `targets`.
///
/// The pattern is anchored at both ends so that each attempt corresponds to
/// the whole-string match performed by `InputListBenchmark`.
struct InputListNSBenchmark: RegexBenchmark {
  /// Identifier required by `RegexBenchmark`.
  let name: String
  /// The anchored, compiled pattern.
  let regex: NSRegularExpression
  /// The input strings, visited in order on every run.
  let targets: [String]

  /// Creates the benchmark, compiling `regex` with start and end anchors.
  ///
  /// - Parameters:
  ///   - name: Identifier for the benchmark.
  ///   - regex: Pattern source; traps if it does not compile.
  ///   - targets: Strings to match against.
  init(name: String, regex: String, targets: [String]) {
    self.name = name
    // Wrap the pattern in a non-capturing group before anchoring. Without it
    // a top-level alternation such as `a|b` would become `^a|b$`, where each
    // anchor binds to only one branch and the match is no longer whole-string.
    // `(?:...)` does not shift capture-group numbering.
    self.regex = try! NSRegularExpression(pattern: "^(?:" + regex + ")$")
    self.targets = targets
  }

  /// Returns the `NSRange` spanning all of `target`.
  func range(in target: String) -> NSRange {
    NSRange(target.startIndex..<target.endIndex, in: target)
  }

  /// Makes one pass over `targets`, handing each first-match result to
  /// `blackHole`.
  func run() {
    for target in targets {
      blackHole(regex.firstMatch(in: target, range: range(in: target)))
    }
  }
}

/// A benchmark meant to be run across multiple engines
struct CrossBenchmark {
/// The base name of the benchmark
Expand All @@ -69,11 +105,12 @@ struct CrossBenchmark {
/// TODO: Probably better to have a whole-line vs search anywhere, maybe
/// accommodate multi-line matching, etc.
var isWhole: Bool = false

/// Whether or not to do firstMatch as well or just allMatches
var includeFirst: Bool = false

func register(_ runner: inout BenchmarkRunner) {
let swiftRegex = try! Regex(regex)

let nsPattern = isWhole ? "^" + regex + "$" : regex
let nsRegex: NSRegularExpression
if isWhole {
nsRegex = try! NSRegularExpression(pattern: "^" + regex + "$")
Expand All @@ -95,37 +132,65 @@ struct CrossBenchmark {
type: .first,
target: input))
} else {
runner.register(
Benchmark(
name: baseName + "First",
regex: swiftRegex,
type: .first,
target: input))
runner.register(
Benchmark(
name: baseName + "All",
regex: swiftRegex,
type: .allMatches,
target: input))
runner.register(
NSBenchmark(
name: baseName + "First_NS",
regex: nsRegex,
type: .first,
target: input))
runner.register(
NSBenchmark(
name: baseName + "All_NS",
regex: nsRegex,
type: .allMatches,
target: input))
if includeFirst {
runner.register(
Benchmark(
name: baseName + "First",
regex: swiftRegex,
type: .first,
target: input))
runner.register(
NSBenchmark(
name: baseName + "First_NS",
regex: nsRegex,
type: .first,
target: input))
}
}
}
}

/// Describes one regex, run over a list of inputs, that is registered as a
/// pair of benchmarks: one for Swift `Regex` and one for
/// `NSRegularExpression`.
struct CrossInputListBenchmark {
  /// Name of the Swift benchmark; the NS variant appends "NS" to it.
  var baseName: String

  /// The pattern string to compile in the different engines
  var regex: String

  /// The strings each benchmark is run against
  var inputs: [String]

  /// Registers the Swift benchmark and then the NS benchmark with `runner`.
  ///
  /// Traps if `regex` fails to compile.
  func register(_ runner: inout BenchmarkRunner) {
    let compiled: Regex<AnyRegexOutput> = try! Regex(regex)
    let swiftBenchmark = InputListBenchmark(
      name: baseName,
      regex: compiled,
      targets: inputs)
    runner.register(swiftBenchmark)

    let nsBenchmark = InputListNSBenchmark(
      name: baseName + "NS",
      regex: regex,
      targets: inputs)
    runner.register(nsBenchmark)
  }
}

// TODO: Capture-containing benchmarks

// nom nom nom, consume the argument
@inline(never)
public func blackHole<T>(_ x: T) {
func blackHole<T>(_ x: T) {
}
14 changes: 10 additions & 4 deletions Sources/RegexBenchmark/BenchmarkRegistration.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,26 @@
// Do not remove the start of registration or end of registration markers

extension BenchmarkRunner {
public static func makeRunner(
static func makeRunner(
_ samples: Int,
_ outputPath: String
_ quiet: Bool
) -> BenchmarkRunner {
var benchmark = BenchmarkRunner("RegexBench", samples, outputPath)
var benchmark = BenchmarkRunner("RegexBench", samples, quiet)
// -- start of registrations --
benchmark.addReluctantQuant()
benchmark.addCSS()
benchmark.addNotFound()
benchmark.addGraphemeBreak()
benchmark.addHangulSyllable()
benchmark.addHTML()
// benchmark.addHTML() // Disabled due to \b being unusably slow
benchmark.addEmail()
benchmark.addCustomCharacterClasses()
benchmark.addBuiltinCC()
benchmark.addUnicode()
benchmark.addLiteralSearch()
benchmark.addDiceNotation()
benchmark.addErrorMessages()
benchmark.addIpAddress()
// -- end of registrations --
return benchmark
}
Expand Down
Loading