Skip to content

Commit 236b47c

Browse files
authored
Speed up general character class matching (#642)
Short-circuit Character.isASCII checks inside built-in character class matching. Also, make the benchmark try a few more times before giving up.
1 parent 852b890 commit 236b47c

File tree

3 files changed

+18
-9
lines changed

3 files changed

+18
-9
lines changed

Sources/RegexBenchmark/BenchmarkRunner.swift

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import Foundation
22
@_spi(RegexBenchmark) import _StringProcessing
33

4+
/// The number of times to re-run the benchmark if the results vary too much
5+
private var rerunCount: Int { 3 }
6+
47
struct BenchmarkRunner {
58
let suiteName: String
69
var suite: [any RegexBenchmark] = []
@@ -82,11 +85,16 @@ struct BenchmarkRunner {
8285
for b in suite {
8386
var result = measure(benchmark: b, samples: samples)
8487
if result.runtimeIsTooVariant {
85-
print("Warning: Standard deviation > \(Stats.maxAllowedStdev*100)% for \(b.name)")
86-
print(result.runtime)
87-
print("Rerunning \(b.name)")
88-
result = measure(benchmark: b, samples: result.runtime.samples*2)
89-
print(result.runtime)
88+
for _ in 0..<rerunCount {
89+
print("Warning: Standard deviation > \(Stats.maxAllowedStdev*100)% for \(b.name)")
90+
print(result.runtime)
91+
print("Rerunning \(b.name)")
92+
result = measure(benchmark: b, samples: result.runtime.samples*2)
93+
print(result.runtime)
94+
if !result.runtimeIsTooVariant {
95+
break
96+
}
97+
}
9098
if result.runtimeIsTooVariant {
9199
fatalError("Benchmark \(b.name) is too variant")
92100
}

Sources/_StringProcessing/Engine/MEBuiltins.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,9 @@ extension Processor {
3838
return nil
3939
}
4040

41-
let asciiCheck = (char.isASCII && !isScalarSemantics)
41+
let asciiCheck = !isStrictASCII
4242
|| (scalar.isASCII && isScalarSemantics)
43-
|| !isStrictASCII
43+
|| char.isASCII
4444

4545
var matched: Bool
4646
var next: Input.Index

Sources/_StringProcessing/_CharacterClassModel.swift

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,10 @@ struct _CharacterClassModel: Hashable {
8181
let char = input[currentPosition]
8282
let scalar = input.unicodeScalars[currentPosition]
8383
let isScalarSemantics = matchLevel == .unicodeScalar
84-
let asciiCheck = (char.isASCII && !isScalarSemantics)
84+
85+
let asciiCheck = !isStrictASCII
8586
|| (scalar.isASCII && isScalarSemantics)
86-
|| !isStrictASCII
87+
|| char.isASCII
8788

8889
var matched: Bool
8990
var next: String.Index

0 commit comments

Comments
 (0)