Skip to content

Make RegexBuilder quantifiers follow option settings #293

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions Sources/RegexBuilder/DSL.swift
Original file line number Diff line number Diff line change
Expand Up @@ -120,27 +120,29 @@ extension DSLTree.Node {
@available(SwiftStdlib 5.7, *)
static func repeating(
_ range: Range<Int>,
_ behavior: QuantificationBehavior,
_ behavior: QuantificationBehavior?,
_ node: DSLTree.Node
) -> DSLTree.Node {
// TODO: Throw these as errors
assert(range.lowerBound >= 0, "Cannot specify a negative lower bound")
assert(!range.isEmpty, "Cannot specify an empty range")

let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.astKind) } ?? .default

switch (range.lowerBound, range.upperBound) {
case (0, Int.max): // 0...
return .quantification(.zeroOrMore, behavior.astKind, node)
return .quantification(.zeroOrMore, kind, node)
case (1, Int.max): // 1...
return .quantification(.oneOrMore, behavior.astKind, node)
return .quantification(.oneOrMore, kind, node)
case _ where range.count == 1: // ..<1 or ...0 or any range with count == 1
// Note: `behavior` is ignored in this case
return .quantification(.exactly(.init(faking: range.lowerBound)), .eager, node)
return .quantification(.exactly(.init(faking: range.lowerBound)), .default, node)
case (0, _): // 0..<n or 0...n or ..<n or ...n
return .quantification(.upToN(.init(faking: range.upperBound)), behavior.astKind, node)
return .quantification(.upToN(.init(faking: range.upperBound)), kind, node)
case (_, Int.max): // n...
return .quantification(.nOrMore(.init(faking: range.lowerBound)), behavior.astKind, node)
return .quantification(.nOrMore(.init(faking: range.lowerBound)), kind, node)
default: // any other range
return .quantification(.range(.init(faking: range.lowerBound), .init(faking: range.upperBound)), behavior.astKind, node)
return .quantification(.range(.init(faking: range.lowerBound), .init(faking: range.upperBound)), kind, node)
}
}
}
Expand Down
440 changes: 253 additions & 187 deletions Sources/RegexBuilder/Variadics.swift

Large diffs are not rendered by default.

20 changes: 11 additions & 9 deletions Sources/VariadicsGenerator/VariadicsGenerator.swift
Original file line number Diff line number Diff line change
Expand Up @@ -378,9 +378,10 @@ struct VariadicsGenerator: ParsableCommand {
\(params.disfavored)\
public init<\(params.genericParams)>(
_ component: Component,
_ behavior: QuantificationBehavior = .eagerly
_ behavior: QuantificationBehavior? = nil
) \(params.whereClauseForInit) {
self.init(node: .quantification(.\(kind.astQuantifierAmount), behavior.astKind, component.regex.root))
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.astKind) } ?? .default
self.init(node: .quantification(.\(kind.astQuantifierAmount), kind, component.regex.root))
}
}

Expand All @@ -389,10 +390,11 @@ struct VariadicsGenerator: ParsableCommand {
\(defaultAvailableAttr)
\(params.disfavored)\
public init<\(params.genericParams)>(
_ behavior: QuantificationBehavior = .eagerly,
_ behavior: QuantificationBehavior? = nil,
@\(concatBuilderName) _ component: () -> Component
) \(params.whereClauseForInit) {
self.init(node: .quantification(.\(kind.astQuantifierAmount), behavior.astKind, component().regex.root))
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.astKind) } ?? .default
self.init(node: .quantification(.\(kind.astQuantifierAmount), kind, component().regex.root))
}
}

Expand All @@ -404,7 +406,7 @@ struct VariadicsGenerator: ParsableCommand {
public static func buildLimitedAvailability<\(params.genericParams)>(
_ component: Component
) -> \(regexTypeName)<\(params.matchType)> \(params.whereClause) {
.init(node: .quantification(.\(kind.astQuantifierAmount), .eager, component.regex.root))
.init(node: .quantification(.\(kind.astQuantifierAmount), .default, component.regex.root))
}
}
""" : "")
Expand Down Expand Up @@ -488,7 +490,7 @@ struct VariadicsGenerator: ParsableCommand {
) \(params.whereClauseForInit) {
assert(count > 0, "Must specify a positive count")
// TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)`
self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component.regex.root))
self.init(node: .quantification(.exactly(.init(faking: count)), .default, component.regex.root))
}

\(defaultAvailableAttr)
Expand All @@ -499,15 +501,15 @@ struct VariadicsGenerator: ParsableCommand {
) \(params.whereClauseForInit) {
assert(count > 0, "Must specify a positive count")
// TODO: Emit a warning about `repeatMatch(count: 0)` or `repeatMatch(count: 1)`
self.init(node: .quantification(.exactly(.init(faking: count)), .eager, component().regex.root))
self.init(node: .quantification(.exactly(.init(faking: count)), .default, component().regex.root))
}

\(defaultAvailableAttr)
\(params.disfavored)\
public init<\(params.genericParams), R: RangeExpression>(
_ component: Component,
_ expression: R,
_ behavior: QuantificationBehavior = .eagerly
_ behavior: QuantificationBehavior? = nil
) \(params.repeatingWhereClause) {
self.init(node: .repeating(expression.relative(to: 0..<Int.max), behavior, component.regex.root))
}
Expand All @@ -516,7 +518,7 @@ struct VariadicsGenerator: ParsableCommand {
\(params.disfavored)\
public init<\(params.genericParams), R: RangeExpression>(
_ expression: R,
_ behavior: QuantificationBehavior = .eagerly,
_ behavior: QuantificationBehavior? = nil,
@\(concatBuilderName) _ component: () -> Component
) \(params.repeatingWhereClause) {
self.init(node: .repeating(expression.relative(to: 0..<Int.max), behavior, component().regex.root))
Expand Down
18 changes: 14 additions & 4 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -364,10 +364,20 @@ extension Compiler.ByteCodeGen {

mutating func emitQuantification(
_ amount: AST.Quantification.Amount,
_ kind: AST.Quantification.Kind,
_ kind: DSLTree.QuantificationKind,
_ child: DSLTree.Node
) throws {
let kind = kind.applying(options)
let updatedKind: AST.Quantification.Kind
switch kind {
case .explicit(let kind):
updatedKind = kind
case .syntax(let kind):
updatedKind = kind.applying(options)
case .default:
updatedKind = options.isReluctantByDefault
? .reluctant
: .eager
}

let (low, high) = amount.bounds
switch (low, high) {
Expand Down Expand Up @@ -496,7 +506,7 @@ extension Compiler.ByteCodeGen {
}

// Set up a dummy save point for possessive to update
if kind == .possessive {
if updatedKind == .possessive {
builder.pushEmptySavePoint()
}

Expand Down Expand Up @@ -542,7 +552,7 @@ extension Compiler.ByteCodeGen {
to: exit, ifZeroElseDecrement: extraTripsReg!)
}

switch kind {
switch updatedKind {
case .eager:
builder.buildSplit(to: loopBody, saving: exit)
case .possessive:
Expand Down
11 changes: 11 additions & 0 deletions Sources/_StringProcessing/PrintAsPattern.swift
Original file line number Diff line number Diff line change
Expand Up @@ -397,3 +397,14 @@ extension AST.Quantification.Kind {
}
}
}

extension DSLTree.QuantificationKind {
  /// The base string used for this quantification kind when printing a
  /// pattern.
  ///
  /// Kinds that wrap an `AST.Quantification.Kind` (`.explicit` and `.syntax`)
  /// defer to the wrapped kind's own `_patternBase`. The option-dependent
  /// `.default` kind is always printed as `".eager"`.
  var _patternBase: String {
    let wrapped: AST.Quantification.Kind
    switch self {
    case .default:
      // No wrapped kind to defer to; print the eager spelling.
      return ".eager"
    case .explicit(let astKind):
      wrapped = astKind
    case .syntax(let astKind):
      wrapped = astKind
    }
    return wrapped._patternBase
  }
}
2 changes: 1 addition & 1 deletion Sources/_StringProcessing/Regex/ASTConversion.swift
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ extension AST.Node {
case let .quantification(v):
let child = v.child.dslTreeNode
return .quantification(
v.amount.value, v.kind.value, child)
v.amount.value, .syntax(v.kind.value), child)

case let .quote(v):
return .quotedLiteral(v.literal)
Expand Down
12 changes: 11 additions & 1 deletion Sources/_StringProcessing/Regex/DSLTree.swift
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ extension DSLTree {

case quantification(
AST.Quantification.Amount,
AST.Quantification.Kind,
QuantificationKind,
Node)

case customCharacterClass(CustomCharacterClass)
Expand Down Expand Up @@ -103,6 +103,16 @@ extension DSLTree {
}

extension DSLTree {
/// Describes where a quantifier's matching behavior (eager, reluctant, or
/// possessive) comes from, which determines how matching options affect it
/// when bytecode is emitted for the quantification.
@_spi(RegexBuilder)
public enum QuantificationKind {
/// The default quantification kind, as set by options.
///
/// Resolved to reluctant when the reluctant-by-default option is in effect,
/// and to eager otherwise.
case `default`
/// An explicitly chosen kind, overriding any options.
///
/// The wrapped kind is used unchanged.
case explicit(AST.Quantification.Kind)
/// A kind set via syntax, which can be affected by options.
///
/// The wrapped kind has the current options applied to it before use.
case syntax(AST.Quantification.Kind)
}

@_spi(RegexBuilder)
public struct CustomCharacterClass {
var members: [Member]
Expand Down
59 changes: 26 additions & 33 deletions Sources/_StringProcessing/Regex/Options.swift
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,32 @@ extension RegexComponent {
wrapInOption(.singleLine, addingIf: dotMatchesNewlines)
}

/// Returns a regular expression in which the start and end of input
/// anchors (`^` and `$`) also match at the start and end of a line.
///
/// Calling this method corresponds to applying the `m` option in a regular
/// expression literal. For this behavior in the `RegexBuilder` syntax, see
/// ``Anchor/startOfLine``, ``Anchor/endOfLine``, ``Anchor/startOfInput``,
/// and ``Anchor/endOfInput``.
///
/// - Parameter matchLineEndings: A Boolean value indicating whether `^` and
///   `$` should match the start and end of lines, respectively.
/// - Returns: This regular expression wrapped with the multiline option
///   according to `matchLineEndings`.
public func anchorsMatchLineEndings(_ matchLineEndings: Bool = true) -> Regex<RegexOutput> {
  return wrapInOption(
    .multiline,
    addingIf: matchLineEndings)
}

/// Returns a regular expression whose quantifiers are reluctant by default
/// rather than eager.
///
/// Calling this method corresponds to applying the `U` option in a regular
/// expression literal. A quantifier that was given an explicit
/// quantification behavior keeps that behavior regardless of this option.
///
/// - Parameter useReluctantQuantifiers: A Boolean value indicating whether
///   quantifiers should be reluctant by default.
/// - Returns: This regular expression wrapped with the reluctant-by-default
///   option according to `useReluctantQuantifiers`.
public func reluctantQuantifiers(_ useReluctantQuantifiers: Bool = true) -> Regex<RegexOutput> {
  return wrapInOption(
    .reluctantByDefault,
    addingIf: useReluctantQuantifiers)
}

/// Returns a regular expression that matches with the specified semantic
/// level.
///
Expand Down Expand Up @@ -128,39 +154,6 @@ public struct RegexSemanticLevel: Hashable {
}
}

// Options that only affect literals
@available(SwiftStdlib 5.7, *)
extension RegexComponent {
/// Returns a regular expression where the start and end of input
/// anchors (`^` and `$`) also match against the start and end of a line.
///
/// This method corresponds to applying the `m` option in a regular
/// expression literal, and only applies to regular expressions specified as
/// literals. For this behavior in the `RegexBuilder` syntax, see
/// ``Anchor.startOfLine``, ``Anchor.endOfLine``, ``Anchor.startOfInput``,
/// and ``Anchor.endOfInput``.
///
/// - Parameter matchLineEndings: A Boolean value indicating whether `^` and
/// `$` should match the start and end of lines, respectively.
public func anchorsMatchLineEndings(_ matchLineEndings: Bool = true) -> Regex<RegexOutput> {
wrapInOption(.multiline, addingIf: matchLineEndings)
}

/// Returns a regular expression where quantifiers are reluctant by default
/// instead of eager.
///
/// This method corresponds to applying the `U` option in a regular
/// expression literal, and only applies to regular expressions specified as
/// literals. In the `RegexBuilder` syntax, pass a ``QuantificationBehavior``
/// value to any quantification method to change its behavior.
///
/// - Parameter useReluctantCaptures: A Boolean value indicating whether
/// quantifiers should be reluctant by default.
public func reluctantCaptures(_ useReluctantCaptures: Bool = true) -> Regex<RegexOutput> {
wrapInOption(.reluctantByDefault, addingIf: useReluctantCaptures)
}
}

// MARK: - Helper method

@available(SwiftStdlib 5.7, *)
Expand Down
20 changes: 19 additions & 1 deletion Tests/RegexBuilderTests/RegexDSLTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,24 @@ class RegexDSLTests: XCTestCase {
}
.ignoringCase(false)
}

try _testDSLCaptures(
("abcdef123", ("abcdef123", "a", "123")),
matchType: (Substring, Substring, Substring).self, ==) {
Capture {
// Reluctant behavior due to option
OneOrMore(.anyOf("abcd"))
.reluctantQuantifiers()
}
ZeroOrMore("a"..."z")

Capture {
// Eager behavior due to explicit parameter, despite option
OneOrMore(.digit, .eagerly)
.reluctantQuantifiers()
}
ZeroOrMore(.digit)
}
}

func testQuantificationBehavior() throws {
Expand Down Expand Up @@ -293,7 +311,7 @@ class RegexDSLTests: XCTestCase {
OneOrMore(.word)
Capture(.digit)
ZeroOrMore(.any)
}.reluctantCaptures()
}.reluctantQuantifiers()
}
}
#endif
Expand Down