@@ -149,6 +149,14 @@ extension Source {
149
149
return result
150
150
}
151
151
152
+ /// Perform a lookahead using a temporary source. Within the body of the
153
+ /// lookahead, any modifications to the source will not be reflected outside
154
+ /// the body. Returns the value produced by `body`; an error thrown by `body` is rethrown.
155
+ func lookahead< T> ( _ body: ( inout Source ) throws -> T ) rethrows -> T {
156
+ var src = self // temporary copy handed to `body` in place of `self`
157
+ return try body ( & src)
158
+ }
159
+
152
160
/// Attempt to eat the given character, returning its source location if
153
161
/// successful, `nil` otherwise.
154
162
mutating func tryEatWithLoc( _ c: Character ) -> SourceLocation ? {
@@ -413,9 +421,7 @@ extension Source {
413
421
) throws -> ( Located < Quant . Amount > , Located < Quant . Kind > , [ AST . Trivia ] ) ? {
414
422
var trivia : [ AST . Trivia ] = [ ]
415
423
416
- if let t = try lexNonSemanticWhitespace ( context: context) {
417
- trivia. append ( t)
418
- }
424
+ if let t = lexNonSemanticWhitespace ( context: context) { trivia. append ( t) }
419
425
420
426
let amt : Located < Quant . Amount > ? = try recordLoc { src in
421
427
if src. tryEat ( " * " ) { return . zeroOrMore }
@@ -424,7 +430,7 @@ extension Source {
424
430
425
431
return try src. tryEating { src in
426
432
guard src. tryEat ( " { " ) ,
427
- let range = try src. lexRange ( context: context) ,
433
+ let range = try src. lexRange ( context: context, trivia : & trivia ) ,
428
434
src. tryEat ( " } " )
429
435
else { return nil }
430
436
return range. value
@@ -433,9 +439,7 @@ extension Source {
433
439
guard let amt = amt else { return nil }
434
440
435
441
// PCRE allows non-semantic whitespace here in extended syntax mode.
436
- if let t = try lexNonSemanticWhitespace ( context: context) {
437
- trivia. append ( t)
438
- }
442
+ if let t = lexNonSemanticWhitespace ( context: context) { trivia. append ( t) }
439
443
440
444
let kind : Located < Quant . Kind > = recordLoc { src in
441
445
if src. tryEat ( " ? " ) { return . reluctant }
@@ -452,11 +456,17 @@ extension Source {
452
456
/// | ExpRange
453
457
/// ExpRange -> '..<' <Int> | '...' <Int>
454
458
/// | <Int> '..<' <Int> | <Int> '...' <Int>?
455
- mutating func lexRange( context: ParsingContext ) throws -> Located < Quant . Amount > ? {
459
+ mutating func lexRange(
460
+ context: ParsingContext , trivia: inout [ AST . Trivia ]
461
+ ) throws -> Located < Quant . Amount > ? {
456
462
try recordLoc { src in
457
463
try src. tryEating { src in
464
+ if let t = src. lexWhitespace ( ) { trivia. append ( t) }
465
+
458
466
let lowerOpt = try src. lexNumber ( )
459
467
468
+ if let t = src. lexWhitespace ( ) { trivia. append ( t) }
469
+
460
470
// ',' or '...' or '..<' or nothing
461
471
// TODO: We ought to try and consume whitespace here and emit a
462
472
// diagnostic for the user warning them that it would cause the range to
@@ -476,11 +486,15 @@ extension Source {
476
486
closedRange = nil
477
487
}
478
488
489
+ if let t = src. lexWhitespace ( ) { trivia. append ( t) }
490
+
479
491
let upperOpt = try src. lexNumber ( ) ? . map { upper in
480
492
// If we have an open range, the upper bound should be adjusted down.
481
493
closedRange == true ? upper : upper - 1
482
494
}
483
495
496
+ if let t = src. lexWhitespace ( ) { trivia. append ( t) }
497
+
484
498
switch ( lowerOpt, closedRange, upperOpt) {
485
499
case let ( l? , nil , nil ) :
486
500
return . exactly( l)
@@ -625,11 +639,11 @@ extension Source {
625
639
///
626
640
mutating func lexComment( context: ParsingContext ) throws -> AST . Trivia ? {
627
641
let trivia : Located < String > ? = try recordLoc { src in
628
- if src. tryEat ( sequence: " (?# " ) {
629
- return try src. expectQuoted ( endingWith : " ) " ) . value
642
+ if !context . isInCustomCharacterClass && src. tryEat ( sequence: " (?# " ) {
643
+ return try src. lexUntil ( eating : " ) " ) . value
630
644
}
631
645
if context. experimentalComments, src. tryEat ( sequence: " /*") {
632
- return try src.expectQuoted(endingWith : "*/" ) . value
646
+ return try src.lexUntil(eating : "*/" ) . value
633
647
}
634
648
if context. endOfLineComments, src. tryEat ( " # " ) {
635
649
// Try eat until we either exhaust the input, or hit a newline. Note
@@ -667,7 +681,7 @@ extension Source {
667
681
/// Does nothing unless `SyntaxOptions.nonSemanticWhitespace` is set
668
682
mutating func lexNonSemanticWhitespace(
669
683
context: ParsingContext
670
- ) throws -> AST . Trivia ? {
684
+ ) -> AST . Trivia ? {
671
685
guard context. ignoreWhitespace else { return nil }
672
686
673
687
// FIXME: PCRE only treats space and tab characters as whitespace when
@@ -699,7 +713,7 @@ extension Source {
699
713
if let comment = try lexComment ( context: context) {
700
714
return comment
701
715
}
702
- if let whitespace = try lexNonSemanticWhitespace ( context: context) {
716
+ if let whitespace = lexNonSemanticWhitespace ( context: context) {
703
717
return whitespace
704
718
}
705
719
return nil
@@ -1178,8 +1192,7 @@ extension Source {
1178
1192
}
1179
1193
}
1180
1194
1181
- mutating func lexCustomCCStart(
1182
- ) throws -> Located < CustomCC . Start > ? {
1195
+ mutating func lexCustomCCStart( ) -> Located < CustomCC . Start > ? {
1183
1196
recordLoc { src in
1184
1197
// Make sure we don't have a POSIX character property. This may require
1185
1198
// walking to its ending to make sure we have a closing ':]', as otherwise
@@ -1240,8 +1253,9 @@ extension Source {
1240
1253
1241
1254
private func canLexPOSIXCharacterProperty( ) -> Bool {
1242
1255
do {
1243
- var src = self
1244
- return try src. lexPOSIXCharacterProperty ( ) != nil
1256
+ return try lookahead { src in
1257
+ try src. lexPOSIXCharacterProperty ( ) != nil
1258
+ }
1245
1259
} catch {
1246
1260
// We want to err on the side of lexing a POSIX character property, so
1247
1261
// even if it is invalid in some way (e.g. invalid property names), still
@@ -1394,10 +1408,11 @@ extension Source {
1394
1408
1395
1409
/// Checks whether a numbered reference can be lexed.
///
/// Works on a temporary source: skips an optional leading '+' or '-', then
/// reports whether the next character passes the decimal radix character
/// filter. Returns `false` when there is no next character to peek.
1396
1410
private func canLexNumberedReference( ) -> Bool {
1397
- var src = self
1398
- _ = src. tryEat ( anyOf: " + " , " - " )
1399
- guard let next = src. peek ( ) else { return false }
1400
- return RadixKind . decimal. characterFilter ( next)
1411
+ lookahead { src in
1412
+ _ = src. tryEat ( anyOf: " + " , " - " )
1413
+ guard let next = src. peek ( ) else { return false }
1414
+ return RadixKind . decimal. characterFilter ( next)
1415
+ }
1401
1416
}
1402
1417
1403
1418
/// Eat a named reference up to a given closing delimiter.
@@ -1587,53 +1602,55 @@ extension Source {
1587
1602
1588
1603
/// Whether we can lex a group-like reference after the specifier '(?'.
///
/// Works on a temporary source and accepts any of: 'P' immediately followed
/// by '=' or '>'; a leading '&' or 'R'; or anything for which
/// `canLexNumberedReference()` returns `true`.
1589
1604
private func canLexGroupLikeReference( ) -> Bool {
1590
- var src = self
1591
- if src. tryEat ( " P " ) {
1592
- return src. tryEat ( anyOf: " = " , " > " ) != nil
1593
- }
1594
- if src. tryEat ( anyOf: " & " , " R " ) != nil {
1595
- return true
1605
+ lookahead { src in
1606
+ if src. tryEat ( " P " ) {
1607
+ return src. tryEat ( anyOf: " = " , " > " ) != nil
1608
+ }
1609
+ if src. tryEat ( anyOf: " & " , " R " ) != nil {
1610
+ return true
1611
+ }
1612
+ return src. canLexNumberedReference ( )
1596
1613
}
1597
- return src. canLexNumberedReference ( )
1598
1614
}
1599
1615
1600
1616
/// Whether a matching option sequence immediately followed by ')' can be
/// lexed at the current position. Works on a temporary source; returns
/// `false` when `lexMatchingOptionSequence` fails (the `try?` yields `nil`).
private func canLexMatchingOptionsAsAtom( context: ParsingContext ) -> Bool {
1601
- var src = self
1602
-
1603
- // See if we can lex a matching option sequence that terminates in ')'. Such
1604
- // a sequence is an atom. If an error is thrown, there are invalid elements
1605
- // of the matching option sequence. In such a case, we can lex as a group
1606
- // and diagnose the invalid group kind.
1607
- guard ( try ? src. lexMatchingOptionSequence ( context: context) ) != nil else {
1608
- return false
1617
+ lookahead { src in
1618
+ // See if we can lex a matching option sequence that terminates in ')'.
1619
+ // Such a sequence is an atom. If an error is thrown, there are invalid
1620
+ // elements of the matching option sequence. In such a case, we can lex as
1621
+ // a group and diagnose the invalid group kind.
1622
+ guard ( try ? src. lexMatchingOptionSequence ( context: context) ) != nil else {
1623
+ return false
1624
+ }
1625
+ return src. tryEat ( " ) " )
1609
1626
}
1610
- return src. tryEat ( " ) " )
1611
1627
}
1612
1628
1613
1629
/// Whether a group specifier should be lexed as an atom instead of a group.
///
/// Works on a temporary source. Requires a leading '(' and then returns
/// `true` for '?' followed by a group-like reference, a 'C', a '{', or a
/// matching option atom, and for a '*'. Returns `false` in every other case.
1614
1630
private func shouldLexGroupLikeAtom( context: ParsingContext ) -> Bool {
1615
- var src = self
1616
- guard src. tryEat ( " ( " ) else { return false }
1631
+ lookahead { src in
1632
+ guard src. tryEat ( " ( " ) else { return false }
1617
1633
1618
- if src. tryEat ( " ? " ) {
1619
- // The start of a reference '(?P=', '(?R', ...
1620
- if src. canLexGroupLikeReference ( ) { return true }
1634
+ if src. tryEat ( " ? " ) {
1635
+ // The start of a reference '(?P=', '(?R', ...
1636
+ if src. canLexGroupLikeReference ( ) { return true }
1621
1637
1622
- // The start of a PCRE callout.
1623
- if src. tryEat ( " C " ) { return true }
1638
+ // The start of a PCRE callout.
1639
+ if src. tryEat ( " C " ) { return true }
1624
1640
1625
- // The start of an Oniguruma 'of-contents' callout.
1626
- if src. tryEat ( " { " ) { return true }
1641
+ // The start of an Oniguruma 'of-contents' callout.
1642
+ if src. tryEat ( " { " ) { return true }
1627
1643
1628
- // A matching option atom (?x), (?i), ...
1629
- if src. canLexMatchingOptionsAsAtom ( context: context) { return true }
1644
+ // A matching option atom (?x), (?i), ...
1645
+ if src. canLexMatchingOptionsAsAtom ( context: context) { return true }
1646
+
1647
+ return false
1648
+ }
1649
+ // The start of a backreference directive or Oniguruma named callout.
1650
+ if src. tryEat ( " * " ) { return true }
1630
1651
1631
1652
return false
1632
1653
}
1633
- // The start of a backreference directive or Oniguruma named callout.
1634
- if src. tryEat ( " * " ) { return true }
1635
-
1636
- return false
1637
1654
}
1638
1655
1639
1656
/// Consume an escaped atom, starting from after the backslash
@@ -2022,20 +2039,11 @@ extension Source {
2022
2039
return AST . Atom ( kind. value, kind. location)
2023
2040
}
2024
2041
2025
- /// Try to lex the end of a range in a custom character class, which consists
2026
- /// of a '-' character followed by an atom.
2027
- mutating func lexCustomCharClassRangeEnd(
2028
- context: ParsingContext
2029
- ) throws -> ( dashLoc: SourceLocation , AST . Atom ) ? {
2030
- // Make sure we don't have a binary operator e.g. '--', and the '-' is not
2031
- // ending the custom character class (in which case it is literal).
2032
- guard peekCCBinOp ( ) == nil , !starts( with: " -] " ) ,
2033
- let dash = tryEatWithLoc ( " - " ) ,
2034
- let end = try lexAtom ( context: context)
2035
- else {
2036
- return nil
2037
- }
2038
- return ( dash, end)
2042
+ /// Try to lex the range operator '-' for a custom character class. Returns the location of the eaten '-', or `nil` if a binary operator comes next or no '-' could be eaten.
2043
+ mutating func lexCustomCharacterClassRangeOperator( ) -> SourceLocation ? {
2044
+ // Eat a '-', making sure we don't have a binary op such as '--'.
2045
+ guard peekCCBinOp ( ) == nil else { return nil }
2046
+ return tryEatWithLoc ( " - " )
2039
2047
}
2040
2048
2041
2049
/// Try to consume a newline sequence matching option kind.
0 commit comments