@@ -502,6 +502,12 @@ extension Parser {
502
502
var members : Array < Member > = [ ]
503
503
try parseCCCMembers ( into: & members)
504
504
505
+ // Make sure we have at least one semantic member.
506
+ if members. none ( \. isSemantic) {
507
+ throw Source . LocatedError (
508
+ ParseError . expectedCustomCharacterClassMembers, start. location)
509
+ }
510
+
505
511
// If we have a binary set operator, parse it and the next members. Note
506
512
// that this means we left associate for a chain of operators.
507
513
// TODO: We may want to diagnose and require users to disambiguate, at least
@@ -511,16 +517,12 @@ extension Parser {
511
517
var rhs : Array < Member > = [ ]
512
518
try parseCCCMembers ( into: & rhs)
513
519
514
- if members . none ( \ . isSemantic ) || rhs. none ( \. isSemantic) {
520
+ if rhs. none ( \. isSemantic) {
515
521
throw Source . LocatedError (
516
522
ParseError . expectedCustomCharacterClassMembers, start. location)
517
523
}
518
524
members = [ . setOperation( members, binOp, rhs) ]
519
525
}
520
- if members. none ( \. isSemantic) {
521
- throw Source . LocatedError (
522
- ParseError . expectedCustomCharacterClassMembers, start. location)
523
- }
524
526
try source. expect ( " ] " )
525
527
return CustomCC ( start, members, loc ( start. location. start) )
526
528
}
@@ -550,48 +552,88 @@ extension Parser {
550
552
return nil
551
553
}
552
554
553
- mutating func parseCCCMembers(
554
- into members: inout Array < CustomCC . Member >
555
/// Attempt to parse a custom character class range into `members`, or regular
/// members if a range cannot be formed.
///
/// The last element of `members` is the candidate left-hand side. If there is
/// no such element, or it is not semantic, nothing is consumed. Otherwise
/// trivia, a range operator, more trivia and a right-hand member are parsed in
/// turn. Each piece is appended to `members` as it is read, so an early exit
/// leaves the pieces behind as ordinary members, with the operator kept as a
/// literal `-` atom. Once both operands are available they are validated and
/// the appended pieces are collapsed into a single `.range` member.
///
/// - Parameter members: The members parsed so far; mutated in place.
/// - Throws: Anything thrown while lexing trivia or parsing the right-hand
///   member; a located `invalidCharacterClassRangeOperand` or
///   `unsupportedDotNetSubtraction` error when an operand is a nested custom
///   character class; a located `unsupported` error when an operand is a
///   quoted sequence; and a located `unsupportedDotNetSubtraction` error when
///   the finished range is directly followed by a .NET-style subtraction.
mutating func parsePotentialCCRange(
  into members: inout [CustomCC.Member]
) throws {
  guard let lhs = members.last, lhs.isSemantic else { return }

  // Try and see if we can parse a character class range. Each time we parse
  // a component of the range, we append to `members` in case it ends up not
  // being a range, and we bail. If we succeed in parsing, we remove the
  // intermediate members.
  //
  // `membersBeforeRange` is the index of the LHS, i.e. the number of members
  // that must survive untouched once the range has been formed.
  let membersBeforeRange = members.count - 1
  while let t = try source.lexTrivia(context: context) {
    members.append(.trivia(t))
  }
  guard let dash = source.lexCustomCharacterClassRangeOperator() else {
    return
  }

  // If we can't parse a range, '-' becomes literal, e.g `[6-]`. The atom must
  // therefore hold exactly the single character `-`.
  members.append(.atom(.init(.char("-"), dash)))

  while let t = try source.lexTrivia(context: context) {
    members.append(.trivia(t))
  }
  guard let rhs = try parseCCCMember() else { return }
  members.append(rhs)

  // Convert a parsed member into a range operand, rejecting every member kind
  // that cannot bound a range.
  func makeOperand(_ m: CustomCC.Member, isLHS: Bool) throws -> AST.Atom {
    switch m {
    case .atom(let a):
      return a
    case .custom:
      // Not supported. While .NET allows `x-[...]` to spell subtraction, we
      // require `x--[...]`. We also ban `[...]-x` for consistency.
      if isLHS {
        throw Source.LocatedError(
          ParseError.invalidCharacterClassRangeOperand, m.location)
      } else {
        throw Source.LocatedError(
          ParseError.unsupportedDotNetSubtraction, m.location)
      }
    case .quote:
      // Currently unsupported, we need to figure out what the semantics
      // would be for grapheme/scalar modes.
      throw Source.LocatedError(
        ParseError.unsupported(" range with quoted sequence "), m.location)
    case .trivia:
      throw Unreachable(" Should have been lexed separately ")
    case .range, .setOperation:
      throw Unreachable(" Parsed later ")
    }
  }
  // The LHS is validated first, so when both operands are invalid the error
  // reported is the one for the LHS.
  let lhsOp = try makeOperand(lhs, isLHS: true)
  let rhsOp = try makeOperand(rhs, isLHS: false)

  // We've successfully parsed an atom LHS and RHS, so form a range,
  // collecting the trivia we've parsed, and replacing the members that
  // would have otherwise been added to the custom character class.
  let rangeMemberCount = members.count - membersBeforeRange
  let trivia = members.suffix(rangeMemberCount).compactMap(\.asTrivia)
  members.removeLast(rangeMemberCount)
  members.append(.range(.init(lhsOp, dash, rhsOp, trivia: trivia)))

  // We need to specially check if we can lex a .NET character class
  // subtraction here as e.g `[a-c-[...]]` is allowed in .NET. Otherwise we'd
  // treat the second `-` as literal.
  if let dashLoc = source.canLexDotNetCharClassSubtraction(context: context) {
    throw Source.LocatedError(
      ParseError.unsupportedDotNetSubtraction, dashLoc)
  }
}
627
+
628
/// Parses a run of custom character class members, including ranges, and
/// appends them to `members`.
///
/// Parsing continues until `parseCCCMember()` yields `nil`, i.e. until we see
/// the end of the custom char class or an operator. Straight after each member
/// is appended, `parsePotentialCCRange(into:)` is given the chance to fold it
/// into a range.
///
/// - Parameter members: Destination array; appended to in place.
/// - Throws: Any error thrown by `parseCCCMember()` or
///   `parsePotentialCCRange(into:)`.
mutating func parseCCCMembers(
  into members: inout Array<CustomCC.Member>
) throws {
  while true {
    // No further member available: this run of members is finished.
    guard let next = try parseCCCMember() else { return }
    members.append(next)

    // The member just appended may be the left-hand side of a range.
    try parsePotentialCCRange(into: &members)
  }
}
596
638
}
597
639
0 commit comments