@@ -502,6 +502,12 @@ extension Parser {
502502 var members : Array < Member > = [ ]
503503 try parseCCCMembers ( into: & members)
504504
505+ // Make sure we have at least one semantic member.
506+ if members. none ( \. isSemantic) {
507+ throw Source . LocatedError (
508+ ParseError . expectedCustomCharacterClassMembers, start. location)
509+ }
510+
505511 // If we have a binary set operator, parse it and the next members. Note
506512 // that this means we left associate for a chain of operators.
507513 // TODO: We may want to diagnose and require users to disambiguate, at least
@@ -511,16 +517,12 @@ extension Parser {
511517 var rhs : Array < Member > = [ ]
512518 try parseCCCMembers ( into: & rhs)
513519
514- if members . none ( \ . isSemantic ) || rhs. none ( \. isSemantic) {
520+ if rhs. none ( \. isSemantic) {
515521 throw Source . LocatedError (
516522 ParseError . expectedCustomCharacterClassMembers, start. location)
517523 }
518524 members = [ . setOperation( members, binOp, rhs) ]
519525 }
520- if members. none ( \. isSemantic) {
521- throw Source . LocatedError (
522- ParseError . expectedCustomCharacterClassMembers, start. location)
523- }
524526 try source. expect ( " ] " )
525527 return CustomCC ( start, members, loc ( start. location. start) )
526528 }
@@ -550,48 +552,88 @@ extension Parser {
550552 return nil
551553 }
552554
553- mutating func parseCCCMembers(
554- into members: inout Array < CustomCC . Member >
555+ /// Attempt to parse a custom character class range into `members`, or regular
556+ /// members if a range cannot be formed.
557+ mutating func parsePotentialCCRange(
558+ into members: inout [ CustomCC . Member ]
555559 ) throws {
556- // Parse members until we see the end of the custom char class or an
557- // operator.
558- while let member = try parseCCCMember ( ) {
559- members. append ( member)
560-
561- // If we have an atom, we can try to parse a character class range. Each
562- // time we parse a component of the range, we append to `members` in case
563- // it ends up not being a range, and we bail. If we succeed in parsing, we
564- // remove the intermediate members.
565- if case . atom( let lhs) = member {
566- let membersBeforeRange = members. count - 1
567-
568- while let t = try source. lexTrivia ( context: context) {
569- members. append ( . trivia( t) )
570- }
560+ guard let lhs = members. last, lhs. isSemantic else { return }
561+
562+ // Try and see if we can parse a character class range. Each time we parse
563+ // a component of the range, we append to `members` in case it ends up not
564+ // being a range, and we bail. If we succeed in parsing, we remove the
565+ // intermediate members.
566+ let membersBeforeRange = members. count - 1
567+ while let t = try source. lexTrivia ( context: context) {
568+ members. append ( . trivia( t) )
569+ }
570+ guard let dash = source. lexCustomCharacterClassRangeOperator ( ) else {
571+ return
572+ }
571573
572- guard let dash = source. lexCustomCharacterClassRangeOperator ( ) else {
573- continue
574- }
575- // If we can't parse a range, '-' becomes literal, e.g `[6-]`.
576- members. append ( . atom( . init( . char( " - " ) , dash) ) )
574+ // If we can't parse a range, '-' becomes literal, e.g `[6-]`.
575+ members. append ( . atom( . init( . char( " - " ) , dash) ) )
577576
578- while let t = try source. lexTrivia ( context: context) {
579- members. append ( . trivia( t) )
577+ while let t = try source. lexTrivia ( context: context) {
578+ members. append ( . trivia( t) )
579+ }
580+ guard let rhs = try parseCCCMember ( ) else { return }
581+ members. append ( rhs)
582+
583+ func makeOperand( _ m: CustomCC . Member , isLHS: Bool ) throws -> AST . Atom {
584+ switch m {
585+ case . atom( let a) :
586+ return a
587+ case . custom:
588+ // Not supported. While .NET allows `x-[...]` to spell subtraction, we
589+ // require `x--[...]`. We also ban `[...]-x` for consistency.
590+ if isLHS {
591+ throw Source . LocatedError (
592+ ParseError . invalidCharacterClassRangeOperand, m. location)
593+ } else {
594+ throw Source . LocatedError (
595+ ParseError . unsupportedDotNetSubtraction, m. location)
580596 }
581- guard let rhs = try parseCCCMember ( ) else { continue }
582- members. append ( rhs)
583-
584- guard case let . atom( rhs) = rhs else { continue }
585-
586- // We've successfully parsed an atom LHS and RHS, so form a range,
587- // collecting the trivia we've parsed, and replacing the members that
588- // would have otherwise been added to the custom character class.
589- let rangeMemberCount = members. count - membersBeforeRange
590- let trivia = members. suffix ( rangeMemberCount) . compactMap ( \. asTrivia)
591- members. removeLast ( rangeMemberCount)
592- members. append ( . range( . init( lhs, dash, rhs, trivia: trivia) ) )
597+ case . quote:
598+ // Currently unsupported, we need to figure out what the semantics
599+ // would be for grapheme/scalar modes.
600+ throw Source . LocatedError (
601+ ParseError . unsupported ( " range with quoted sequence " ) , m. location)
602+ case . trivia:
603+ throw Unreachable ( " Should have been lexed separately " )
604+ case . range, . setOperation:
605+ throw Unreachable ( " Parsed later " )
593606 }
594607 }
608+ let lhsOp = try makeOperand ( lhs, isLHS: true )
609+ let rhsOp = try makeOperand ( rhs, isLHS: false )
610+
611+ // We've successfully parsed an atom LHS and RHS, so form a range,
612+ // collecting the trivia we've parsed, and replacing the members that
613+ // would have otherwise been added to the custom character class.
614+ let rangeMemberCount = members. count - membersBeforeRange
615+ let trivia = members. suffix ( rangeMemberCount) . compactMap ( \. asTrivia)
616+ members. removeLast ( rangeMemberCount)
617+ members. append ( . range( . init( lhsOp, dash, rhsOp, trivia: trivia) ) )
618+
619+ // We need to specially check if we can lex a .NET character class
620+ // subtraction here as e.g `[a-c-[...]]` is allowed in .NET. Otherwise we'd
621+ // treat the second `-` as literal.
622+ if let dashLoc = source. canLexDotNetCharClassSubtraction ( context: context) {
623+ throw Source . LocatedError (
624+ ParseError . unsupportedDotNetSubtraction, dashLoc)
625+ }
626+ }
627+
628+ mutating func parseCCCMembers(
629+ into members: inout Array < CustomCC . Member >
630+ ) throws {
631+ // Parse members and ranges until we see the end of the custom char class
632+ // or an operator.
633+ while let member = try parseCCCMember ( ) {
634+ members. append ( member)
635+ try parsePotentialCCRange ( into: & members)
636+ }
595637 }
596638}
597639
0 commit comments