@@ -157,6 +157,19 @@ extension Source {
157157 return . init( start ..< currentPosition)
158158 }
159159
160+ /// Attempt to eat a given prefix that satisfies a given predicate, with the
161+ /// source location recorded.
     ///
     /// `maxLength` and `f` are forwarded unchanged to `tryEatPrefix`, which is
     /// invoked inside `recordLoc` so that the result carries a location.
     ///
     /// - Returns: `nil` when the `recordLoc` call produces `nil` (presumably
     ///   because `tryEatPrefix` matched nothing; confirm against
     ///   `tryEatPrefix`); otherwise the located result mapped through its
     ///   `string` property.
162+ mutating func tryEatLocatedPrefix(
163+ maxLength: Int ? = nil ,
164+ _ f: ( Char ) -> Bool
165+ ) -> Located < String > ? {
166+ let result = recordLoc { src in
167+ src. tryEatPrefix ( maxLength: maxLength, f)
168+ }
     // A nil result is propagated to the caller as-is.
169+ guard let result = result else { return nil }
     // Convert the located prefix into a located `String` via its `string` property.
170+ return result. map ( \. string)
171+ }
172+
160173 /// Throws an expected ASCII character error if not matched
161174 mutating func expectASCII( ) throws -> Located < Character > {
162175 try recordLoc { src in
@@ -217,13 +230,13 @@ extension Source {
217230 /// return the scalar value, or throw an error if the string is malformed or
218231 /// would overflow the scalar.
219232 private static func validateUnicodeScalar(
220- _ str: String , _ kind: RadixKind
221- ) throws -> Unicode . Scalar {
222- let num = try validateNumber ( str, UInt32 . self, kind)
233+ _ str: Source . Located < String > , _ kind: RadixKind
234+ ) throws -> AST . Atom . Scalar {
       // Only the text (`str.value`) is handed to `validateNumber`; the location
       // is reattached to the resulting scalar on return.
235+ let num = try validateNumber ( str. value , UInt32 . self, kind)
       // `Unicode.Scalar(num)` is failable; a nil result is reported as a
       // `ParseError.misc` that includes the number's `hexStr`.
223236 guard let scalar = Unicode . Scalar ( num) else {
224237 throw ParseError . misc ( " Invalid scalar value U+ \( num. hexStr) " )
225238 }
226- return scalar
       // Pair the validated scalar with the location of the input string.
239+ return . init ( scalar, str . location )
227240 }
228241
229242 /// Try to eat a number of a particular type and radix off the front.
@@ -266,14 +279,15 @@ extension Source {
266279 /// Eat a scalar value from hexadecimal notation off the front.
       ///
       /// Eats up to `numDigits` characters via `eat(upToCount:)` and requires the
       /// eaten text's `count` to equal `numDigits`; the located text is then
       /// validated as a hexadecimal scalar by `Source.validateUnicodeScalar`.
       ///
       /// - Throws: `ParseError.expectedNumDigits` when the eaten text's count
       ///   differs from `numDigits`; errors thrown by
       ///   `Source.validateUnicodeScalar` propagate unchanged.
267280 private mutating func expectUnicodeScalar(
268281 numDigits: Int
269- ) throws -> Located < Unicode . Scalar > {
270- try recordLoc { src in
282+ ) throws -> AST . Atom . Scalar {
       // Record the location of the digit text itself; validation runs after the
       // closure so that it receives the located string.
283+ let str = try recordLoc { src -> String in
271284 let str = src. eat ( upToCount: numDigits) . string
272285 guard str. count == numDigits else {
273286 throw ParseError . expectedNumDigits ( str, numDigits)
274287 }
275- return try Source . validateUnicodeScalar ( str, . hex )
288+ return str
276289 }
290+ return try Source . validateUnicodeScalar ( str, . hex)
277291 }
278292
279293 /// Eat a scalar off the front, starting from after the
@@ -289,49 +303,57 @@ extension Source {
289303 ///
290304 mutating func expectUnicodeScalar(
291305 escapedCharacter base: Character
292- ) throws -> Located < Unicode . Scalar > {
306+ ) throws -> AST . Atom . Kind {
293307 try recordLoc { src in
308+
       // Builds a `.scalar` atom for scalar value 0 whose location is the empty
       // range `pos ..< pos` at the current source position.
309+ func nullScalar( ) -> AST . Atom . Kind {
310+ let pos = src. currentPosition
311+ return . scalar( . init( UnicodeScalar ( 0 ) , SourceLocation ( pos ..< pos) ) )
312+ }
313+
294314 // TODO: PCRE offers a different behavior if PCRE2_ALT_BSUX is set.
295315 switch base {
296316 // Hex numbers.
297317 case " u " where src. tryEat ( " { " ) , " x " where src. tryEat ( " { " ) :
298- let str = try src. lexUntil ( eating: " } " ) . value
299- return try Source . validateUnicodeScalar ( str, . hex)
318+ let str = try src. lexUntil ( eating: " } " )
319+ return . scalar ( try Source . validateUnicodeScalar ( str, . hex) )
300320
301321 case " x " :
302322 // \x expects *up to* 2 digits.
303- guard let digits = src. tryEatPrefix ( maxLength: 2 , \. isHexDigit) else {
323+ guard let digits = src. tryEatLocatedPrefix ( maxLength: 2 , \. isHexDigit)
324+ else {
304325 // In PCRE, \x without any valid hex digits is \u{0}.
305326 // TODO: This doesn't appear to be followed by ICU or Oniguruma, so
306327 // could be changed to throw an error if we had a parsing mode for
307328 // them.
308- return Unicode . Scalar ( 0 )
329+ return nullScalar ( )
309330 }
310- return try Source . validateUnicodeScalar ( digits. string , . hex)
331+ return . scalar ( try Source . validateUnicodeScalar ( digits, . hex) )
311332
       // Fixed-width forms: 4 digits are requested for `u` and 8 for `U`.
312333 case " u " :
313- return try src. expectUnicodeScalar ( numDigits: 4 ) . value
334+ return . scalar ( try src. expectUnicodeScalar ( numDigits: 4 ) )
314335 case " U " :
315- return try src. expectUnicodeScalar ( numDigits: 8 ) . value
336+ return . scalar ( try src. expectUnicodeScalar ( numDigits: 8 ) )
316337
317338 // Octal numbers.
318339 case " o " where src. tryEat ( " { " ) :
319- let str = try src. lexUntil ( eating: " } " ) . value
320- return try Source . validateUnicodeScalar ( str, . octal)
340+ let str = try src. lexUntil ( eating: " } " )
341+ return . scalar ( try Source . validateUnicodeScalar ( str, . octal) )
321342
322343 case " 0 " :
323344 // We can read *up to* 3 more octal digits.
324345 // FIXME: PCRE can only read up to 2 octal digits, if we get a strict
325346 // PCRE mode, we should limit it here.
326- guard let digits = src. tryEatPrefix ( maxLength: 3 , \. isOctalDigit) else {
327- return Unicode . Scalar ( 0 )
347+ guard let digits = src. tryEatLocatedPrefix ( maxLength: 3 , \. isOctalDigit)
348+ else {
349+ return nullScalar ( )
328350 }
329- return try Source . validateUnicodeScalar ( digits. string , . octal)
351+ return . scalar ( try Source . validateUnicodeScalar ( digits, . octal) )
330352
       // Any other `base` is treated as a programmer error and traps.
331353 default :
332354 fatalError ( " Unexpected scalar start " )
333355 }
334- }
       // Only the atom kind is returned; the outer location recorded by
       // `recordLoc` is dropped here (each scalar is built with its own location).
356+ } . value
335357 }
336358
337359 /// Try to consume a quantifier
@@ -1153,7 +1175,7 @@ extension Source {
11531175
11541176 // We should either have a unicode scalar.
11551177 if src. tryEat ( sequence: " U+ " ) {
1156- let str = try src. lexUntil ( eating: " } " ) . value
1178+ let str = try src. lexUntil ( eating: " } " )
11571179 return . scalar( try Source . validateUnicodeScalar ( str, . hex) )
11581180 }
11591181
@@ -1581,8 +1603,7 @@ extension Source {
15811603 switch char {
15821604 // Hexadecimal and octal unicode scalars.
15831605 case " u " , " x " , " U " , " o " , " 0 " :
1584- return try . scalar(
1585- src. expectUnicodeScalar ( escapedCharacter: char) . value)
1606+ return try src. expectUnicodeScalar ( escapedCharacter: char)
15861607 default :
15871608 break
15881609 }
0 commit comments