99
1010use crate :: peg:: { PairExt , PairsExt , PartiQLParser , Rule } ;
1111use crate :: prelude:: * ;
12+ use bigdecimal:: BigDecimal ;
13+ use num_bigint:: BigInt ;
14+ use num_traits:: Num ;
1215use pest:: iterators:: Pair ;
13- use pest:: Parser ;
16+ use pest:: { Parser , RuleType } ;
1417use std:: borrow:: Cow ;
1518
1619/// The parsed content associated with a [`Token`] that has been scanned.
1720#[ derive( Clone , Debug , Eq , PartialEq ) ]
1821pub enum Content < ' val > {
1922 /// A PartiQL keyword. Contains the slice for the keyword case folded to upper case.
2023 Keyword ( Cow < ' val , str > ) ,
24+
2125 /// An identifier. Contains the slice for the text of the identifier.
2226 Identifier ( Cow < ' val , str > ) ,
27+
28+ /// An integer literal. Stores this as a [`BigInt`].
29+ ///
30+ /// Users will likely deal with smaller integers and encode this in execution/compilation
31+ /// as `i64` or the like, but the parser need not deal with that detail.
32+ IntegerLiteral ( BigInt ) ,
33+
34+ /// A decimal literal. Contains the parsed [`BigDecimal`] for the literal.
35+ DecimalLiteral ( BigDecimal ) ,
36+
2337 /// A string literal. Contains the slice for the content of the literal.
2438 StringLiteral ( Cow < ' val , str > ) ,
2539 // TODO things like literals, punctuation, etc.
@@ -123,6 +137,18 @@ fn normalize_quoted_ident(raw_text: &str) -> Cow<str> {
123137 . into ( )
124138}
125139
140+ fn parse_num < T , R , E > ( pair : Pair < R > ) -> ParserResult < T >
141+ where
142+ T : Num < FromStrRadixErr = E > ,
143+ R : RuleType ,
144+ E : std:: fmt:: Display ,
145+ {
146+ match T :: from_str_radix ( pair. as_str ( ) , 10 ) {
147+ Ok ( value) => Ok ( value) ,
148+ Err ( e) => pair. syntax_error ( format ! ( "Could not parse number {}: {}" , pair. as_str( ) , e) ) ,
149+ }
150+ }
151+
126152impl < ' val > PartiQLScanner < ' val > {
127153 fn do_next_token ( & mut self ) -> ParserResult < Token < ' val > > {
128154 // the scanner rule is expected to return a single node
@@ -144,10 +170,20 @@ impl<'val> PartiQLScanner<'val> {
144170 Rule :: QuotedIdentifier => {
145171 Content :: Identifier ( normalize_quoted_ident ( ident_pair. as_str ( ) ) )
146172 }
147- _ => return ident_pair. syntax_error ( ) ,
173+ _ => return ident_pair. unexpected ( ) ,
174+ }
175+ }
176+ Rule :: Number => {
177+ let number_pair = pair. into_inner ( ) . exactly_one ( ) ?;
178+ match number_pair. as_rule ( ) {
179+ Rule :: Integer => Content :: IntegerLiteral ( parse_num ( number_pair) ?) ,
180+ Rule :: Decimal | Rule :: DecimalExp => {
181+ Content :: DecimalLiteral ( parse_num ( number_pair) ?)
182+ }
183+ _ => return number_pair. unexpected ( ) ,
148184 }
149185 }
150- _ => return pair. syntax_error ( ) ,
186+ _ => return pair. unexpected ( ) ,
151187 } ;
152188
153189 Ok ( Token {
@@ -292,7 +328,7 @@ mod test {
292328 ]
293329 ) ]
294330 #[ case:: bad_identifier(
295- " 99ranch " ,
331+ " 💩 " ,
296332 vec![
297333 syntax_error( "IGNORED MESSAGE" , Position :: at( 1 , 9 ) ) ,
298334 ]
@@ -349,6 +385,174 @@ mod test {
349385 syntax_error( "IGNORED MESSAGE" , Position :: at( 1 , 32 ) ) ,
350386 ]
351387 ) ]
388+ #[ case:: numeric_literals(
389+ "1 -0099 1.1 +00055.023100 99.1234e0010" ,
390+ vec![
391+ Ok ( Token {
392+ content: Content :: IntegerLiteral ( 1 . into( ) ) ,
393+ start: LineAndColumn :: at( 1 , 1 ) ,
394+ end: LineAndColumn :: at( 1 , 2 ) ,
395+ text: "1" ,
396+ remainder: Remainder {
397+ input: " -0099 1.1 +00055.023100 99.1234e0010" ,
398+ offset: LineAndColumn :: at( 1 , 2 )
399+ }
400+ } ) ,
401+ Ok ( Token {
402+ content: Content :: IntegerLiteral ( BigInt :: from( -99 ) ) ,
403+ start: LineAndColumn :: at( 1 , 3 ) ,
404+ end: LineAndColumn :: at( 1 , 8 ) ,
405+ text: "-0099" ,
406+ remainder: Remainder {
407+ input: " 1.1 +00055.023100 99.1234e0010" ,
408+ offset: LineAndColumn :: at( 1 , 8 )
409+ }
410+ } ) ,
411+ Ok ( Token {
412+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "1.1" , 10 ) . unwrap( ) ) ,
413+ start: LineAndColumn :: at( 1 , 9 ) ,
414+ end: LineAndColumn :: at( 1 , 12 ) ,
415+ text: "1.1" ,
416+ remainder: Remainder {
417+ input: " +00055.023100 99.1234e0010" ,
418+ offset: LineAndColumn :: at( 1 , 12 )
419+ }
420+ } ) ,
421+ Ok ( Token {
422+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "55.023100" , 10 ) . unwrap( ) ) ,
423+ start: LineAndColumn :: at( 1 , 13 ) ,
424+ end: LineAndColumn :: at( 1 , 26 ) ,
425+ text: "+00055.023100" ,
426+ remainder: Remainder {
427+ input: " 99.1234e0010" ,
428+ offset: LineAndColumn :: at( 1 , 26 )
429+ }
430+ } ) ,
431+ Ok ( Token {
432+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "99.1234e10" , 10 ) . unwrap( ) ) ,
433+ start: LineAndColumn :: at( 1 , 27 ) ,
434+ end: LineAndColumn :: at( 1 , 39 ) ,
435+ text: "99.1234e0010" ,
436+ remainder: Remainder {
437+ input: "" ,
438+ offset: LineAndColumn :: at( 1 , 39 )
439+ }
440+ } ) ,
441+ syntax_error( "IGNORED MESSAGE" , Position :: at( 1 , 39 ) ) ,
442+ ]
443+ ) ]
444+ #[ case:: numeric_literals_with_pads(
445+ "+0005 .0001 -00.0002 000003.004E+001" ,
446+ vec![
447+ Ok ( Token {
448+ content: Content :: IntegerLiteral ( 5 . into( ) ) ,
449+ start: LineAndColumn :: at( 1 , 1 ) ,
450+ end: LineAndColumn :: at( 1 , 6 ) ,
451+ text: "+0005" ,
452+ remainder: Remainder {
453+ input: " .0001 -00.0002 000003.004E+001" ,
454+ offset: LineAndColumn :: at( 1 , 6 )
455+ }
456+ } ) ,
457+ Ok ( Token {
458+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "0.0001" , 10 ) . unwrap( ) ) ,
459+ start: LineAndColumn :: at( 1 , 7 ) ,
460+ end: LineAndColumn :: at( 1 , 12 ) ,
461+ text: ".0001" ,
462+ remainder: Remainder {
463+ input: " -00.0002 000003.004E+001" ,
464+ offset: LineAndColumn :: at( 1 , 12 )
465+ }
466+ } ) ,
467+ Ok ( Token {
468+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "-0.0002" , 10 ) . unwrap( ) ) ,
469+ start: LineAndColumn :: at( 1 , 13 ) ,
470+ end: LineAndColumn :: at( 1 , 21 ) ,
471+ text: "-00.0002" ,
472+ remainder: Remainder {
473+ input: " 000003.004E+001" ,
474+ offset: LineAndColumn :: at( 1 , 21 )
475+ }
476+ } ) ,
477+ Ok ( Token {
478+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "3.004e1" , 10 ) . unwrap( ) ) ,
479+ start: LineAndColumn :: at( 1 , 22 ) ,
480+ end: LineAndColumn :: at( 1 , 37 ) ,
481+ text: "000003.004E+001" ,
482+ remainder: Remainder {
483+ input: "" ,
484+ offset: LineAndColumn :: at( 1 , 37 )
485+ }
486+ } ) ,
487+ syntax_error( "IGNORED MESSAGE" , Position :: at( 1 , 37 ) ) ,
488+ ]
489+ ) ]
490+ #[ case:: zeroes(
491+ "0 000 .0 000.000 .0e0 0.0e000" ,
492+ vec![
493+ Ok ( Token {
494+ content: Content :: IntegerLiteral ( 0 . into( ) ) ,
495+ start: LineAndColumn :: at( 1 , 1 ) ,
496+ end: LineAndColumn :: at( 1 , 2 ) ,
497+ text: "0" ,
498+ remainder: Remainder {
499+ input: " 000 .0 000.000 .0e0 0.0e000" ,
500+ offset: LineAndColumn :: at( 1 , 2 )
501+ }
502+ } ) ,
503+ Ok ( Token {
504+ content: Content :: IntegerLiteral ( 0 . into( ) ) ,
505+ start: LineAndColumn :: at( 1 , 3 ) ,
506+ end: LineAndColumn :: at( 1 , 6 ) ,
507+ text: "000" ,
508+ remainder: Remainder {
509+ input: " .0 000.000 .0e0 0.0e000" ,
510+ offset: LineAndColumn :: at( 1 , 6 )
511+ }
512+ } ) ,
513+ Ok ( Token {
514+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "0.0" , 10 ) . unwrap( ) ) ,
515+ start: LineAndColumn :: at( 1 , 7 ) ,
516+ end: LineAndColumn :: at( 1 , 9 ) ,
517+ text: ".0" ,
518+ remainder: Remainder {
519+ input: " 000.000 .0e0 0.0e000" ,
520+ offset: LineAndColumn :: at( 1 , 9 )
521+ }
522+ } ) ,
523+ Ok ( Token {
524+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "0.000" , 10 ) . unwrap( ) ) ,
525+ start: LineAndColumn :: at( 1 , 10 ) ,
526+ end: LineAndColumn :: at( 1 , 17 ) ,
527+ text: "000.000" ,
528+ remainder: Remainder {
529+ input: " .0e0 0.0e000" ,
530+ offset: LineAndColumn :: at( 1 , 17 )
531+ }
532+ } ) ,
533+ Ok ( Token {
534+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "0.0" , 10 ) . unwrap( ) ) ,
535+ start: LineAndColumn :: at( 1 , 18 ) ,
536+ end: LineAndColumn :: at( 1 , 22 ) ,
537+ text: ".0e0" ,
538+ remainder: Remainder {
539+ input: " 0.0e000" ,
540+ offset: LineAndColumn :: at( 1 , 22 )
541+ }
542+ } ) ,
543+ Ok ( Token {
544+ content: Content :: DecimalLiteral ( BigDecimal :: from_str_radix( "0.0" , 10 ) . unwrap( ) ) ,
545+ start: LineAndColumn :: at( 1 , 23 ) ,
546+ end: LineAndColumn :: at( 1 , 30 ) ,
547+ text: "0.0e000" ,
548+ remainder: Remainder {
549+ input: "" ,
550+ offset: LineAndColumn :: at( 1 , 30 )
551+ }
552+ } ) ,
553+ syntax_error( "IGNORED MESSAGE" , Position :: at( 1 , 30 ) ) ,
554+ ]
555+ ) ]
352556 #[ case:: select_from(
353557 r#"SelEct '✨✨✨' fROM "┬─┬" "# ,
354558 vec![
0 commit comments