1 | | -use logos::Logos; |
| 1 | +use nom::{ |
| 2 | + IResult, Parser, |
| 3 | + branch::alt, |
| 4 | + bytes::complete::{tag, take_while}, |
| 5 | + character::complete::{multispace0, satisfy}, |
| 6 | + combinator::{map, opt, recognize, value}, |
| 7 | + multi::many0, |
| 8 | + sequence::{pair, preceded}, |
| 9 | +}; |
2 | 10 | |
3 | | -#[derive(Logos, Debug, PartialEq, Clone)] |
4 | | -#[logos(skip r"[ \t\r\n\f]+")] |
| 11 | +#[derive(Debug, PartialEq, Clone)] |
5 | 12 | pub enum Token { |
6 | | - #[token(":")] |
7 | 13 | Colon, |
8 | | - |
9 | | - #[token("::")] |
10 | 14 | DoubleColon, |
11 | | - |
12 | | - #[token("<")] |
13 | 15 | OpenAngle, |
14 | | - |
15 | | - #[token(">")] |
16 | 16 | CloseAngle, |
17 | | - |
18 | | - #[token("=")] |
19 | 17 | EqualSign, |
20 | | - |
21 | | - #[regex(r"\(\s*[a-zA-Z_][a-zA-Z0-9_]*\s*,\s*[a-zA-Z_][a-zA-Z0-9_]*\s*\)", |lex| lex.slice().to_string())] |
22 | | - #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_string())] |
| 18 | + OpenBracket, |
| 19 | + ClosedBracket, |
23 | 20 | Identifier(String), |
24 | | - |
25 | | - #[regex(r"jet::[a-zA-Z0-9_]?", priority = 2)] |
26 | | - #[token("jet::", priority = 1)] |
27 | 21 | Jet, |
28 | 22 | } |
| 23 | + |
| 24 | +fn parse_symbol(input: &str) -> IResult<&str, Token> { |
| 25 | + let mut parser = alt(( |
| 26 | + value(Token::DoubleColon, tag("::")), |
| 27 | + value(Token::Colon, tag(":")), |
| 28 | + value(Token::OpenBracket, tag("(")), |
| 29 | + value(Token::ClosedBracket, tag(")")), |
| 30 | + value(Token::OpenAngle, tag("<")), |
| 31 | + value(Token::CloseAngle, tag(">")), |
| 32 | + value(Token::EqualSign, tag("=")), |
| 33 | + )); |
| 34 | + parser.parse(input) |
| 35 | +} |
| 36 | + |
| 37 | +fn parse_jet(input: &str) -> IResult<&str, Token> { |
| 38 | + let mut parser = value( |
| 39 | + Token::Jet, |
| 40 | + recognize(pair( |
| 41 | + tag("jet::"), |
| 42 | + opt(take_while(|c: char| c.is_alphanumeric() || c == '_')), |
| 43 | + )), |
| 44 | + ); |
| 45 | + parser.parse(input) |
| 46 | +} |
| 47 | + |
| 48 | +fn parse_identifier(input: &str) -> IResult<&str, Token> { |
| 49 | + let mut parser = map( |
| 50 | + recognize(pair( |
| 51 | + satisfy(|c| c.is_alphabetic() || c == '_'), |
| 52 | + take_while(|c: char| c.is_alphanumeric() || c == '_'), |
| 53 | + )), |
| 54 | + |s: &str| Token::Identifier(s.to_string()), |
| 55 | + ); |
| 56 | + parser.parse(input) |
| 57 | +} |
| 58 | + |
| 59 | +pub fn lex_tokens(input: &str) -> IResult<&str, Vec<Token>> { |
| 60 | + let mut parser = many0(preceded( |
| 61 | + multispace0, |
| 62 | + alt((parse_jet, parse_symbol, parse_identifier)), |
| 63 | + )); |
| 64 | + parser.parse(input) |
| 65 | +} |
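
A minimal usage sketch, assuming the functions above are in scope. The input string and the `main` driver are hypothetical, made up only to exercise the token set from this diff; note that `lex_tokens` skips whitespace before each token but leaves anything it cannot match (including trailing whitespace) in the remaining input.

```rust
// Hypothetical driver, not part of the commit.
fn main() {
    let input = "jet::verify : value = input";
    let (rest, tokens) = lex_tokens(input).expect("lexing failed");

    // Everything is consumed here: whitespace before each token is eaten by
    // `multispace0`, and each word/symbol matches one of the sub-parsers.
    assert_eq!(rest, "");
    assert_eq!(tokens[0], Token::Jet); // "jet::verify" lexes as a single Jet token
    assert_eq!(tokens[1], Token::Colon);
    assert_eq!(tokens[2], Token::Identifier("value".to_string()));
    assert_eq!(tokens[3], Token::EqualSign);
    assert_eq!(tokens[4], Token::Identifier("input".to_string()));
}
```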