88class Lexer (object ):
99 START_IDENTIFIER = set (string .ascii_letters + '_' )
1010 VALID_IDENTIFIER = set (string .ascii_letters + string .digits + '_' )
11- START_NUMBER = set (string .digits + '-' )
1211 VALID_NUMBER = set (string .digits )
1312 WHITESPACE = set (" \t \n \r " )
1413 SIMPLE_TOKENS = {
@@ -63,13 +62,22 @@ def tokenize(self, expression):
6362 yield self ._match_or_else ('&' , 'and' , 'expref' )
6463 elif self ._current == '`' :
6564 yield self ._consume_literal ()
66- elif self ._current in self .START_NUMBER :
65+ elif self ._current in self .VALID_NUMBER :
6766 start = self ._position
68- buff = self ._current
69- while self ._next () in self .VALID_NUMBER :
70- buff += self ._current
67+ buff = self ._consume_number ()
7168 yield {'type' : 'number' , 'value' : int (buff ),
7269 'start' : start , 'end' : start + len (buff )}
70+ elif self ._current == '-' :
71+ # Negative number.
72+ start = self ._position
73+ buff = self ._consume_number ()
74+ if len (buff ) > 1 :
75+ yield {'type' : 'number' , 'value' : int (buff ),
76+ 'start' : start , 'end' : start + len (buff )}
77+ else :
78+ raise LexerError (lexer_position = start ,
79+ lexer_value = buff ,
80+ message = "Unknown token '%s'" % buff )
7381 elif self ._current == '"' :
7482 yield self ._consume_quoted_identifier ()
7583 elif self ._current == '<' :
@@ -87,6 +95,13 @@ def tokenize(self, expression):
8795 yield {'type' : 'eof' , 'value' : '' ,
8896 'start' : self ._length , 'end' : self ._length }
8997
98+ def _consume_number (self ):
99+ start = self ._position
100+ buff = self ._current
101+ while self ._next () in self .VALID_NUMBER :
102+ buff += self ._current
103+ return buff
104+
90105 def _initialize_for_expression (self , expression ):
91106 if not expression :
92107 raise EmptyExpressionError ()
0 commit comments