Merge branch 'lexer-error-msg' into develop

jamesls · jamesls · commit 15fa803a6a17 · 2015-10-20T21:37:18.000-07:00
* lexer-error-msg:
  Raise LexerError on invalid numbers

Conflicts:
	tests/compliance/syntax.json
diff --git a/jmespath/lexer.py b/jmespath/lexer.py
@@ -8,7 +8,6 @@
 class Lexer(object):
     START_IDENTIFIER = set(string.ascii_letters + '_')
     VALID_IDENTIFIER = set(string.ascii_letters + string.digits + '_')
-    START_NUMBER = set(string.digits + '-')
     VALID_NUMBER = set(string.digits)
     WHITESPACE = set(" \t\n\r")
     SIMPLE_TOKENS = {
@@ -63,13 +62,22 @@ def tokenize(self, expression):
                 yield self._match_or_else('&', 'and', 'expref')
             elif self._current == '`':
                 yield self._consume_literal()
-            elif self._current in self.START_NUMBER:
+            elif self._current in self.VALID_NUMBER:
                 start = self._position
-                buff = self._current
-                while self._next() in self.VALID_NUMBER:
-                    buff += self._current
+                buff = self._consume_number()
                 yield {'type': 'number', 'value': int(buff),
                        'start': start, 'end': start + len(buff)}
+            elif self._current == '-':
+                # Negative number.
+                start = self._position
+                buff = self._consume_number()
+                if len(buff) > 1:
+                    yield {'type': 'number', 'value': int(buff),
+                           'start': start, 'end': start + len(buff)}
+                else:
+                    raise LexerError(lexer_position=start,
+                                     lexer_value=buff,
+                                     message="Unknown token '%s'" % buff)
             elif self._current == '"':
                 yield self._consume_quoted_identifier()
             elif self._current == '<':
@@ -87,6 +95,13 @@ def tokenize(self, expression):
         yield {'type': 'eof', 'value': '',
                'start': self._length, 'end': self._length}
 
+    def _consume_number(self):
+        start = self._position
+        buff = self._current
+        while self._next() in self.VALID_NUMBER:
+            buff += self._current
+        return buff
+
     def _initialize_for_expression(self, expression):
         if not expression:
             raise EmptyExpressionError()
diff --git a/tests/compliance/syntax.json b/tests/compliance/syntax.json
@@ -103,6 +103,10 @@
       {
         "expression": "!",
         "error": "syntax"
+      },
+      {
+        "expression": "foo-bar",
+        "error": "syntax"
       }
     ]
   },
diff --git a/tests/test_lexer.py b/tests/test_lexer.py
@@ -144,13 +144,17 @@ def test_adds_quotes_when_invalid_json(self):
         )
 
     def test_unknown_character(self):
-        with self.assertRaises(LexerError):
+        with self.assertRaises(LexerError) as e:
             tokens = list(self.lexer.tokenize('foo[0^]'))
 
     def test_bad_first_character(self):
         with self.assertRaises(LexerError):
             tokens = list(self.lexer.tokenize('^foo[0]'))
 
+    def test_unknown_character_with_identifier(self):
+        with self.assertRaisesRegexp(LexerError, "Unknown token"):
+            list(self.lexer.tokenize('foo-bar'))
+
 
 if __name__ == '__main__':
     unittest.main()

Original file line number	Diff line number	Diff line change
`@@ -103,6 +103,10 @@`
`103`	`103`	`{`
`104`	`104`	`"expression": "!",`
`105`	`105`	`"error": "syntax"`
	`106`	`+ },`
	`107`	`+ {`
	`108`	`+ "expression": "foo-bar",`
	`109`	`+ "error": "syntax"`
`106`	`110`	`}`
`107`	`111`	`]`
`108`	`112`	`},`