@@ -30,6 +30,7 @@ pub struct TokenStream<'a> {
3030 position : Pos ,
3131 off : usize ,
3232 next_state : Option < ( usize , Token < ' a > , usize , Pos ) > ,
33+ recursion_limit : usize ,
3334}
3435
3536#[ derive( Clone , Debug , PartialEq ) ]
@@ -53,7 +54,7 @@ impl<'a> StreamOnce for TokenStream<'a> {
5354 }
5455 }
5556 let old_pos = self . off ;
56- let ( kind, len) = self . peek_token ( ) ?;
57+ let ( kind, len) = self . take_token ( ) ?;
5758 let value = & self . buf [ self . off -len..self . off ] ;
5859 self . skip_whitespace ( ) ;
5960 let token = Token { kind, value } ;
@@ -125,17 +126,34 @@ fn check_float(value: &str, exponent: Option<usize>, real: Option<usize>)
125126
126127impl < ' a > TokenStream < ' a > {
127128 pub fn new ( s : & str ) -> TokenStream {
129+ Self :: with_recursion_limit ( s, 50 )
130+ }
131+
132+ /// Specify a limit to recursive parsing. Note that increasing the limit
133+ /// from the default may represent a security issue since a maliciously
134+ /// crafted input may cause a stack overflow, crashing the process.
135+ pub ( crate ) fn with_recursion_limit ( s : & str , recursion_limit : usize ) -> TokenStream {
128136 let mut me = TokenStream {
129137 buf : s,
130138 position : Pos { line : 1 , column : 1 } ,
131139 off : 0 ,
132140 next_state : None ,
141+ recursion_limit
133142 } ;
134143 me. skip_whitespace ( ) ;
135144 me
136145 }
137146
138- fn peek_token ( & mut self )
147+ /// Convenience for the common case where a token does
148+ /// not span multiple lines. Infallible.
149+ #[ inline]
150+ fn advance_token < T > ( & mut self , kind : Kind , size : usize ) -> Result < ( Kind , usize ) , T > {
151+ self . position . column += size;
152+ self . off += size;
153+ Ok ( ( kind, size) )
154+ }
155+
156+ fn take_token ( & mut self )
139157 -> Result < ( Kind , usize ) , Error < Token < ' a > , Token < ' a > > >
140158 {
141159 use self :: Kind :: * ;
@@ -146,19 +164,32 @@ impl<'a> TokenStream<'a> {
146164 } ;
147165
148166 match cur_char {
149- '!' | '$' | ':' | '=' | '@' | '|' |
150- '(' | ')' | '[' | ']' | '{' | '}' | '&' => {
151- self . position . column += 1 ;
152- self . off += 1 ;
153-
154- Ok ( ( Punctuator , 1 ) )
167+ '(' | '[' | '{' => {
168+ // Check for recursion limit
169+ self . recursion_limit = self . recursion_limit
170+ . checked_sub ( 1 )
171+ . ok_or_else ( || Error :: message_static_message ( "Recursion limit exceeded" ) ) ?;
172+
173+ self . advance_token ( Punctuator , 1 )
174+ } ,
175+ ')' | ']' | '}' => {
176+ // Notes on exceptional cases:
177+ // recursion_limit may exceed the original value specified
178+ // when constructing the Tokenizer. It may at first
179+ // seem like this would be a good place to handle that,
180+ // but instead this code allows this token to propagate up
181+ // to the parser which is better equipped to make specific
182+ // error messages about unmatched pairs.
183+ // The case where recursion limit would overflow but instead
184+ // saturates is just a specific case of the more general
185+ // occurrence above.
186+ self . recursion_limit = self . recursion_limit . saturating_add ( 1 ) ;
187+ self . advance_token ( Punctuator , 1 )
155188 }
189+ '!' | '$' | ':' | '=' | '@' | '|' | '&' => self . advance_token ( Punctuator , 1 ) ,
156190 '.' => {
157191 if iter. as_str ( ) . starts_with ( ".." ) {
158- self . position . column += 3 ;
159- self . off += 3 ;
160-
161- Ok ( ( Punctuator , 3 ) )
192+ self . advance_token ( Punctuator , 3 )
162193 } else {
163194 Err (
164195 Error :: unexpected_message (
@@ -172,11 +203,7 @@ impl<'a> TokenStream<'a> {
172203 while let Some ( ( idx, cur_char) ) = iter. next ( ) {
173204 match cur_char {
174205 '_' | 'a' ..='z' | 'A' ..='Z' | '0' ..='9' => continue ,
175- _ => {
176- self . position . column += idx;
177- self . off += idx;
178- return Ok ( ( Name , idx) ) ;
179- }
206+ _ => return self . advance_token ( Name , idx) ,
180207 }
181208 }
182209 let len = self . buf . len ( ) - self . off ;
@@ -227,10 +254,7 @@ impl<'a> TokenStream<'a> {
227254 )
228255 ) ;
229256 }
230- self . position . column += len;
231- self . off += len;
232-
233- Ok ( ( IntValue , len) )
257+ self . advance_token ( IntValue , len)
234258 }
235259 }
236260 '"' => {
0 commit comments