@@ -116,19 +116,28 @@ def to_dict(self):
@dataclass
class ParsedResults:
    """Bundle of parsed results plus an optional string of remaining data.

    ``results`` is the list of parsed entries. ``remain_data`` is a string
    attached by the producer, or ``None`` when nothing was attached.
    """

    results: List[ParsedResult]
    # None is the "nothing left over" marker checked by has_remain_data.
    remain_data: str = None

    @property
    def has_results(self):
        """Return True when ``results`` holds at least one entry."""
        return len(self.results) > 0

    @property
    def has_remain_data(self):
        """Return True when ``remain_data`` is anything other than None."""
        return self.remain_data is not None

    def __getitem__(self, item):
        """Index straight into ``results``."""
        return self.results[item]

    def to_dict(self):
        """Return a plain-dict form of this object.

        The ``"results"`` key maps to each entry's own ``to_dict()`` output.
        The ``"remain_data"`` key is present only when ``has_remain_data``
        is true.
        """
        payload = {"results": [entry.to_dict() for entry in self.results]}
        if self.has_remain_data:
            payload["remain_data"] = self.remain_data
        return payload
132141
133142
134143class State (Enum ):
@@ -197,6 +206,44 @@ def hex_string_to_utf8(cls, string) -> str:
197206 string = string .replace (" " , "" )
198207 return binascii .unhexlify (string ).decode ("utf-8" )
199208
@classmethod
def chunk_to_hex_string(cls, chunk) -> str:
    """Return ``chunk`` as lowercase hex, zero-padded to at least two digits.

    A format spec is used instead of slicing the prefix off ``hex()``:
    the slice assumed a two-character ``0x`` prefix and so produced
    malformed text such as ``"x1"`` for a negative value. Non-negative
    integers format exactly as before (``10`` -> ``"0a"``).
    """
    return format(chunk, "02x")
212+
@classmethod
def change_endian(cls, string) -> str:
    """Swap every adjacent pair of chunks in a hex string.

    The input is checked with ``cls.validate`` and split with
    ``cls.get_chunked_list``. Chunks are then taken two at a time and each
    pair is emitted in reverse order; a trailing chunk without a partner
    stays where it is.

    Returns the reordered chunks joined by single spaces.
    Raises ValueError when ``cls.validate`` reports the input as invalid.
    """
    ok, cleaned = cls.validate(string)
    if not ok:
        raise ValueError("Invalid hex format")

    chunks = list(cls.get_chunked_list(cleaned))
    swapped = []
    for start in range(0, len(chunks), 2):
        # A slice of length 1 (odd tail) reverses to itself.
        swapped.extend(reversed(chunks[start:start + 2]))

    return " ".join(swapped)
234+
@classmethod
def show_parsed_results(cls, parsed_results: ParsedResults, depth=0, print_func=print):
    """Print a parsed-result tree, one line per entry, indented by depth.

    For every entry in ``parsed_results.results`` a line with its field and
    wire type is emitted. When the entry's data is itself a
    ``ParsedResults``, that object is printed recursively one level deeper;
    otherwise the data is printed on the same line. If the object has
    remaining data, a final "left over bytes" line is emitted.

    ``print_func`` is called with two positional arguments: the indent
    string and the line text. It defaults to ``print``.
    """
    indent = "\t " * depth
    if parsed_results.has_results:
        for result in parsed_results.results:
            if isinstance(result.data, ParsedResults):
                print_func(indent, f"[{result.field} : {result.wire_type} ] =>")
                # Forward print_func: the recursion used to omit it, so
                # nested levels always went to print() regardless of the
                # sink the caller asked for.
                cls.show_parsed_results(result.data, depth + 1, print_func=print_func)
            else:
                print_func(indent, f"[{result.field} : {result.wire_type} ] => {result.data} ")
    if parsed_results.has_remain_data:
        print_func(indent, f"left over bytes: {parsed_results.remain_data} ")
246+
200247
201248class BytesBuffer :
202249 def __init__ (self ):
@@ -254,17 +301,57 @@ def fetch_32bits(self):
254301 self .set_data_length (4 + 1 )
255302
256303
class RemainChunkTransaction:
    """Records the hex text of consumed chunks until ``done()`` is called.

    Chunks are appended with ``consume_chunk``; ``done()`` marks the
    transaction finished and discards what was recorded. Whatever is still
    recorded can be read back as a list or as one space-separated string.
    """

    def __init__(self):
        # A new instance counts as "done" and has nothing recorded.
        self._remain_hex_string_list = []
        self._is_done = True

    def consume_chunk(self, chunk):
        """Append the hex string for ``chunk`` to the recorded list."""
        hex_text = Utils.chunk_to_hex_string(chunk)
        self._remain_hex_string_list.append(hex_text)

    def flush_chunk(self):
        """Discard everything recorded so far."""
        # Rebinds to a new list rather than clearing in place, so a list
        # already handed out through remain_hex_string_list is unaffected.
        self._remain_hex_string_list = []

    def start(self):
        """Mark the transaction as in progress."""
        self._is_done = False

    def done(self):
        """Mark the transaction as finished and drop the recorded chunks."""
        self._is_done = True
        self.flush_chunk()

    @property
    def is_done(self):
        """True unless ``start()`` was called without a later ``done()``."""
        return self._is_done

    @property
    def remain_hex_string_list(self):
        """The recorded hex strings (the internal list itself, not a copy)."""
        return self._remain_hex_string_list

    @property
    def remain_hex_string(self):
        """The recorded hex strings joined by single spaces."""
        return " ".join(self._remain_hex_string_list)

    @property
    def has_remain_data(self):
        """True when at least one hex string is recorded."""
        return bool(self._remain_hex_string_list)
340+
257341class Parser :
def __init__(self, nexted_depth: int = 0, strict: bool = False):
    """Create a parser whose state machine starts in ``State.FIND_FIELD``.

    ``nexted_depth`` is stored as this parser's nesting depth. ``strict``
    is stored as the strict-mode flag, which the parsing code consults to
    decide whether to raise instead of stopping quietly.
    """
    # Caller-supplied configuration.
    self._nested_depth = nexted_depth
    self._is_strict = strict

    # Working state used while chunks are consumed.
    self._buffer = BytesBuffer()
    self._fetcher = Fetcher()
    self._target_field = None
    self._parsed_data: List[ParsedResult] = []
    self._state = State.FIND_FIELD

    # Records the hex of consumed chunks so leftovers can be reported.
    self._t = RemainChunkTransaction()
265352
def _create_nested_parser(self) -> Parser:
    """Return a new Parser one nesting level deeper, with the same strict flag."""
    child_depth = self._nested_depth + 1
    return Parser(nexted_depth=child_depth, strict=self._is_strict)
268355
269356 @staticmethod
270357 def _has_next (chunk_bytes ) -> bool :
@@ -294,6 +381,8 @@ def _handler_find_field(self, chunk):
294381 if self ._has_next (chunk ):
295382 return self ._next_buffer_handler (value )
296383
384+ self ._t .start ()
385+
297386 self ._buffer .append (value )
298387 bit_value = self ._get_buffered_value ()
299388 wire_type , field = self ._parse_wire_type (bit_value )
@@ -314,7 +403,10 @@ def _handler_find_field(self, chunk):
314403 elif wire_type == WireType .EGROUP .value :
315404 self ._state = State .TERMINATED
316405 else :
406+ if self ._is_strict :
407+ raise AssertionError (f"Invalid wire_type: { wire_type } " )
317408 self ._state = State .TERMINATED
409+
318410 self ._buffer .flush ()
319411
320412 def _parse_varint_handler (self , chunk ):
@@ -334,6 +426,7 @@ def _parse_varint_handler(self, chunk):
334426
335427 self ._state = State .FIND_FIELD
336428 self ._buffer .flush ()
429+ self ._t .done ()
337430
338431 def _parse_fixed_handler (self , chunk ):
339432 self ._next_buffer_handler (chunk )
@@ -354,6 +447,7 @@ def _parse_fixed_handler(self, chunk):
354447 self ._state = State .FIND_FIELD
355448 self ._buffer .flush ()
356449 self ._fetcher .seek ()
450+ self ._t .done ()
357451
358452 def _zero_length_delimited_handler (self ):
359453 self ._parsed_data .append (
@@ -365,6 +459,7 @@ def _zero_length_delimited_handler(self):
365459 )
366460 self ._state = State .FIND_FIELD
367461 self ._buffer .flush ()
462+ self ._t .done ()
368463
369464 def _parse_length_delimited_handler (self , chunk ):
370465 value = self ._get_value (chunk )
@@ -379,6 +474,7 @@ def _parse_length_delimited_handler(self, chunk):
379474 self ._fetcher .set_data_length (data_length )
380475 self ._state = State .GET_DELIMITED_DATA
381476 self ._buffer .flush ()
477+ self ._t .done ()
382478
383479 def _next_get_delimited_data_handler (self , value ):
384480 self ._fetcher .fetch ()
@@ -439,9 +535,16 @@ def _get_delimited_data_handler(self, chunk):
439535 self ._buffer .flush ()
440536 self ._fetcher .seek ()
441537 self ._state = State .FIND_FIELD
538+ self ._t .done ()
442539
def _create_parsed_results(self) -> ParsedResults:
    """Wrap the collected results in a ``ParsedResults``.

    ``remain_data`` is passed only when the chunk tracker still holds
    recorded hex; otherwise it is left at the ``ParsedResults`` default.
    """
    tracker = self._t
    if tracker.has_remain_data:
        return ParsedResults(
            results=self._parsed_data,
            remain_data=tracker.remain_hex_string,
        )

    return ParsedResults(results=self._parsed_data)
445548
446549 def parse (self , test_target ) -> ParsedResults :
447550 if test_target == "" :
@@ -454,6 +557,8 @@ def parse(self, test_target) -> ParsedResults:
454557 for hex_chunk in Utils .get_chunked_list (validate_string ):
455558 chunk = Utils .hex_string_to_decimal (hex_chunk )
456559
560+ self ._t .consume_chunk (chunk )
561+
457562 if self ._state == State .FIND_FIELD :
458563 self ._handler_find_field (chunk )
459564
@@ -478,8 +583,12 @@ def parse(self, test_target) -> ParsedResults:
478583 continue
479584
480585 elif self ._state == State .TERMINATED :
481- return self ._create_parsed_results ()
586+ pass
587+
482588 else :
483589 raise ValueError (f"Unsupported State { self ._state } " )
484590
591+ if self ._is_strict :
592+ assert self ._t .is_done , "parsing process is not done, Maybe invalid protobuf"
593+
485594 return self ._create_parsed_results ()
0 commit comments