|
1 | 1 |  |
2 | 2 |
|
3 | 3 | # Protobuf Decoder |
4 | | -Simple protobuf decoder for python |
5 | 4 |
|
| 5 | +A simple protobuf decoder for Python |
6 | 6 |
|
7 | 7 | # Motivation |
| 8 | + |
8 | 9 | The goal of this project is to decode protobuf binary data without proto files |
9 | 10 |
|
10 | 11 | # Installation |
| 12 | + |
11 | 13 | Install using pip |
12 | 14 |
|
13 | 15 | `pip install protobuf-decoder` |
14 | 16 |
|
15 | 17 | # Simple Examples |
16 | | -``` |
| 18 | + |
| 19 | +``` python |
17 | 20 | """ |
18 | | - # proto |
19 | | - message Test1 { |
20 | | - string a = 1; |
21 | | - } |
22 | | -
|
23 | | - # message |
24 | | - { |
25 | | - "a": "테스트" |
26 | | - } |
27 | | -
|
28 | | - # binary |
29 | | - 0A 09 ED 85 8C EC 8A A4 ED 8A B8 |
| 21 | +# proto |
| 22 | +message Test1 { |
| 23 | + string a = 1; |
| 24 | +} |
| 25 | +
|
| 26 | +# message |
| 27 | +{ |
| 28 | + "a": "테스트" |
| 29 | +} |
| 30 | +
|
| 31 | +# binary |
| 32 | +0A 09 ED 85 8C EC 8A A4 ED 8A B8 |
30 | 33 | """ |
31 | 34 | from protobuf_decoder.protobuf_decoder import Parser |
32 | 35 |
|
33 | 36 | test_target = "0A 09 ED 85 8C EC 8A A4 ED 8A B8" |
34 | 37 | parsed_data = Parser().parse(test_target) |
35 | | ->> parsed_data |
36 | | ->> [ParsedResult(field=1, wire_type="string", data='테스트')] |
| 38 | +assert parsed_data == ParsedResults([ParsedResult(field=1, wire_type="string", data='테스트')]) |
| 39 | +assert parsed_data.to_dict() == {'results': [{'field': 1, 'wire_type': 'string', 'data': '테스트'}]} |
37 | 40 | ``` |
38 | 41 |
|
39 | | - |
40 | | -``` |
| 42 | +``` python |
41 | 43 | """ |
42 | | - # proto |
43 | | - message Test1 { |
44 | | - int32 a = 1; |
45 | | - } |
46 | | -
|
47 | | - message Test2 { |
48 | | - Test1 b = 3; |
49 | | - } |
50 | | -
|
51 | | - # message |
52 | | - { |
53 | | - "a": { |
54 | | - "b": 150 |
55 | | - } |
56 | | - } |
57 | | -
|
58 | | - # binary |
59 | | - 1a 03 08 96 01 |
| 44 | +# proto |
| 45 | +message Test1 { |
| 46 | + int32 a = 1; |
| 47 | +} |
| 48 | +
|
| 49 | +message Test2 { |
| 50 | + Test1 b = 3; |
| 51 | +} |
| 52 | +
|
| 53 | +# message |
| 54 | +{ |
| 55 | + "b": { |
| 56 | + "a": 150 |
| 57 | + } |
| 58 | +} |
| 59 | +
|
| 60 | +# binary |
| 61 | +1a 03 08 96 01 |
60 | 62 | """ |
61 | 63 | from protobuf_decoder.protobuf_decoder import Parser |
62 | 64 |
|
63 | 65 | test_target = "1a 03 08 96 01" |
64 | 66 | parsed_data = Parser().parse(test_target) |
| 67 | +assert parsed_data == ParsedResults( |
| 68 | + [ |
| 69 | + ParsedResult(field=3, wire_type="length_delimited", data=ParsedResults([ |
| 70 | + ParsedResult(field=1, wire_type="varint", data=150) |
| 71 | + ])) |
| 72 | + ]) |
| 73 | +assert parsed_data.to_dict() == {'results': [{'field': 3, 'wire_type': 'length_delimited', 'data': { |
| 74 | + 'results': [{'field': 1, 'wire_type': 'varint', 'data': 150}]}}]} |
65 | 75 |
|
66 | | ->> parsed_data |
67 | | ->> [ParsedResult(field=3, wire_type="length_delimited", data=[ParsedResult(field=1, wire_type="varint", data=150)])] |
68 | 76 | ``` |
69 | 77 |
|
70 | | -``` |
| 78 | +``` python |
71 | 79 | """ |
72 | | - # proto |
73 | | - message Test1 { |
74 | | - required string a = 1; |
75 | | - } |
| 80 | +# proto |
| 81 | +message Test1 { |
| 82 | + required string a = 1; |
| 83 | +} |
76 | 84 |
|
77 | | - # message |
78 | | - { |
79 | | - "a": "✊" |
80 | | - } |
| 85 | +# message |
| 86 | +{ |
| 87 | + "a": "✊" |
| 88 | +} |
81 | 89 |
|
82 | | - # binary |
83 | | - 0A 03 E2 9C 8A |
| 90 | +# binary |
| 91 | +0A 03 E2 9C 8A |
84 | 92 |
|
85 | | - """ |
| 93 | +""" |
86 | 94 | from protobuf_decoder.protobuf_decoder import Parser |
87 | 95 |
|
88 | 96 | test_target = "0A 03 E2 9C 8A" |
89 | 97 | parsed_data = Parser().parse(test_target) |
90 | | ->> parsed_data |
91 | | ->> [ParsedResult(field=1, wire_type="string", data='✊')] |
| 98 | +assert parsed_data == ParsedResults([ParsedResult(field=1, wire_type="string", data='✊')]) |
| 99 | +assert parsed_data.to_dict() == {'results': [{'field': 1, 'wire_type': 'string', 'data': '✊'}]} |
| 100 | + |
92 | 101 | ``` |
93 | 102 |
|
94 | 103 | # Nested Protobuf Detection Logic |
95 | | -Our project implements a distinct method to determine whether a given input is possibly a nested protobuf. |
96 | | -The core of this logic is the `is_maybe_nested_protobuf` function. |
| 104 | + |
| 105 | +Our project implements a distinct method to determine whether a given input is possibly a nested protobuf. |
| 106 | +The core of this logic is the `is_maybe_nested_protobuf` function. |
97 | 107 | We recently enhanced this function to provide a more accurate distinction and handle nested protobufs effectively. |
98 | 108 |
|
99 | 109 | ### Current Logic |
| 110 | + |
100 | 111 | The `is_maybe_nested_protobuf` function works by: |
101 | 112 |
|
102 | 113 | - Attempting to convert the given hex string to UTF-8. |
103 | 114 | - Checking the ordinal values of the first four characters of the converted data. |
104 | 115 | - Returning `True` if the data might be a nested protobuf based on certain conditions, otherwise returning `False`. |
105 | 116 |
|
106 | 117 | ### Extensibility |
107 | | -You can extend or modify the `is_maybe_nested_protobuf` function based on your specific requirements or use-cases. |
108 | | -If you find a scenario where the current logic can be further improved, |
| 118 | + |
| 119 | +You can extend or modify the `is_maybe_nested_protobuf` function based on your specific requirements or use-cases. |
| 120 | +If you find a scenario where the current logic can be further improved, |
109 | 121 | feel free to adapt the function accordingly. |
110 | 122 |
|
111 | 123 | (A big shoutout to **@fuzzyrichie** for their significant contributions to this update!) |
112 | 124 |
|
| 125 | +# Remaining Bytes |
| 126 | + |
| 127 | +If any bytes remain after parsing, the parser returns them as a string in `remain_data`. |
| 128 | + |
| 129 | +```python |
| 130 | +from protobuf_decoder.protobuf_decoder import Parser |
| 131 | + |
| 132 | +test_target = "ed 85 8c ec 8a a4 ed 8a b8" |
| 133 | +parsed_data = Parser().parse(test_target) |
| 134 | + |
| 135 | +assert parsed_data.has_results is False |
| 136 | +assert parsed_data.has_remain_data is True |
| 137 | + |
| 138 | +assert parsed_data.remain_data == "ed 85 8c ec 8a a4 ed 8a b8" |
| 139 | +assert parsed_data.to_dict() == {'results': [], 'remain_data': 'ed 85 8c ec 8a a4 ed 8a b8', } |
| 140 | + |
| 141 | +``` |
| 142 | + |
113 | 143 |
|
114 | 144 | # Reference |
| 145 | + |
115 | 146 | - [Google protocol-buffers encoding document](https://developers.google.com/protocol-buffers/docs/encoding) |
0 commit comments