
Commit a5cfd46

Add Protobuf standard benchmarks (#122)
1 parent 3e493cc commit a5cfd46

3 files changed, +107 -0 lines changed


bench/.gitignore

Lines changed: 3 additions & 0 deletions
@@ -2,3 +2,6 @@
 /deps
 erl_crash.dump
 benchmarks
+/data/datasets.tar.gz
+/data/dataset.google_message3*.pb
+/data/dataset.google_message4.pb

bench/README.md

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,54 @@ Generated benchmarks/output/encode.html
144144
Opened report using open
145145
```
146146

147+
## Protobuf standard benchmarks
148+
149+
Protobuf includes benchmarks for its official language implementations, such as Python, C++
150+
and Golang. They measure average encode and decode throughput for each built-in dataset. This
151+
is useful to check how Elixir matches up with them. You can read more about these benchmarks
152+
[here](https://github.com/protocolbuffers/protobuf/blob/master/benchmarks/README.md).
153+
154+
To run the standard benchmarks for Elixir, download the datasets then run `standard_bench.exs`.
155+
156+
```console
157+
$ mix run script/standard_bench.exs
158+
Message benchmarks.proto2.GoogleMessage1 of dataset file data/dataset.google_message1_proto2.pb
159+
Average throughput for parse_from_benchmark: 18.48 MB/s
160+
Average throughput for serialize_to_benchmark: 6.19 MB/s
161+
162+
Message benchmarks.proto3.GoogleMessage1 of dataset file data/dataset.google_message1_proto3.pb
163+
Average throughput for parse_from_benchmark: 18.4 MB/s
164+
Average throughput for serialize_to_benchmark: 11.1 MB/s
165+
166+
Message benchmarks.proto2.GoogleMessage2 of dataset file data/dataset.google_message2.pb
167+
Average throughput for parse_from_benchmark: 47.82 MB/s
168+
Average throughput for serialize_to_benchmark: 5656.75 MB/s
169+
170+
Message benchmarks.google_message3.GoogleMessage3 of dataset file data/dataset.google_message3_1.pb
171+
Average throughput for parse_from_benchmark: 19.94 MB/s
172+
Average throughput for serialize_to_benchmark: 45.5 MB/s
173+
174+
Message benchmarks.google_message3.GoogleMessage3 of dataset file data/dataset.google_message3_2.pb
175+
Average throughput for parse_from_benchmark: 110.65 MB/s
176+
Average throughput for serialize_to_benchmark: 164.96 MB/s
177+
178+
Message benchmarks.google_message3.GoogleMessage3 of dataset file data/dataset.google_message3_3.pb
179+
Average throughput for parse_from_benchmark: 9.8 MB/s
180+
Average throughput for serialize_to_benchmark: 6.84 MB/s
181+
182+
Message benchmarks.google_message3.GoogleMessage3 of dataset file data/dataset.google_message3_4.pb
183+
Average throughput for parse_from_benchmark: 5254.14 MB/s
184+
Average throughput for serialize_to_benchmark: 737.71 MB/s
185+
186+
Message benchmarks.google_message3.GoogleMessage3 of dataset file data/dataset.google_message3_5.pb
187+
Average throughput for parse_from_benchmark: 3.77 MB/s
188+
Average throughput for serialize_to_benchmark: 3.29 MB/s
189+
190+
Message benchmarks.google_message4.GoogleMessage4 of dataset file data/dataset.google_message4.pb
191+
Average throughput for parse_from_benchmark: 20.06 MB/s
192+
Average throughput for serialize_to_benchmark: 32.46 MB/s
193+
```
194+
147195
## Contributing
148196

149197
If you have trouble using the downloaded datasets, they might have been upgraded and their
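
A note on the "download the datasets" step above: the exact download location is not part of this commit (see the upstream benchmarks README linked in the new section). As a minimal sketch, assuming the tarball has been saved as `data/datasets.tar.gz` (the path this commit adds to `.gitignore`) and that it unpacks directly into the `dataset.*.pb` files the script globs, the steps from the `bench/` directory would look roughly like this:

```console
$ tar -xzf data/datasets.tar.gz -C data   # hypothetical layout: unpacks into data/*.pb
$ mix run script/standard_bench.exs
```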

bench/script/standard_bench.exs

Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
# Standard benchmark. Its output is compatible with the built-in benchmarks from
# protobuf for official language implementations, including encoding and decoding
# throughput on each dataset.
#
# Based on Python's implementation:
# https://github.com/protocolbuffers/protobuf/blob/master/benchmarks/python/py_benchmark.py

single = fn fun, inputs ->
  Enum.reduce(inputs, 0, fn input, total ->
    {time, _result} = :timer.tc(fun, [input])
    total + time
  end)
end

repeat = fn fun, inputs, reps ->
  Enum.reduce(1..reps, 0, fn _, total ->
    total + single.(fun, inputs)
  end)
end

run = fn fun, inputs ->
  target_run_time = 3_000_000
  single_run_time = single.(fun, inputs)

  with true <- single_run_time < target_run_time,
       reps when reps > 1 <- trunc(ceil(target_run_time / single_run_time)) do
    repeat.(fun, inputs, reps) / reps
  else
    _ -> single_run_time
  end
end

throughput = fn bytes, microseconds ->
  megabytes = bytes / 1_048_576
  seconds = microseconds / 1_000_000
  Float.round(megabytes / seconds, 2)
end

for file <- Path.wildcard("data/*.pb") do
  %{payload: payloads, message_name: mod_name} = ProtoBench.load(file)
  module = ProtoBench.mod_name(mod_name)

  IO.puts("Message #{mod_name} of dataset file #{file}")

  bytes = Enum.reduce(payloads, 0, &(byte_size(&1) + &2))
  messages = Enum.map(payloads, &module.decode/1)

  parse = throughput.(bytes, run.(&module.decode/1, payloads))

  IO.puts("Average throughput for parse_from_benchmark: #{parse} MB/s")

  serialize = throughput.(bytes, run.(&module.encode/1, messages))

  IO.puts("Average throughput for serialize_to_benchmark: #{serialize} MB/s")
  IO.puts("")
end
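
A note on the timing policy in `run` (not part of the commit): it aims for roughly 3 s of measurement per operation. If one pass over the payloads finishes sooner, it repeats the pass `ceil(3_000_000 / single_run_time)` times and reports the average microseconds per pass; if a single pass already exceeds the target, that single-pass time is used as is. `throughput` then converts the average into MB/s. A self-contained sketch of that arithmetic with made-up numbers:

```elixir
# Illustrative numbers only; this mirrors the run/throughput arithmetic in
# standard_bench.exs rather than re-running the benchmark itself.
single_run_time = 200_000                               # one pass took 200 ms
target_run_time = 3_000_000                             # aim for ~3 s of total work
reps = trunc(ceil(target_run_time / single_run_time))   # => 15 repetitions

total_time = 3_090_000                                  # suppose the 15 passes took 3.09 s
average_us = total_time / reps                          # => 206_000.0 µs per pass

bytes = 5 * 1_048_576                                   # payloads totalling 5 MiB
mb_per_s = Float.round(bytes / 1_048_576 / (average_us / 1_000_000), 2)
IO.puts("#{mb_per_s} MB/s")                             # => 24.27 MB/s
```

Plugging real single-pass times and dataset sizes into the same arithmetic reproduces the figures printed in the README section above.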
