Skip to content

Commit fd782f3

Browse files
committed
Implementations tested & benchmarked with Catch2 too; refactoring of swar benchmarks to share between Google benchmark and Catch2
1 parent dc03288 commit fd782f3

File tree

6 files changed

+157
-10
lines changed

6 files changed

+157
-10
lines changed

benchmark/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@ add_subdirectory(dependencies/google_benchmark)
2323

2424
add_executable(
2525
catch2Benchmark
26-
catch2BenchmarkMain.cpp catch2Functions.cpp egyptian.cpp
26+
catch2BenchmarkMain.cpp catch2Functions.cpp catch2swar-demo.cpp
27+
atoi.cpp
28+
egyptian.cpp
2729
RobinHood.benchmark.cpp
2830
)
2931

@@ -32,7 +34,7 @@ add_executable(
3234
)
3335

3436
add_executable(
35-
zoo-atoi-benchmark benchmark_main.cpp atoi.cpp
37+
zoo-atoi-benchmark benchmark_main.cpp bm-swar.cpp atoi.cpp
3638
)
3739

3840
target_link_libraries(zoo-google-benchmark benchmark::benchmark)

benchmark/atoi-corpus.h

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#include <cstdio>
#include <random>
#include <string>
#include <utility>
#include <vector>
4+
5+
/// Corpus of random integers in [0, 10^8), kept both as numbers and as text.
///
/// The text lives in a single contiguous buffer of fixed-width records: eight
/// zero-padded decimal digits followed by the NUL that snprintf writes, that
/// is, BytesPerEntry bytes per record.  Entry i of asNumbers_ is the value
/// whose text starts at characters_.data() + i * BytesPerEntry.
struct Corpus8DecimalDigits {
    /// Size of one text record: 8 digits plus the terminating NUL.
    static constexpr int BytesPerEntry = 9;

    std::vector<int> asNumbers_;
    std::string characters_;

    /// Takes ownership of the numbers and of their textual representation.
    Corpus8DecimalDigits(std::vector<int> aNs, std::string cs):
        asNumbers_(std::move(aNs)),
        characters_(std::move(cs))
    {}

    /// Builds a corpus by drawing every value from \c generator.
    /// \param generator a uniform random bit generator, for example
    ///        std::mt19937; it is advanced once per generated entry.
    /// \return a corpus with exactly \c count entries.
    template<typename G>
    static auto makeCorpus(G &generator) {
        constexpr auto count = 1031; // 1031 is a prime number, this helps to
            // disable in practice the branch predictor, the idea is to
            // measure the performance of the code under measurement, not how
            // the unrealistic conditions of microbenchmarking help/hurt the
            // code under measurement
        std::string allCharacters;
        allCharacters.resize(count * BytesPerEntry);
        std::vector<int> inputs;
        inputs.reserve(count);
        std::uniform_int_distribution<> range(0, 100*1000*1000 - 1);
        char *base = allCharacters.data();
        // One record per iteration.  The loop must run `count` times: the
        // Iterator below walks the whole buffer, so every record has to be
        // populated.
        for(auto remaining = count; 0 < remaining; --remaining) {
            auto input = range(generator);
            // Writes 8 digits and the NUL, exactly BytesPerEntry bytes.
            snprintf(base, BytesPerEntry, "%08d", input);
            inputs.push_back(input);
            base += BytesPerEntry;
        }
        return Corpus8DecimalDigits(std::move(inputs), std::move(allCharacters));
    }

    /// Cursor over the records of a corpus.
    ///
    /// It stores a pointer to the corpus it came from, so it must not be
    /// used after that corpus has been moved, copied over or destroyed.
    struct Iterator {
        Corpus8DecimalDigits *thy; ///< corpus being traversed
        int *ip;                   ///< current entry, as a number
        char *cp;                  ///< current entry, as NUL terminated text

        /// Moves both views to the following record.
        Iterator &operator++() {
            ++ip;
            cp += BytesPerEntry;
            return *this;
        }

        /// \return the text of the current record.
        char *operator*() {
            return cp;
        }

        /// Advances, then reports whether the new position is still a
        /// record, i.e., false once the end of the text buffer is reached.
        auto next() noexcept {
            ++(*this);
            return cp != thy->characters_.data() + thy->characters_.size();
        }
    };

    /// \return an iterator positioned at the first record.
    Iterator commence() {
        return { this, asNumbers_.data(), characters_.data() };
    }
};
60+
61+
// X-macro list of the 8-digit parsers exercised over the corpus; each entry
// is X(name suffix, function to call).  Users define X, expand the list and
// then #undef X.
#define PARSE8BYTES_CORPUS_X_LIST \
62+
X(Lemire, parse_eight_digits_swar)\
63+
X(Zoo, lemire_as_zoo_swar)\
64+
X(LIBC, atoi)
65+
66+
// For every entry of the list, defines a functor type Invoke<name suffix>
// whose call operator passes the text pointer to the paired function and
// returns the result converted to int.
#define X(Typename, FunctionToCall) \
67+
struct Invoke##Typename { int operator()(const char *p) { return FunctionToCall(p); } };
68+
69+
PARSE8BYTES_CORPUS_X_LIST
70+
#undef X

benchmark/atoi.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
11
#include "zoo/swar/SWAR.h"
22
#include "zoo/swar/associative_iteration.h"
33

4-
#include <benchmark/benchmark.h>
5-
64
#include <stdint.h>
75
#include <string.h>
86
#include <stdlib.h>
97

108
// Copied from Daniel Lemire's GitHub at
9+
// https://lemire.me/blog/2018/10/03/quickly-parsing-eight-digits/
1110
// https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/ddb082981228f7256e9a4dbbf56fd4a335d78e30/2018/10/03/eightchartoi.c#L26C1-L34C2
1211

13-
uint32_t parse_eight_digits_swar(const unsigned char *chars) {
12+
uint32_t parse_eight_digits_swar(const char *chars) {
1413
uint64_t val;
1514
memcpy(&val, chars, 8);
1615
val = val - 0x3030303030303030;
@@ -22,8 +21,6 @@ uint32_t parse_eight_digits_swar(const unsigned char *chars) {
2221

2322
// Note: eight digits can represent from 0 to (10^8) - 1, the logarithm base 2
2423
// of 10^8 is slightly less than 27, thus, only 27 bits are needed.
25-
template<typename> struct Trick;
26-
2724
auto lemire_as_zoo_swar(const char *chars) {
2825
uint64_t bytes;
2926
memcpy(&bytes, chars, 8);
@@ -40,12 +37,15 @@ auto lemire_as_zoo_swar(const char *chars) {
4037
* BASE256 10*B 10*C 10*D 10*E 10*F 10*G 10*H 0
4138
* --------------------------------------
4239
* BASE256 A+10B ....................... G+10H H
40+
* See that the odd-digits (base256) contain 10*odd + even
41+
* Then, we can use base(2^16) digits, and base(2^32) to
42+
* calculate the conversion for the digits in 100s and 10,000s
4343
*/
4444
auto by11base256 = convertedToIntegers.multiply(256*10 + 1);
4545
auto bytePairs = zoo::swar::doublePrecision(by11base256).odd;
4646
static_assert(std::is_same_v<decltype(bytePairs), zoo::swar::SWAR<16, uint64_t>>);
47-
auto by101base2to16 = bytePairs.multiply(1 + 100 << 16);
47+
auto by101base2to16 = bytePairs.multiply(1 + (100 << 16));
4848
auto byteQuads = zoo::swar::doublePrecision(by101base2to16).odd;
49-
auto by10001base2to32 = zoo::swar::doublePrecision(by101base2to16).odd;
50-
return uint32_t(zoo::swar::doublePrecision(by10001base2to32).odd.value());
49+
auto by10001base2to32 = byteQuads.multiply(1 + (10000ull << 32));
50+
return uint32_t(by10001base2to32.value() >> 32);
5151
}

benchmark/atoi.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#include "stdint.h"
2+
3+
// Converts the eight decimal digit characters at `chars` to their numeric
// value; this is the SWAR routine copied from Daniel Lemire (see atoi.cpp).
uint32_t parse_eight_digits_swar(const char *chars);
4+
// Same conversion as parse_eight_digits_swar, expressed with the zoo::swar
// library (see atoi.cpp).
uint32_t lemire_as_zoo_swar(const char *chars);

benchmark/bm-swar.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#include "atoi.h"
2+
#include "atoi-corpus.h"
3+
4+
#include <benchmark/benchmark.h>
5+
6+
// Sink for the results of the benchmarked calls: every traversal folds its
// results into this global.
int g_SideEffect = 0;

/// Invokes \c cc on each entry of the corpus \c c, in order, xor-ing every
/// result into the value g_SideEffect had on entry; the folded value is
/// stored back into g_SideEffect when the traversal ends.
/// \note The first entry is processed before the end is checked, so the
/// corpus is expected to have at least one entry.
template<typename Corpus, typename Callable>
void goOverCorpus(Corpus &c, Callable &&cc) {
    auto cursor = c.commence();
    auto folded = g_SideEffect;
    for(;;) {
        folded ^= cc(*cursor);
        if(!cursor.next()) { break; }
    }
    g_SideEffect = folded;
}
17+
18+
template<typename CorpusMaker, typename Callable>
19+
void runBenchmark(benchmark::State &s) {
20+
std::random_device rd;
21+
std::mt19937 g(rd());
22+
auto corpus = CorpusMaker::makeCorpus(g);
23+
Callable function;
24+
for(auto _: s) {
25+
goOverCorpus(corpus, function);
26+
}
27+
}
28+
29+
// Registers one Google Benchmark per entry of PARSE8BYTES_CORPUS_X_LIST: each
// one instantiates runBenchmark over Corpus8DecimalDigits with the matching
// Invoke<name suffix> functor.
#define X(Typename, FunctionToCall) \
30+
BENCHMARK(runBenchmark<Corpus8DecimalDigits, Invoke##Typename>);
31+
32+
PARSE8BYTES_CORPUS_X_LIST
33+
#undef X
34+

benchmark/catch2swar-demo.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#include "atoi.h"
2+
#include "atoi-corpus.h"
3+
4+
#define CATCH_CONFIG_ENABLE_BENCHMARKING
5+
#include "catch2/catch.hpp"
6+
7+
TEST_CASE("Atoi benchmarks", "[atoi][swar]") {
    // Applies `parser` to each entry of `entries`, in order, xor-ing every
    // result into `accumulator`; returns the folded value.
    auto xorOverCorpus =
        [](auto &&entries, auto &&parser, auto accumulator) {
            for(auto cursor = entries.commence(); ; ) {
                accumulator ^= parser(*cursor);
                if(!cursor.next()) { return accumulator; }
            }
        };

    // The seed is captured so that a failing run reports the seed it used.
    std::random_device entropy;
    auto seed = entropy();
    CAPTURE(seed);
    std::mt19937 generator(seed);
    auto corpus = Corpus8DecimalDigits::makeCorpus(generator);

    // One folded result per parser in the list: fromLemire, fromZoo, fromLIBC.
    #define X(Type, Fun) \
        auto from##Type = xorOverCorpus(corpus, Invoke##Type{}, 0);
    PARSE8BYTES_CORPUS_X_LIST
    #undef X

    // All the parsers must agree over the same corpus.
    REQUIRE(fromLemire == fromZoo);
    REQUIRE(fromLIBC == fromZoo);

    // Initial value for the benchmarked traversals; going by its name it is
    // meant to act as a memory barrier.
    auto haveTheRoleOfMemoryBarrier = -1;

    // One Catch2 benchmark per parser, named after its list entry.
    #define X(Type, Fun) \
        BENCHMARK(#Type) { \
            return xorOverCorpus( \
                corpus, Invoke##Type{}, haveTheRoleOfMemoryBarrier \
            ); \
        };
    PARSE8BYTES_CORPUS_X_LIST
    #undef X
}
37+

0 commit comments

Comments
 (0)