Skip to content

Commit f129574

Browse files
authored
Demos of the SWAR library (#69)
Remaining comments are just nits, merging for unblock. * VSCode interaction * Builds with update * Zooification of Lemire 8 bytes * Benchmarks operative * Implementations tested & benchmarked with Catch2 too; refactoring of swar benchmarks to share between Google benchmark and Catch2 * Draft of strlen * Strlen implemented * Improvement in strlen, SIMD strlen, improved CMake to support AVX in Xcode --------- Co-authored-by: Eddie <eddie see email elsewhere>
1 parent 9e46478 commit f129574

File tree

10 files changed

+532
-14
lines changed

10 files changed

+532
-14
lines changed

benchmark/CMakeLists.txt

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,64 @@
1-
cmake_minimum_required (VERSION 2.6)
1+
cmake_minimum_required (VERSION 3.8)
22

3-
set(CMAKE_CXX_FLAGS "-std=c++17 -g3 -fno-unroll-loops")
3+
project(ZooBenchmark)
44

5-
set(CMAKE_BUILD_TYPE "Release")
65

7-
project(ZooBenchmark)
6+
set(CMAKE_CXX_FLAGS "-fno-unroll-loops")
7+
8+
set(CMAKE_CXX_STANDARD 17)
9+
set(CMAKE_BUILD_TYPE "RelWithDebInfo")
10+
11+
if(CMAKE_BUILD_TYPE MATCHES "Release|RelWithDebInfo")
12+
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
13+
string(APPEND CMAKE_CXX_FLAGS_RELEASE " -march=native -fno-unroll-loops")
14+
string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -march=native -fno-unroll-loops")
15+
endif()
16+
endif()
17+
18+
# Macro: when generating for Xcode, enable AVX/AVX2 code generation on TARGET_NAME (no-op for other generators)
19+
macro(set_xcode_properties TARGET_NAME)
20+
if(CMAKE_GENERATOR STREQUAL Xcode)
21+
set_target_properties(${TARGET_NAME} PROPERTIES
22+
XCODE_ATTRIBUTE_ENABLE_AVX YES
23+
XCODE_ATTRIBUTE_ENABLE_AVX2 YES
24+
XCODE_ATTRIBUTE_OTHER_CPLUSPLUSFLAGS "-mavx -mavx2"
25+
XCODE_ATTRIBUTE_OTHER_CFLAGS "-mavx -mavx2"
26+
)
27+
endif()
28+
endmacro()
829

930
include_directories(
1031
../test/inc
11-
../../benchmark/include
32+
dependencies/google_benchmark/include
1233
../inc
1334
../junkyard/inc
1435
../test/third_party/Catch2/single_include
1536
)
1637

1738
# build google benchmark (target: benchmark)
1839
# do not build tests of benchmarking lib
19-
# set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Suppressing benchmark's tests" FORCE)
40+
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Suppressing benchmark's tests" FORCE)
41+
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Build type" FORCE)
42+
add_subdirectory(dependencies/google_benchmark)
2043

21-
# add_subdirectory(dependencies/google_benchmark)
44+
add_executable(
45+
catch2Benchmark
46+
catch2BenchmarkMain.cpp catch2Functions.cpp catch2swar-demo.cpp
47+
atoi.cpp
48+
egyptian.cpp
49+
# RobinHood.benchmark.cpp
50+
)
51+
set_xcode_properties(catch2Benchmark)
2252

53+
add_executable(
54+
zoo-google-benchmark benchmark_main.cpp cfs.cpp cfs/cfs_utility.cpp
55+
)
56+
set_xcode_properties(zoo-google-benchmark)
2357

2458
add_executable(
25-
catch2Benchmark
26-
catch2BenchmarkMain.cpp catch2Functions.cpp egyptian.cpp
27-
RobinHood.benchmark.cpp
59+
zoo-atoi-benchmark benchmark_main.cpp bm-swar.cpp atoi.cpp
2860
)
61+
set_xcode_properties(zoo-atoi-benchmark)
62+
63+
target_link_libraries(zoo-google-benchmark benchmark::benchmark)
64+
target_link_libraries(zoo-atoi-benchmark benchmark::benchmark)

benchmark/atoi-corpus.h

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
#include "atoi.h"
2+
3+
#include <vector>
4+
#include <string>
5+
#include <cstring>
6+
#include <random>
7+
8+
/// Corpus of fixed-width decimal numbers used to benchmark 8-digit parsers.
///
/// Each entry is rendered as exactly eight zero-padded decimal digits followed
/// by a NUL terminator, i.e. one 9-byte record.  Records are stored back to
/// back in `characters_`; the value encoded by record `i` is `asNumbers_[i]`.
struct Corpus8DecimalDigits {
    /// Bytes per record: 8 digits plus the terminating NUL.
    static constexpr int RecordSize = 9;

    std::vector<int> asNumbers_;
    std::string characters_;

    /// Takes ownership of the numbers and of their textual records.
    Corpus8DecimalDigits(std::vector<int> aNs, std::string cs):
        asNumbers_(std::move(aNs)),
        characters_(std::move(cs))
    {}

    /// Builds a corpus of 1031 random values in [0, 99'999'999] drawn from
    /// `generator` (any uniform random bit generator usable with
    /// std::uniform_int_distribution).
    template<typename G>
    static auto makeCorpus(G &generator) {
        // 1031 is a prime number; per the original author this helps to
        // disable, in practice, the branch predictor: the idea is to measure
        // the code under measurement, not how the unrealistic conditions of
        // microbenchmarking help/hurt it.
        constexpr auto count = 1031;
        std::string allCharacters;
        allCharacters.resize(count * RecordSize);
        std::vector<int> inputs;
        inputs.reserve(count);
        std::uniform_int_distribution<> range(0, 100*1000*1000 - 1);
        char *base = allCharacters.data();
        // One record per iteration.  The original loop tested `if(--count)`,
        // which left the loop after the very first record.
        for(auto remaining = count; remaining--; base += RecordSize) {
            auto input = range(generator);
            // "%08d" of a value below 10^8 is exactly 8 digits, so digits
            // plus NUL fill the record exactly.
            snprintf(base, RecordSize, "%08d", input);
            inputs.push_back(input);
        }
        return Corpus8DecimalDigits(std::move(inputs), std::move(allCharacters));
    }

    /// Cursor over the records of a corpus.
    struct Iterator {
        Corpus8DecimalDigits *thy; ///< corpus being traversed
        int *ip;                   ///< current position in `asNumbers_`
        char *cp;                  ///< current position in `characters_`

        /// Advances to the next record (numeric and textual cursors together).
        Iterator &operator++() {
            ++ip;
            cp += RecordSize;
            return *this;
        }

        /// NUL-terminated text of the current record.
        char *operator*() {
            return cp;
        }

        /// Advances, then reports whether the cursor still designates a
        /// record (false once it reaches the end of `characters_`).
        auto next() noexcept {
            ++(*this);
            return cp != thy->characters_.data() + thy->characters_.size();
        }
    };

    /// Cursor positioned on the first record.
    Iterator commence() {
        return { this, asNumbers_.data(), characters_.data() };
    }
};
63+
64+
// X-macro list of the 8-digit parsers under benchmark.
// Each entry is X(Label, function): Label names the benchmark variant and
// function is the callable handed the text of a corpus record.
#define PARSE8BYTES_CORPUS_X_LIST \
    X(Lemire, parse_eight_digits_swar)\
    X(Zoo, lemire_as_zoo_swar)\
    X(LIBC, atoi)
68+
69+
/// Corpus of random-length C strings used to benchmark strlen variants.
///
/// `characters_` holds the strings back to back, each one followed by its NUL
/// terminator; `skips_[i]` is the length (terminator excluded) of string `i`.
struct CorpusStringLength {
    std::vector<int> skips_;
    std::string characters_;

    /// Takes ownership of the lengths and of the concatenated strings.
    CorpusStringLength(std::vector<int> &&skips, std::string &&cs):
        skips_{std::move(skips)}, characters_{std::move(cs)}
    {}

    /// Builds 1031 strings whose lengths are uniform in [0, 101] and whose
    /// bytes are uniform in [1, 255], all drawn from `generator`.
    template<typename G>
    static auto makeCorpus(G &generator) {
        auto count = 1031; // see Corpus8DecimalDigits for why 1031
        std::vector<int> sizes;
        sizes.reserve(count);
        std::string allCharacters;
        std::uniform_int_distribution<> strSize(0, 101); // again a prime
        std::uniform_int_distribution<> characters(1, 255); // notice 0 excluded

        while(count--) {
            auto length = strSize(generator);
            sizes.push_back(length);
            for(auto i = length; i--; ) {
                // values 1..255 never convert to '\0', so no early terminator
                allCharacters.push_back(static_cast<char>(characters(generator)));
            }
            allCharacters.push_back('\0');
        }
        return CorpusStringLength(std::move(sizes), std::move(allCharacters));
    }

    /// Cursor over the strings of a corpus.
    struct Iterator {
        int *skips, *sentinel; ///< current / one-past-last entry of `skips_`
        char *cp;              ///< start of the current string

        /// Advances to the start of the next string.
        Iterator &operator++() {
            // Each string occupies its length plus one byte for the NUL.
            // Skipping only the length (as the code originally did) leaves
            // `cp` on the terminator instead of on the next string.
            cp += *skips++ + 1;
            return *this;
        }

        /// NUL-terminated text of the current string.
        char *operator*() {
            return cp;
        }

        /// Advances, then reports whether the cursor still designates a
        /// string (false once every entry of `skips_` has been consumed).
        auto next() noexcept {
            ++(*this);
            return sentinel != skips;
        }
    };

    /// Cursor positioned on the first string.
    Iterator commence() {
        return {
            skips_.data(), skips_.data() + skips_.size(), characters_.data()
        };
    }
};
121+
122+
// X-macro list of the strlen implementations under benchmark.
// Each entry is X(Label, function): Label names the benchmark variant and
// function is the callable handed a NUL-terminated string from the corpus.
#define STRLEN_CORPUS_X_LIST \
    X(LIBC_STRLEN, strlen) \
    X(ZOO_STRLEN, zoo::c_strLength) \
    X(ZOO_NATURAL_STRLEN, zoo::c_strLength_natural) \
    X(ZOO_MANUAL_STRLEN, zoo::c_strLength_manualComparison) \
    X(ZOO_AVX, zoo::avx2_strlen) \
    X(GENERIC_GLIBC_STRLEN, STRLEN_old)
129+
130+
#define X(Typename, FunctionToCall) \
131+
struct Invoke##Typename { int operator()(const char *p) { return FunctionToCall(p); } };
132+
133+
PARSE8BYTES_CORPUS_X_LIST
134+
STRLEN_CORPUS_X_LIST
135+
#undef X

0 commit comments

Comments
 (0)