Skip to content

Commit 51db77c

Browse files
committed
Strlen implemented
1 parent 7a8d1d6 commit 51db77c

File tree

6 files changed

+139
-30
lines changed

6 files changed

+139
-30
lines changed

benchmark/CMakeLists.txt

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,19 @@
11
cmake_minimum_required (VERSION 3.8)
22

3+
project(ZooBenchmark)
4+
5+
36
set(CMAKE_CXX_FLAGS "-fno-unroll-loops")
47

58
set(CMAKE_CXX_STANDARD 17)
69
set(CMAKE_BUILD_TYPE "RelWithDebInfo")
710

8-
project(ZooBenchmark)
11+
if(CMAKE_BUILD_TYPE MATCHES "Release|RelWithDebInfo")
12+
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
13+
string(APPEND CMAKE_CXX_FLAGS_RELEASE " -march=native -fno-unroll-loops")
14+
string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -march=native -fno-unroll-loops")
15+
endif()
16+
endif()
917

1018
include_directories(
1119
../test/inc
@@ -26,7 +34,7 @@ add_executable(
2634
catch2BenchmarkMain.cpp catch2Functions.cpp catch2swar-demo.cpp
2735
atoi.cpp
2836
egyptian.cpp
29-
RobinHood.benchmark.cpp
37+
# RobinHood.benchmark.cpp
3038
)
3139

3240
add_executable(

benchmark/atoi-corpus.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ struct CorpusStringLength {
8282
std::uniform_int_distribution<> strSize(0, 101); // again a prime
8383
std::uniform_int_distribution<> characters(1, 255); // notice 0 excluded
8484

85-
for(;;) {
85+
while(count--) {
8686
auto length = strSize(generator);
8787
sizes.push_back(length);
8888
for(auto i = length; i--; ) {
@@ -121,8 +121,9 @@ struct CorpusStringLength {
121121

122122
#define STRLEN_CORPUS_X_LIST \
123123
X(LIBC_STRLEN, strlen) \
124-
X(ZOO_BEST_STRLEN, zoo::c_strLength) \
125-
X(ZOO_NATURAL_STRLEN, zoo::c_strLength_MoreNaturalButSlightlyWorse)
124+
X(ZOO_NATURAL_STRLEN, zoo::c_strLength) \
125+
X(ZOO_MANUAL_STRLEN, zoo::c_strLength_ManualComparison) \
126+
X(GENERIC_GLIBC_STRLEN, STRLEN_old)
126127

127128
#define X(Typename, FunctionToCall) \
128129
struct Invoke##Typename { int operator()(const char *p) { return FunctionToCall(p); } };

benchmark/atoi.cpp

Lines changed: 88 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -59,35 +59,110 @@ std::size_t c_strLength(const char *s) {
5959
constexpr auto MSBs = S{S::MostSignificantBit};
6060
for(auto base = s;; base += 8) {
6161
memcpy(&bytes.m_v, base, 8);
62-
// A null byte is detected in two steps:
63-
// 1. it has the MSB off, and
64-
// the least significant bits are also off.
65-
// The swar library allows the detection of lsbs off
66-
// By comparing greater equal to 0,
67-
// 0 can only be greater-equal to a byte with LSBs 0
68-
auto haveMSB_cleared = bytes ^ MSBs;
69-
auto lsbNulls = zoo::swar::greaterEqual_MSB_off(S{0}, bytes);
70-
auto nulls = swar::asBooleanSWAR(haveMSB_cleared & lsbNulls);
71-
if(nulls) {
62+
auto nulls = zoo::swar::equals(bytes, S{0});
63+
if(nulls) { // there is a null!
7264
auto firstNullIndex = nulls.lsbIndex();
7365
return firstNullIndex + (base - s);
7466
}
7567
}
7668
}
7769

78-
std::size_t c_strLength_MoreNaturalButSlightlyWorse(const char *s) {
70+
std::size_t c_strLength_ManualComparison(const char *s) {
7971
std::size_t rv = 0;
8072
using S = swar::SWAR<8, std::size_t>;
8173
S bytes;
8274
constexpr auto MSBs = S{S::MostSignificantBit};
8375
for(auto base = s;; base += 8) {
8476
memcpy(&bytes.m_v, base, 8);
85-
auto nulls = zoo::swar::equals(bytes, S{0});
86-
if(nulls) { // there is a null!
77+
// A null byte is detected in two steps:
78+
// 1. it has the MSB off, and
79+
// the least significant bits are also off.
80+
// The swar library allows the detection of lsbs off
81+
// By comparing greater equal to 0,
82+
// 0 can only be greater-equal to a byte with LSBs 0
83+
auto haveMSB_cleared = bytes ^ MSBs;
84+
auto lsbNulls = zoo::swar::greaterEqual_MSB_off(S{0}, bytes & ~MSBs);
85+
auto nulls = swar::asBooleanSWAR(haveMSB_cleared & lsbNulls);
86+
if(nulls) {
8787
auto firstNullIndex = nulls.lsbIndex();
8888
return firstNullIndex + (base - s);
8989
}
9090
}
9191
}
9292

9393
}
94+
95+
/// \brief This is the last non-platform specific "generic" strlen in GLibC.
96+
/// Taken from https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=string/strlen.c;hb=6d7e8eda9b85b08f207a6dc6f187e94e4817270f
97+
/// that dates to 2023-01-06 (a year ago at the time of writing)
98+
// Word-at-a-time strlen, following the algorithm of glibc's generic C strlen.
//
// \param str  Pointer to a NUL-terminated byte string.
// \return     Number of bytes that precede the first '\0'.
//
// Differences from the verbatim glibc text (the algorithm is unchanged):
//  * alignment is tested through uintptr_t, because `unsigned long` may be
//    narrower than a pointer (e.g. LLP64 targets);
//  * each longword is loaded with memcpy instead of dereferencing an
//    `unsigned long *`, which avoids the strict-aliasing violation;
//  * the supported longword size is checked at compile time rather than by
//    calling abort() at run time.
//
// Like the original, the scan reads whole aligned longwords and therefore may
// read a few bytes past the terminator, but never past the aligned longword
// that contains it.
std::size_t
STRLEN_old (const char *str)
{
  using Word = unsigned long int;
  static_assert (sizeof (Word) == 4 || sizeof (Word) == 8,
                 "STRLEN_old only supports 4- and 8-byte longwords");

  /* Handle the first few characters by reading one character at a time.
     Do this until the cursor is aligned on a longword boundary.  */
  const char *cursor = str;
  while ((reinterpret_cast<uintptr_t> (cursor) & (sizeof (Word) - 1)) != 0)
    {
      if (*cursor == '\0')
        return static_cast<std::size_t> (cursor - str);
      ++cursor;
    }

  /* lomagic has 0x01 in every byte, himagic has 0x80 in every byte.
     Computing (longword - lomagic) sets the high bit of any byte that is
     either zero or greater than 0x80.  The latter case is filtered out by
     (~longword & himagic).  The final result is non-zero exactly when at
     least one byte of longword is zero.  */
  constexpr Word lomagic = static_cast<Word> (-1) / 0xFF;
  constexpr Word himagic = lomagic << 7;

  /* Instead of the traditional loop which tests each character,
     test a longword at a time.  */
  for (;; cursor += sizeof (Word))
    {
      Word longword;
      memcpy (&longword, cursor, sizeof longword);

      if (((longword - lomagic) & ~longword & himagic) == 0)
        continue;

      /* Which of the bytes was the zero?  Checking the bytes in memory
         order keeps this independent of endianness.  */
      for (std::size_t i = 0; i < sizeof (Word); ++i)
        if (cursor[i] == '\0')
          return static_cast<std::size_t> (cursor - str) + i;
    }
}

benchmark/atoi.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ uint32_t lemire_as_zoo_swar(const char *chars);
77
namespace zoo {
88

99
std::size_t c_strLength(const char *s);
10-
std::size_t c_strLength_MoreNaturalButSlightlyWorse(const char *s);
10+
std::size_t c_strLength_ManualComparison(const char *s);
1111

1212
}
13+
14+
std::size_t
15+
STRLEN_old (const char *str);

benchmark/bm-swar.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,16 @@ void runBenchmark(benchmark::State &s) {
2626
}
2727
}
2828

29+
2930
#define X(Typename, FunctionToCall) \
30-
BENCHMARK(runBenchmark<Corpus8DecimalDigits, Invoke##Typename>);
31+
BENCHMARK(runBenchmark<CORPUS, Invoke##Typename>);
3132

32-
PARSE8BYTES_CORPUS_X_LIST
33-
#undef X
33+
#define CORPUS Corpus8DecimalDigits
34+
PARSE8BYTES_CORPUS_X_LIST
35+
#undef CORPUS
36+
37+
#define CORPUS CorpusStringLength
38+
STRLEN_CORPUS_X_LIST
39+
#undef CORPUS
3440

35-
#define X(TN, FTC) \
36-
BENCHMARK(runBenchmark<CorpusStringLength, Invoke##TN>);
37-
STRLEN_CORPUS_X_LIST
3841
#undef X

benchmark/catch2swar-demo.cpp

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,40 @@ TEST_CASE("Atoi benchmarks", "[atoi][swar]") {
1717
auto seed = rd();
1818
CAPTURE(seed);
1919
std::mt19937 g(seed);
20-
auto corpus = Corpus8DecimalDigits::makeCorpus(g);
20+
auto corpus8D = Corpus8DecimalDigits::makeCorpus(g);
21+
auto corpusStrlen = CorpusStringLength::makeCorpus(g);
2122
#define X(Type, Fun) \
22-
auto from##Type = traverse(corpus, Invoke##Type{}, 0);
23-
PARSE8BYTES_CORPUS_X_LIST
23+
auto from##Type = traverse(CORPUS, Invoke##Type{}, 0);
24+
25+
#define CORPUS corpus8D
26+
PARSE8BYTES_CORPUS_X_LIST
27+
#undef CORPUS
28+
29+
#define CORPUS corpusStrlen
30+
STRLEN_CORPUS_X_LIST
31+
#undef CORPUS
2432
#undef X
2533
REQUIRE(fromLemire == fromZoo);
2634
REQUIRE(fromLIBC == fromZoo);
35+
REQUIRE(fromLIBC_STRLEN == fromZOO_NATURAL_STRLEN);
36+
REQUIRE(fromZOO_NATURAL_STRLEN == fromZOO_MANUAL_STRLEN);
37+
REQUIRE(fromGENERIC_GLIBC_STRLEN == fromZOO_NATURAL_STRLEN);
2738

2839
auto haveTheRoleOfMemoryBarrier = -1;
2940
#define X(Type, Fun) \
41+
WARN(typeid(CORPUS).name() << ':' << typeid(Fun).name()); \
3042
BENCHMARK(#Type) { \
3143
return \
32-
traverse(corpus, Invoke##Type{}, haveTheRoleOfMemoryBarrier); \
44+
traverse(CORPUS, Invoke##Type{}, haveTheRoleOfMemoryBarrier); \
3345
};
34-
PARSE8BYTES_CORPUS_X_LIST
46+
47+
#define CORPUS corpus8D
48+
PARSE8BYTES_CORPUS_X_LIST
49+
#undef CORPUS
50+
51+
#define CORPUS corpusStrlen
52+
STRLEN_CORPUS_X_LIST
53+
#undef CORPUS
3554
#undef X
3655
}
3756

0 commit comments

Comments
 (0)