Zooification of Lemire 8 bytes

Eddie · Eddie · commit 506074353996 · 2024-02-04T12:42:23.000-08:00
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
@@ -9,7 +9,7 @@ project(ZooBenchmark)
 
 include_directories(
     ../test/inc
-    ../../benchmark/include
+    dependencies/google_benchmark/include
     ../inc
     ../junkyard/inc
     ../test/third_party/Catch2/single_include
@@ -30,4 +30,9 @@ add_executable(
     zoo-google-benchmark benchmark_main.cpp cfs.cpp cfs/cfs_utility.cpp
 )
 
+add_executable(
+    zoo-atoi-benchmark benchmark_main.cpp atoi.cpp
+)
+
 target_link_libraries(zoo-google-benchmark benchmark::benchmark)
+target_link_libraries(zoo-atoi-benchmark benchmark::benchmark)
diff --git a/benchmark/atoi.cpp b/benchmark/atoi.cpp
@@ -0,0 +1,51 @@
+#include "zoo/swar/SWAR.h"
+#include "zoo/swar/associative_iteration.h"
+
+#include <benchmark/benchmark.h>
+
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+
+// Copied from Daniel Lemire's GitHub at
+// https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/ddb082981228f7256e9a4dbbf56fd4a335d78e30/2018/10/03/eightchartoi.c#L26C1-L34C2
+
+uint32_t parse_eight_digits_swar(const unsigned char *chars) { // returns the decimal value of the 8 ASCII digits at chars; no validation is performed
+  uint64_t val;
+  memcpy(&val, chars, 8); // reads exactly 8 bytes as one word; NOTE(review): digit order presumes a little-endian host — confirm
+  val = val - 0x3030303030303030; // subtract 0x30 (ASCII '0') from every byte: for digit input each byte becomes 0..9
+  uint64_t byte10plus   = ((val        * (1 + (0xa  <<  8))) >>  8) & 0x00FF00FF00FF00FF; // per 16-bit lane: 10 * (lower byte) + (higher byte) of each byte pair
+  uint64_t short100plus = ((byte10plus * (1 + (0x64 << 16))) >> 16) & 0x0000FFFF0000FFFF; // per 32-bit lane: 100 * (lower pair) + (higher pair)
+  short100plus *= (1 + (10000ULL << 32)); // upper 32 bits become 10000 * (lower quad) + (higher quad)
+  return short100plus >> 32; // keep only the upper 32 bits, which hold the combined value
+}
+
+// Note: eight digits can represent from 0 to (10^8) - 1, the logarithm base 2
+// of 10^8 is slightly less than 27, thus, only 27 bits are needed.
+template<typename> struct Trick; // declared but never defined here; presumably a debugging aid (instantiating Trick<T> makes the compiler name T) — confirm
+
+// Zoo-SWAR rendition of parse_eight_digits_swar: reads the eight ASCII decimal
+// digits at `chars` and returns their numeric value as a uint32_t.
+// The input is not validated; exactly 8 bytes are read.
+// NOTE(review): like the original, the digit order presumes a little-endian
+// host — confirm.
+auto lemire_as_zoo_swar(const char *chars) {
+    uint64_t bytes;
+    memcpy(&bytes, chars, 8);
+    auto allCharacterZero = zoo::meta::BitmaskMaker<uint64_t, '0', 8>::value;
+    using S8_64 = zoo::swar::SWAR<8, uint64_t>;
+    // Subtracting '0' from every byte turns each character into its digit.
+    S8_64 convertedToIntegers = S8_64{bytes - allCharacterZero};
+    /* the idea is to perform the following multiplication:
+     * NOTE: THE BASE OF THE NUMBERS is 256 (2^8), then 65536 (2^16), 2^32
+     * convertedToIntegers is IN BASE 256 the number ABCDEFGH
+     * BASE256:   A    B    C    D    E    F    G    H *
+     * BASE256                                 10    1 =
+     *           --------------------------------------
+     * BASE256  1*A  1*B  1*C  1*D  1*E  1*F  1*G  1*H +
+     * BASE256 10*B 10*C 10*D 10*E 10*F 10*G 10*H    0
+     *           --------------------------------------
+     * BASE256 A+10B ....................... G+10H   H
+     */
+    auto by11base256 = convertedToIntegers.multiply(256*10 + 1);
+    // `.odd` is expected to hold the useful (odd) lanes at double width, the
+    // counterpart of `>> 8 & 0x00FF00FF00FF00FF` in parse_eight_digits_swar;
+    // the static_assert pins the resulting lane width.
+    auto bytePairs = zoo::swar::doublePrecision(by11base256).odd;
+    static_assert(std::is_same_v<decltype(bytePairs), zoo::swar::SWAR<16, uint64_t>>);
+    // Same step in base 2^16 with factor 100. The parentheses are required:
+    // `+` binds tighter than `<<`, so `1 + 100 << 16` would be `101 << 16`.
+    auto by101base2to16 = bytePairs.multiply(1 + (100 << 16));
+    auto byteQuads = zoo::swar::doublePrecision(by101base2to16).odd;
+    // Last step in base 2^32 with factor 10000; the combined value ends up in
+    // the upper 32 bits of the product.
+    auto by10001base2to32 = byteQuads.multiply(1 + (10000ull << 32));
+    return uint32_t(by10001base2to32.value() >> 32);
+}
diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h
@@ -144,6 +144,8 @@ struct SWAR {
         return SWAR{(*this & protectiveMask).value() >> bitCount};
     }
 
+    // Whole-word multiplication: the product is computed on the underlying
+    // value m_v as a single T, not lane by lane, and wrapped in a new SWAR.
+    constexpr SWAR multiply(T multiplier) const noexcept {
+        return SWAR{m_v * multiplier};
+    }
     T m_v;
 };
 

Original file line number	Diff line number	Diff line change
`@@ -144,6 +144,8 @@ struct SWAR {`
`144`	`144`	`return SWAR{(*this & protectiveMask).value() >> bitCount};`
`145`	`145`	`}`
`146`	`146`
	`147`	`+ constexpr SWAR`
	`148`	`+ multiply(T multiplier) const noexcept { return SWAR{m_v * multiplier}; }`
`147`	`149`	`T m_v;`
`148`	`150`	`};`
`149`	`151`