|
| 1 | +#include "zoo/swar/SWAR.h" |
| 2 | +#include "zoo/swar/associative_iteration.h" |
| 3 | + |
| 4 | +#include <benchmark/benchmark.h> |
| 5 | + |
| 6 | +#include <stdint.h> |
| 7 | +#include <string.h> |
| 8 | +#include <stdlib.h> |
| 9 | + |
| 10 | +// Copied from Daniel Lemire's GitHub at |
| 11 | +// https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/ddb082981228f7256e9a4dbbf56fd4a335d78e30/2018/10/03/eightchartoi.c#L26C1-L34C2 |
| 12 | + |
// Converts exactly eight ASCII decimal digits at `chars` into their numeric
// value without a per-character loop: the digits are loaded as one 64-bit word
// and neighbouring lanes are merged with three multiply/shift/mask rounds
// (bytes -> two-digit pairs -> four-digit groups -> the eight-digit result).
//
// chars: must point to at least eight readable bytes. No validation is done;
//        bytes outside '0'..'9' produce a meaningless result.
// Returns the parsed value, which for valid input is at most 99'999'999.
//
// The lane layout relies on the first character landing in the least
// significant byte of the loaded word, i.e. a little-endian host.
uint32_t parse_eight_digits_swar(const unsigned char *chars) {
    // Each multiplier is (1 + weight << laneWidth): multiplying by it adds to
    // every lane `weight` times the lane directly below it.
    constexpr uint64_t kMergeBytes  = 1 + (UINT64_C(10) << 8);
    constexpr uint64_t kMergePairs  = 1 + (UINT64_C(100) << 16);
    constexpr uint64_t kMergeQuads  = 1 + (UINT64_C(10000) << 32);
    constexpr uint64_t kAsciiZeros  = UINT64_C(0x3030303030303030);
    constexpr uint64_t kLow8Of16    = UINT64_C(0x00FF00FF00FF00FF);
    constexpr uint64_t kLow16Of32   = UINT64_C(0x0000FFFF0000FFFF);

    // memcpy is the portable way to read the bytes as an integer.
    uint64_t word;
    memcpy(&word, chars, sizeof word);

    // '0'..'9' -> 0..9 in every byte at once.
    word -= kAsciiZeros;

    // After the multiply every odd byte holds a two-digit value (<= 99);
    // shift it down and keep one value per 16-bit lane.
    const uint64_t twoDigitLanes = ((word * kMergeBytes) >> 8) & kLow8Of16;

    // Same idea one level up: four-digit values (<= 9999) per 32-bit lane.
    const uint64_t fourDigitLanes =
        ((twoDigitLanes * kMergePairs) >> 16) & kLow16Of32;

    // The upper 32 bits of the last product are low + 10000 * high.
    const uint64_t combined = fourDigitLanes * kMergeQuads;
    return static_cast<uint32_t>(combined >> 32);
}
| 22 | + |
| 23 | +// Note: eight decimal digits can represent from 0 to (10^8) - 1, the logarithm |
| 24 | +// base 2 of 10^8 is slightly less than 27, thus, only 27 bits are needed. |
// Primary template that is declared here but given no definition in the
// visible code, so any use requiring a complete type fails to compile.
// NOTE(review): presumably a debugging aid -- instantiating Trick<T> makes the
// compiler's "incomplete type" error spell out T; confirm before removing.
template<typename> struct Trick;
| 26 | + |
| 27 | +auto lemire_as_zoo_swar(const char *chars) { |
| 28 | + uint64_t bytes; |
| 29 | + memcpy(&bytes, chars, 8); |
| 30 | + auto allCharacterZero = zoo::meta::BitmaskMaker<uint64_t, '0', 8>::value; |
| 31 | + using S8_64 = zoo::swar::SWAR<8, uint64_t>; |
| 32 | + S8_64 convertedToIntegers = S8_64{bytes - allCharacterZero}; |
| 33 | + /* the idea is to perform the following multiplication: |
| 34 | + * NOTE: THE BASE OF THE NUMBERS is 256 (2^8), then 65536 (2^16), 2^32 |
| 35 | + * convertedToIntegers is IN BASE 256 the number ABCDEFGH |
| 36 | + * BASE256: A B C D E F G H * |
| 37 | + * BASE256 10 1 = |
| 38 | + * -------------------------------------- |
| 39 | + * BASE256 1*A 1*B 1*C 1*D 1*E 1*F 1*G 1*H + |
| 40 | + * BASE256 10*B 10*C 10*D 10*E 10*F 10*G 10*H 0 |
| 41 | + * -------------------------------------- |
| 42 | + * BASE256 A+10B ....................... G+10H H |
| 43 | + */ |
| 44 | + auto by11base256 = convertedToIntegers.multiply(256*10 + 1); |
| 45 | + auto bytePairs = zoo::swar::doublePrecision(by11base256).odd; |
| 46 | + static_assert(std::is_same_v<decltype(bytePairs), zoo::swar::SWAR<16, uint64_t>>); |
| 47 | + auto by101base2to16 = bytePairs.multiply(1 + 100 << 16); |
| 48 | + auto byteQuads = zoo::swar::doublePrecision(by101base2to16).odd; |
| 49 | + auto by10001base2to32 = zoo::swar::doublePrecision(by101base2to16).odd; |
| 50 | + return uint32_t(zoo::swar::doublePrecision(by10001base2to32).odd.value()); |
| 51 | +} |
0 commit comments