11#include " zoo/swar/SWAR.h"
22#include " zoo/swar/associative_iteration.h"
33
4+ #include < immintrin.h>
5+
46#include < stdint.h>
57#include < string.h>
68#include < stdlib.h>
@@ -53,10 +55,36 @@ auto lemire_as_zoo_swar(const char *chars) {
5355namespace zoo {
5456
5557std::size_t c_strLength (const char *s) {
56- std::size_t rv = 0 ;
58+ using S = swar::SWAR<8 , std::size_t >;
59+ constexpr auto
60+ MSBs = S{S::MostSignificantBit},
61+ Ones = S{S::LeastSignificantBit};
62+ S bytes;
63+ for (auto base = s;; base += 8 ) {
64+ memcpy (&bytes.m_v , base, 8 );
65+ auto firstNullTurnsOnMSB = bytes - Ones;
66+ // The first lane with a null will borrow and set its MSB on when
67+ // subtracted one.
68+ // The borrowing from the first null interferes with the subsequent
69+ // lanes, that's why we focus on the first null.
70+ // The lanes previous to the first null might keep their MSB on after
71+ // subtracting one (if their value is greater than 0x80).
72+ // This provides a way to detect the first null: It is the first lane
73+ // in firstNullTurnsOnMSB that "flipped on" its MSB
74+ auto cheapestInversionOfMSBs = ~bytes;
75+ auto firstMSBsOnIsFirstNull =
76+ firstNullTurnsOnMSB & cheapestInversionOfMSBs;
77+ auto onlyMSBs = zoo::swar::convertToBooleanSWAR (firstMSBsOnIsFirstNull);
78+ if (onlyMSBs) { // there is a null!
79+ auto firstNullIndex = onlyMSBs.lsbIndex ();
80+ return firstNullIndex + (base - s);
81+ }
82+ }
83+ }
84+
85+ std::size_t c_strLength_natural (const char *s) {
5786 using S = swar::SWAR<8 , std::size_t >;
5887 S bytes;
59- constexpr auto MSBs = S{S::MostSignificantBit};
6088 for (auto base = s;; base += 8 ) {
6189 memcpy (&bytes.m_v , base, 8 );
6290 auto nulls = zoo::swar::equals (bytes, S{0 });
@@ -67,29 +95,52 @@ std::size_t c_strLength(const char *s) {
6795 }
6896}
6997
70- std::size_t c_strLength_ManualComparison (const char *s) {
71- std::size_t rv = 0 ;
98+ std::size_t c_strLength_manualComparison (const char *s) {
7299 using S = swar::SWAR<8 , std::size_t >;
73100 S bytes;
74101 constexpr auto MSBs = S{S::MostSignificantBit};
75102 for (auto base = s;; base += 8 ) {
76103 memcpy (&bytes.m_v , base, 8 );
77104 // A null byte is detected in two steps:
78105 // 1. it has the MSB off, and
79- // the least significant bits are also off.
106+ // 2. the least significant bits are also off.
80107 // The swar library allows the detection of lsbs off
81108 // By comparing greater equal to 0,
82109 // 0 can only be greater-equal to a byte with LSBs 0
83110 auto haveMSB_cleared = bytes ^ MSBs;
84111 auto lsbNulls = zoo::swar::greaterEqual_MSB_off (S{0 }, bytes & ~MSBs);
85- auto nulls = swar::asBooleanSWAR ( haveMSB_cleared & lsbNulls) ;
86- if (nulls) {
112+ auto nulls = haveMSB_cleared & lsbNulls;
113+ if (nulls. value () ) {
87114 auto firstNullIndex = nulls.lsbIndex ();
88115 return firstNullIndex + (base - s);
89116 }
90117 }
91118}
92119
/// \brief strlen implemented with AVX2: compares 32 bytes at a time against
/// zero and uses the resulting bit mask to find the first null byte.
///
/// \param str pointer to a null-terminated string.
/// \return the number of bytes that precede the first null byte.
///
/// \note The vector loads only begin once the cursor is 32-byte aligned: an
/// aligned 32-byte block can never straddle a page boundary, so reading the
/// bytes that follow the terminator inside that block cannot fault. Strictly
/// speaking those bytes are still outside of the string object.
/// \note The target attribute lets this function compile even when the
/// translation unit is not built with -mavx2; callers remain responsible for
/// only invoking it on CPUs that support AVX2.
__attribute__((target("avx2")))
size_t avx2_strlen(const char *str) {
    constexpr size_t BlockSize = sizeof(__m256i);
    const char *cursor = str;

    // Scalar prologue: examine bytes one at a time until the cursor is
    // aligned to the vector size.
    while (0 != reinterpret_cast<uintptr_t>(cursor) % BlockSize) {
        if ('\0' == *cursor) { return static_cast<size_t>(cursor - str); }
        ++cursor;
    }

    const __m256i zero = _mm256_setzero_si256(); // vector of 32 zero bytes
    // Loop over the string in aligned blocks of 32 bytes; the loop can only
    // be left by returning from inside of it.
    for (;; cursor += BlockSize) {
        // The alignment established above makes the aligned load valid
        const __m256i data =
            _mm256_load_si256(reinterpret_cast<const __m256i *>(cursor));
        // Each byte equal to '\0' becomes 0xFF, the others 0x00
        const __m256i cmp = _mm256_cmpeq_epi8(data, zero);
        // One bit per byte: bit i is on when byte i is '\0'
        const auto mask = static_cast<unsigned>(_mm256_movemask_epi8(cmp));
        if (mask) {
            // The count of trailing zeros is the index of the first '\0'
            return
                static_cast<size_t>(cursor - str) +
                static_cast<size_t>(__builtin_ctz(mask));
        }
    }
}
143+
93144}
94145
95146// / \brief This is the last non-platform specific "generic" strlen in GLibC.
0 commit comments