11#include " atoi.h"
2+ #include " atoi_impl.h"
23
34#include " zoo/swar/associative_iteration.h"
45
@@ -53,6 +54,21 @@ uint32_t calculateBase10(zoo::swar::SWAR<8, uint64_t> convertedToIntegers) noexc
5354 return uint32_t (by10001base2to32.value () >> 32 );
5455}
5556
57+ uint64_t calculateBase10 (zoo::swar::SWAR<8 , __uint128_t > convertedToIntegers) noexcept {
58+ auto by11base256 = convertedToIntegers.multiply (256 *10 + 1 );
59+ auto bytePairs = zoo::swar::doublePrecision (by11base256).odd ;
60+ auto by101base2to16 = bytePairs.multiply (1 + (100 << 16 ));
61+ auto byteQuads = zoo::swar::doublePrecision (by101base2to16).odd ;
62+ auto by10001base2to32 = byteQuads.multiply (1 + (10000ull << 32 ));
63+ // Now, truly work with 128 bits: combine two 32 bit results, each
64+ // corresponding to 8 bytes of inputs, into the the 64 bit result by
65+ // scaling one by 10^8
66+ auto byteOcts = zoo::swar::doublePrecision (by10001base2to32).odd ;
67+ auto byHundredMillionBase2to64 =
68+ byteOcts.multiply (1 + (__uint128_t (100'000'000 ) << 64 ));
69+ return uint64_t (byHundredMillionBase2to64.value () >> 64 );
70+ }
71+
5672// Note: eight digits can represent from 0 to (10^8) - 1, the logarithm base 2
5773// of 10^8 is slightly less than 27, thus, only 27 bits are needed.
5874uint32_t lemire_as_zoo_swar (const char *chars) noexcept {
@@ -104,25 +120,6 @@ std::size_t leadingSpacesCountAligned(S bytes) noexcept {
104120 return rv;
105121}
106122
/// @brief Copies into \c b the whole aligned "block" that contains the given
/// pointer
/// @tparam PtrT pointee type of the pointer used for loading
/// @tparam Block type of the block; its alignment must equal its size
/// @param pointerInsideBlock the potentially misaligned pointer
/// @param b destination of the sizeof(Block) bytes that get loaded
/// @return a tuple with the aligned pointer to the base of the block and the
/// misalignment, in bytes, of the source pointer with respect to that base
template<typename PtrT, typename Block>
std::tuple<PtrT *, int>
blockAlignedLoad(PtrT *pointerInsideBlock, Block *b) {
    constexpr auto BlockSize = sizeof(Block);
    // blocks are assumed to be as aligned as they are large, hence the
    // remainder of the address by the size is the offset within the block
    static_assert(alignof(Block) == BlockSize);
    auto address = reinterpret_cast<uintptr_t>(pointerInsideBlock);
    auto offsetInBlock = address % BlockSize;
    auto *blockStart = reinterpret_cast<PtrT *>(address - offsetInBlock);
    // the load starts at the block base, that is, it may include bytes that
    // precede pointerInsideBlock
    memcpy(b, blockStart, BlockSize);
    return { blockStart, offsetInBlock };
}
126123std::size_t leadingSpacesCount (const char *p) noexcept {
127124 using S = swar::SWAR<8 , uint64_t >;
128125 S bytes;
@@ -173,11 +170,33 @@ auto leadingDigitsCount(const char *p) noexcept {
173170 }
174171}
175172
176- int c_strToI (const char *str) noexcept {
177- constexpr static std::array<int , 8 > LastFactor = {
178- 1 , 10 , 100 , 1000 ,
179- 10'000 , 100'000 , 1000'000 , 10'000'000
180- };
173+ namespace impl {
174+
/// \brief Parameters of the conversion that depend on the type of the result;
/// only the specializations below are defined
template<typename> struct ConversionTraits;

/// \brief Traits for 32 bit results
template<> struct ConversionTraits<int32_t> {
    constexpr static auto NPositions = 9; // from 10^0 to 10^8
    /// Type of the table of powers of 10, one entry per position
    using PowersOf10Array = std::array<int32_t, NPositions>;
    /// Base type of the SWAR that c_strToIntegral processes per iteration
    using DoublePrecision = uint64_t;
};

/// \brief Traits for 64 bit results
template<> struct ConversionTraits<int64_t> {
    constexpr static auto NPositions = 17; // from 10^0 to 10^16
    /// Type of the table of powers of 10, one entry per position
    using PowersOf10Array = std::array<int64_t, NPositions>;
    /// Base type of the SWAR that c_strToIntegral processes per iteration
    using DoublePrecision = __uint128_t;
};

/// \brief Builds the table of powers of 10 for the given result type
/// \tparam Result a type for which ConversionTraits is specialized
/// \return an array where the element at index i is 10^i, for i from 0 to
/// ConversionTraits<Result>::NPositions - 1
/// \note constexpr, so that the table can be a compile time constant instead
/// of being recalculated every time it is needed
template<typename Result>
constexpr auto PowersOf10Array() noexcept {
    using Traits = ConversionTraits<Result>;
    typename Traits::PowersOf10Array rv{1}; // 10^0, the rest filled below
    for(std::size_t i = 1; i < rv.size(); ++i) {
        rv[i] = rv[i - 1] * 10;
    }
    return rv;
}
196+
197+ template <typename Return>
198+ Return c_strToIntegral (const char *str) noexcept {
199+ auto LastFactor = PowersOf10Array<Return>();
181200 auto leadingSpaces = leadingSpacesCount (str);
182201 auto s = str + leadingSpaces;
183202 auto sign = 1 ;
@@ -187,14 +206,19 @@ int c_strToI(const char *str) noexcept {
187206 case ' +' : ++s; break ;
188207 default : ;
189208 }
190- using S = swar::SWAR<8 , uint64_t >;
209+
210+ using SWAR_BaseType = typename ConversionTraits<Return>::DoublePrecision;
211+ constexpr auto
212+ NBytes = sizeof (SWAR_BaseType),
213+ NBitsPerByte = 8ul ; // 8 bits per byte
214+ using S = swar::SWAR<NBitsPerByte, SWAR_BaseType>;
191215 S bytes;
192216 auto [base, misalignment] = blockAlignedLoad (s, &bytes.m_v );
193- auto bitDisplacement = 8 * misalignment;
217+ auto bitDisplacement = NBitsPerByte * misalignment;
194218 constexpr static S
195- AllZeroCharacter{meta::BitmaskMaker<uint64_t , ' 0' , 8 >::value},
219+ AllZeroCharacter{meta::BitmaskMaker<SWAR_BaseType , ' 0' , NBitsPerByte >::value},
196220 AllOn = ~S{0 };
197- // blit the zero-characters to the misaligned part
221+
198222 auto mask = S{AllOn.value () << bitDisplacement};
199223 auto misalignedEliminated = bytes & mask;
200224 auto zeroCharactersIntroduced = AllZeroCharacter & ~mask;
@@ -210,23 +234,35 @@ int c_strToI(const char *str) noexcept {
210234 auto nonDigitIndex = nonDigits.lsbIndex ();
211235 auto asIntegers = bytes - AllZeroCharacter; // upper lanes garbage
212236 auto integersInHighLanes =
213- // allow complete clearing of the 8 bytes by doing 2 shifts,
214- // since it is UB to shift 64 bits.
215- asIntegers.shiftLanesLeft (7 - nonDigitIndex).shiftLanesLeft (1 );
237+ // split the shift in two steps because if nonDigitIndex is
238+ // zero, then you'd shift all bits, this would result in U.B.
239+ // for a single shift
240+ asIntegers.shiftLanesLeft (NBytes - 1 - nonDigitIndex)
241+ .shiftLanesLeft (1 );
216242 auto inBase10 = calculateBase10 (integersInHighLanes);
217243 auto scaledAccumulator = accumulator * LastFactor[nonDigitIndex];
218- return int ((scaledAccumulator + inBase10) * sign);
244+ return Return ((scaledAccumulator + inBase10) * sign);
219245 }
220- // all 8 bytes are digits
246+ // all bytes are digits
221247 auto asIntegers = bytes - AllZeroCharacter;
222- accumulator *= 100'000'000 ;
248+ accumulator *= LastFactor. back () ;
223249 auto inBase10 = calculateBase10 (asIntegers);
224250 accumulator += inBase10;
225- base += 8 ;
226- memcpy (&bytes.m_v , base, 8 );
251+ base += NBytes ;
252+ memcpy (&bytes.m_v , base, NBytes );
227253 }
228254}
229255
256+ }
257+
258+ int c_strToI (const char *str) noexcept {
259+ return impl::c_strToIntegral<int >(str);
260+ }
261+
262+ int64_t c_strToL (const char *str) noexcept {
263+ return impl::c_strToIntegral<int64_t >(str);
264+ }
265+
230266// / \brief Helper function to fix the non-string part of block
231267template <typename S>
232268S adjustMisalignmentFor_strlen (S data, int misalignment) {
@@ -252,7 +288,7 @@ std::size_t c_strLength(const char *s) {
252288
253289 auto indexOfFirstTrue = [](auto bs) { return bs.lsbIndex (); };
254290
255- // Misalignment must be taken into account because a SWAR read is
291+ // Misalignment must be taken into account because a SWAR read is
256292 // speculative, it might read bytes outside of the actual string.
257293 // It is safe to read within the page where the string occurs, and to
258294 // guarantee that, simply make aligned reads because the size of the SWAR
0 commit comments