1- /* auto-generated on 2025-01-16 13:33:53 -0500. Do not edit! */
1+ /* auto-generated on 2025-03-27 15:01:10 -0400. Do not edit! */
2+ /* modified for SIMDJSON_PHP to mimic PHP's JSON encode/decode behavior */
23/* including simdjson.cpp: */
34/* begin file simdjson.cpp */
45#define SIMDJSON_SRC_SIMDJSON_CPP
@@ -776,22 +777,22 @@ inline namespace literals {
776777inline namespace string_view_literals {
777778
778779
// User-defined literal suffix `_sv`, one overload per character type (char, char16_t,
// char32_t, wchar_t). Each overload receives the literal's character pointer and length
// and returns the matching std::basic_string_view specialization built from exactly
// those two values.
// In this diff the '-'/'+' pairs are the old and new spelling of the same signature; the
// only difference is the removed whitespace between `""` and the suffix.
// NOTE(review): presumably this avoids the deprecated `operator "" suffix` spelling — confirm.
779- constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1)
780+ constexpr std::string_view operator ""_sv( const char* str, size_t len ) noexcept // (1)
780781{
781782 return std::string_view{ str, len };
782783}
783784
784- constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2)
785+ constexpr std::u16string_view operator ""_sv( const char16_t* str, size_t len ) noexcept // (2)
785786{
786787 return std::u16string_view{ str, len };
787788}
788789
789- constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3)
790+ constexpr std::u32string_view operator ""_sv( const char32_t* str, size_t len ) noexcept // (3)
790791{
791792 return std::u32string_view{ str, len };
792793}
793794
794- constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4)
795+ constexpr std::wstring_view operator ""_sv( const wchar_t* str, size_t len ) noexcept // (4)
795796{
796797 return std::wstring_view{ str, len };
797798}
@@ -2122,22 +2123,22 @@ nssv_inline_ns namespace string_view_literals {
21222123
21232124#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS
21242125
// `sv` literal overloads for the nonstd::sv_lite string-view types, one per character type.
// Each overload builds the corresponding nonstd::sv_lite view from the literal's pointer
// and length. They are compiled only when the enclosing
// `#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS` condition holds.
// The '-'/'+' pairs differ only in the whitespace between `""` and the suffix.
2125- nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1)
2126+ nssv_constexpr nonstd::sv_lite::string_view operator ""sv( const char* str, size_t len ) nssv_noexcept // (1)
21262127{
21272128 return nonstd::sv_lite::string_view{ str, len };
21282129}
21292130
2130- nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
2131+ nssv_constexpr nonstd::sv_lite::u16string_view operator ""sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
21312132{
21322133 return nonstd::sv_lite::u16string_view{ str, len };
21332134}
21342135
2135- nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
2136+ nssv_constexpr nonstd::sv_lite::u32string_view operator ""sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
21362137{
21372138 return nonstd::sv_lite::u32string_view{ str, len };
21382139}
21392140
2140- nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
2141+ nssv_constexpr nonstd::sv_lite::wstring_view operator ""sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
21412142{
21422143 return nonstd::sv_lite::wstring_view{ str, len };
21432144}
@@ -2146,22 +2147,22 @@ nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str,
21462147
21472148#if nssv_CONFIG_USR_SV_OPERATOR
21482149
// `_sv` (underscore-prefixed, user-namespace) literal overloads for the nonstd::sv_lite
// string-view types, one per character type. Each overload builds the corresponding
// nonstd::sv_lite view from the literal's pointer and length. They are compiled only when
// the enclosing `#if nssv_CONFIG_USR_SV_OPERATOR` condition holds.
// The '-'/'+' pairs differ only in the whitespace between `""` and the suffix.
2149- nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1)
2150+ nssv_constexpr nonstd::sv_lite::string_view operator ""_sv( const char* str, size_t len ) nssv_noexcept // (1)
21502151{
21512152 return nonstd::sv_lite::string_view{ str, len };
21522153}
21532154
2154- nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
2155+ nssv_constexpr nonstd::sv_lite::u16string_view operator ""_sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
21552156{
21562157 return nonstd::sv_lite::u16string_view{ str, len };
21572158}
21582159
2159- nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
2160+ nssv_constexpr nonstd::sv_lite::u32string_view operator ""_sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
21602161{
21612162 return nonstd::sv_lite::u32string_view{ str, len };
21622163}
21632164
2164- nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
2165+ nssv_constexpr nonstd::sv_lite::wstring_view operator ""_sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
21652166{
21662167 return nonstd::sv_lite::wstring_view{ str, len };
21672168}
@@ -2431,7 +2432,7 @@ enum error_code {
24312432 SUCCESS = 0, ///< No error
24322433 CAPACITY, ///< This parser can't support a document that big
24332434 MEMALLOC, ///< Error allocating memory, most likely out of memory
2434- TAPE_ERROR, ///< Something went wrong, this is a generic error
2435+ TAPE_ERROR, ///< Something went wrong, this is a generic error. Fatal/unrecoverable error.
24352436 DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation
24362437 STRING_ERROR, ///< Problem while parsing a string
24372438 T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't'
@@ -2456,13 +2457,21 @@ enum error_code {
24562457 PARSER_IN_USE, ///< parser is already in use.
24572458 OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order (checked when SIMDJSON_DEVELOPMENT_CHECKS=1)
24582459 INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it.
2459- INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early.
2460+ INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. Fatal/unrecoverable error.
24602461 SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value.
24612462 OUT_OF_BOUNDS, ///< Attempted to access location outside of document.
24622463 TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input
24632464 NUM_ERROR_CODES
24642465};
24652466
2467+ /**
2468+ * Some errors are fatal and invalidate the document. This function returns true if the
2469+ * error is fatal. It returns true for TAPE_ERROR and INCOMPLETE_ARRAY_OR_OBJECT.
2470+ * Once a fatal error is encountered, the on-demand document is no longer valid and
2471+ * processing should stop.
 *
 * @param error The error code to classify.
 * @return true for TAPE_ERROR and INCOMPLETE_ARRAY_OR_OBJECT, false for every other code.
2472+ */
2473+ inline bool is_fatal(error_code error) noexcept;
2474+
24662475/**
24672476 * It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whether
24682477 * we check for OUT_OF_ORDER_ITERATION. The logic behind it is that these errors only occur when the code
@@ -2765,14 +2774,30 @@ SIMDJSON_IMPL_CONCEPT(op_append, operator+=)
27652774#undef SIMDJSON_IMPL_CONCEPT
27662775} // namespace details
27672776
2777+
/// Satisfied when T is implicitly convertible to std::string_view but NOT implicitly
/// convertible to const char*. Types such as std::string and std::string_view qualify;
/// anything that converts to a raw C-string pointer is rejected.
2778+ template <typename T>
2779+ concept string_view_like = std::is_convertible_v<T, std::string_view> &&
2780+ !std::is_convertible_v<T, const char*>;
2781+
/// Satisfied when T can be constructed from a std::string_view, is default-constructible,
/// and is not std::string_view itself.
2782+ template<typename T>
2783+ concept constructible_from_string_view = std::is_constructible_v<T, std::string_view>
2784+ && !std::is_same_v<T, std::string_view>
2785+ && std::is_default_constructible_v<T>;
2786+
/// Satisfied by map-like types M whose key_type is string_view_like and whose
/// emplace(key, mapped_value) returns std::pair<M::iterator, bool>.
/// NOTE(review): the requires-parameter strips cv/ref from M, but `typename M::key_type`,
/// `M::mapped_type` and `M::iterator` are named on M directly; for a reference-qualified M
/// those nested names cannot be formed, so the concept is presumably unsatisfied — confirm
/// that this is intended.
2787+ template<typename M>
2788+ concept string_view_keyed_map = string_view_like<typename M::key_type>
2789+ && requires(std::remove_cvref_t<M>& m, typename M::key_type sv, typename M::mapped_type v) {
2790+ { m.emplace(sv, v) } -> std::same_as<std::pair<typename M::iterator, bool>>;
2791+ };
2792+
27682793/// Check if T is a container that we can append to, including:
27692794/// std::vector, std::deque, std::list, std::string, ...
/// A type qualifies when it supports at least one of emplace_back, emplace, push_back,
/// push, add, append or insert. In the updated ('+') form, types that satisfy
/// string_view_keyed_map are additionally excluded.
27702795template <typename T>
27712796concept appendable_containers =
2772- details::supports_emplace_back<T> || details::supports_emplace<T> ||
2797+ ( details::supports_emplace_back<T> || details::supports_emplace<T> ||
27732798 details::supports_push_back<T> || details::supports_push<T> ||
27742799 details::supports_add<T> || details::supports_append<T> ||
2775- details::supports_insert<T>;
2800+ details::supports_insert<T>) && !string_view_keyed_map<T> ;
27762801
27772802/// Insert into the container however possible
27782803template <appendable_containers T, typename... Args>
@@ -2840,6 +2865,8 @@ concept optional_type = requires(std::remove_cvref_t<T> obj) {
28402865 { static_cast<bool>(obj) } -> std::same_as<bool>; // convertible to bool
28412866};
28422867
2868+
2869+
28432870} // namespace concepts
28442871} // namespace simdjson
28452872#endif // SIMDJSON_SUPPORTS_DESERIALIZATION
@@ -4511,6 +4538,11 @@ extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886];
45114538#include <iostream>
45124539
45134540namespace simdjson {
4541+
// Reports whether `error` is one of the two codes treated as fatal:
// TAPE_ERROR or INCOMPLETE_ARRAY_OR_OBJECT. Every other error_code yields false.
4542+ inline bool is_fatal(error_code error) noexcept {
4543+ return error == TAPE_ERROR || error == INCOMPLETE_ARRAY_OR_OBJECT;
4544+ }
4545+
45144546namespace internal {
45154547 // We store the error code so we can validate the error message is associated with the right code
45164548 struct error_code_info {
@@ -4696,7 +4728,7 @@ namespace internal {
46964728 { SUCCESS, "SUCCESS: No error" },
46974729 { CAPACITY, "CAPACITY: This parser can't support a document that big" },
46984730 { MEMALLOC, "MEMALLOC: Error allocating memory, we're most likely out of memory" },
4699- { TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc." },
4731+ { TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc. This is a fatal and unrecoverable error. " },
47004732 { DEPTH_ERROR, "DEPTH_ERROR: The JSON document was too deep (too many nested objects and arrays)" },
47014733 { STRING_ERROR, "STRING_ERROR: Problem while parsing a string" },
47024734 { T_ATOM_ERROR, "T_ATOM_ERROR: Problem while parsing an atom starting with the letter 't'" },
@@ -4721,7 +4753,7 @@ namespace internal {
47214753 { PARSER_IN_USE, "PARSER_IN_USE: Cannot parse a new document while a document is still in use." },
47224754 { OUT_OF_ORDER_ITERATION, "OUT_OF_ORDER_ITERATION: Objects and arrays can only be iterated when they are first encountered." },
47234755 { INSUFFICIENT_PADDING, "INSUFFICIENT_PADDING: simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." },
4724- { INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array." },
4756+ { INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array. This is a fatal and unrecoverable error. " },
47254757 { SCALAR_DOCUMENT_AS_VALUE, "SCALAR_DOCUMENT_AS_VALUE: A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "},
47264758 { OUT_OF_BOUNDS, "OUT_OF_BOUNDS: Attempt to access location outside of document."},
47274759 { TRAILING_CONTENT, "TRAILING_CONTENT: Unexpected trailing content in the JSON input."}
@@ -6787,7 +6819,7 @@ class document {
67876819 * The memory allocation is strict: you
67886820 * can use this function to increase
67896821 * or lower the amount of allocated memory.
6790- * Passsing zero clears the memory.
6822+ * Passing zero clears the memory.
67916823 */
67926824 error_code allocate(size_t len) noexcept;
67936825 /** @private Capacity in bytes, in terms
@@ -9185,7 +9217,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
91859217 // floor(log(5**power)/log(2))
91869218 //
91879219 // Note that this is not magic: 152170/(1<<16) is
9188- // approximatively equal to log(5)/log(2).
9220+ // approximately equal to log(5)/log(2).
91899221 // The 1<<16 value is a power of two; we could use a
91909222 // larger power of 2 if we wanted to.
91919223 //
@@ -15555,7 +15587,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
1555515587 // floor(log(5**power)/log(2))
1555615588 //
1555715589 // Note that this is not magic: 152170/(1<<16) is
15558- // approximatively equal to log(5)/log(2).
15590+ // approximately equal to log(5)/log(2).
1555915591 // The 1<<16 value is a power of two; we could use a
1556015592 // larger power of 2 if we wanted to.
1556115593 //
@@ -20833,14 +20865,18 @@ namespace simd {
2083320865
2083420866 // Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
2083520867 // Passing a 0 value for mask would be equivalent to writing out every byte to output.
20836- // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes
20868+ // Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes
2083720869 // get written.
2083820870 // Design consideration: it seems like a function with the
2083920871 // signature simd8<L> compress(uint64_t mask) would be
2084020872 // sensible, but the AVX ISA makes this kind of approach difficult.
2084120873 template<typename L>
2084220874 simdjson_inline void compress(uint64_t mask, L * output) const {
20843- _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
20875+ // we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability
20876+ // (AMD Zen4 has terrible performance with it, it is effectively broken)
20877+ // _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
// A set bit in `mask` means "drop this byte", so the complement ~mask selects the bytes to keep.
// The replacement splits the old compress-and-store into a register-level compress followed by
// an ordinary unaligned store.
20878+ __m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);
20879+ _mm512_storeu_si512(output, compressed); // unmasked store of the full vector; a masked store could limit the write to the kept bytes
2084420880 }
2084520881
2084620882 template<typename L>
@@ -21785,7 +21821,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
2178521821 // floor(log(5**power)/log(2))
2178621822 //
2178721823 // Note that this is not magic: 152170/(1<<16) is
21788- // approximatively equal to log(5)/log(2).
21824+ // approximately equal to log(5)/log(2).
2178921825 // The 1<<16 value is a power of two; we could use a
2179021826 // larger power of 2 if we wanted to.
2179121827 //
@@ -23473,14 +23509,18 @@ namespace simd {
2347323509
2347423510 // Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
2347523511 // Passing a 0 value for mask would be equivalent to writing out every byte to output.
23476- // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes
23512+ // Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes
2347723513 // get written.
2347823514 // Design consideration: it seems like a function with the
2347923515 // signature simd8<L> compress(uint64_t mask) would be
2348023516 // sensible, but the AVX ISA makes this kind of approach difficult.
2348123517 template<typename L>
2348223518 simdjson_inline void compress(uint64_t mask, L * output) const {
23483- _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
23519+ // we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability
23520+ // (AMD Zen4 has terrible performance with it, it is effectively broken)
23521+ // _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
// A set bit in `mask` means "drop this byte", so the complement ~mask selects the bytes to keep.
// The replacement splits the old compress-and-store into a register-level compress followed by
// an ordinary unaligned store.
23522+ __m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);
23523+ _mm512_storeu_si512(output, compressed); // unmasked store of the full vector; a masked store could limit the write to the kept bytes
2348423524 }
2348523525
2348623526 template<typename L>
@@ -28171,7 +28211,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
2817128211 // floor(log(5**power)/log(2))
2817228212 //
2817328213 // Note that this is not magic: 152170/(1<<16) is
28174- // approximatively equal to log(5)/log(2).
28214+ // approximately equal to log(5)/log(2).
2817528215 // The 1<<16 value is a power of two; we could use a
2817628216 // larger power of 2 if we wanted to.
2817728217 //
@@ -34923,7 +34963,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
3492334963 // floor(log(5**power)/log(2))
3492434964 //
3492534965 // Note that this is not magic: 152170/(1<<16) is
34926- // approximatively equal to log(5)/log(2).
34966+ // approximately equal to log(5)/log(2).
3492734967 // The 1<<16 value is a power of two; we could use a
3492834968 // larger power of 2 if we wanted to.
3492934969 //
@@ -41499,7 +41539,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
4149941539 // floor(log(5**power)/log(2))
4150041540 //
4150141541 // Note that this is not magic: 152170/(1<<16) is
41502- // approximatively equal to log(5)/log(2).
41542+ // approximately equal to log(5)/log(2).
4150341543 // The 1<<16 value is a power of two; we could use a
4150441544 // larger power of 2 if we wanted to.
4150541545 //
@@ -47520,7 +47560,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
4752047560 // floor(log(5**power)/log(2))
4752147561 //
4752247562 // Note that this is not magic: 152170/(1<<16) is
47523- // approximatively equal to log(5)/log(2).
47563+ // approximately equal to log(5)/log(2).
4752447564 // The 1<<16 value is a power of two; we could use a
4752547565 // larger power of 2 if we wanted to.
4752647566 //
@@ -53140,7 +53180,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
5314053180 // floor(log(5**power)/log(2))
5314153181 //
5314253182 // Note that this is not magic: 152170/(1<<16) is
53143- // approximatively equal to log(5)/log(2).
53183+ // approximately equal to log(5)/log(2).
5314453184 // The 1<<16 value is a power of two; we could use a
5314553185 // larger power of 2 if we wanted to.
5314653186 //
0 commit comments