@@ -89,7 +89,7 @@ xor 80 then << 1 then |
898911 -> 01 -> 1
9090=#
9191
92- const hi_mask = 0x8080_8080_8080_8080
92+ const hi_mask = CHUNKSZ == 4 ? 0x8080_8080 : 0x8080_8080_8080_8080
9393
9494@inline _count_cont (v) = (v = xor (v, hi_mask); count_ones (xor (((v << 1 ) | v), hi_mask) & hi_mask))
9595@inline msk_lead (v) = (v = xor (v, hi_mask); xor (xor (((v << 1 ) | v), hi_mask) & hi_mask, hi_mask))
@@ -106,11 +106,11 @@ const hi_mask = 0x8080_8080_8080_8080
106106end
107107
108108_length_al (:: MultiCU , :: Type{UTF8CSE} , beg:: Ptr{UInt8} , cnt:: Int ) =
109- (pnt = reinterpret (Ptr{UInt64 }, beg); _align_len_utf8 (pnt, cnt, unsafe_load (pnt)))
109+ (pnt = reinterpret (Ptr{UInt }, beg); _align_len_utf8 (pnt, cnt, unsafe_load (pnt)))
110110
111111function _length (:: MultiCU , :: Type{UTF8CSE} , beg:: Ptr{UInt8} , cnt:: Int )
112112 align = reinterpret (UInt, beg)
113- pnt = reinterpret (Ptr{UInt64 }, align & ~ CHUNKMSK)
113+ pnt = reinterpret (Ptr{UInt }, align & ~ CHUNKMSK)
114114 v = unsafe_load (pnt)
115115 if (align &= CHUNKMSK) != 0
116116 msk = _mask_bytes (align)
132132
133133@inline function _check_mask_ul (beg, cnt, msk)
134134 align = reinterpret (UInt, beg)
135- pnt = reinterpret (Ptr{UInt64 }, align & ~ CHUNKMSK)
135+ pnt = reinterpret (Ptr{UInt }, align & ~ CHUNKMSK)
136136 v = unsafe_load (pnt)
137137 if (align &= CHUNKMSK) != 0
138138 v &= ~ _mask_bytes (align)
@@ -162,7 +162,7 @@ is_ascii(vec::Vector{T}) where {T<:CodeUnitTypes} =
162162is_ascii (str:: Str{C} ) where {C<: Union {UTF8_CSEs,LatinCSE,Binary_CSEs,UTF16CSE,UCS2CSE,
163163 Text2CSE,Text4CSE,UTF32CSE}} =
164164 (cnt = sizeof (str)) == 0 ? true :
165- @preserve str _check_mask_al (reinterpret (Ptr{UInt64 }, pointer (str)), cnt,
165+ @preserve str _check_mask_al (reinterpret (Ptr{UInt }, pointer (str)), cnt,
166166 _ascii_mask (codeunit (C)))
167167
168168# Todo! Here you need to see that 0b11yyyyxx at least 1 y must be set,
181181
182182@inline function _check_latin_utf8_ul (beg, cnt)
183183 align = reinterpret (UInt, beg)
184- pnt = reinterpret (Ptr{UInt64 }, align & ~ CHUNKMSK)
184+ pnt = reinterpret (Ptr{UInt }, align & ~ CHUNKMSK)
185185 v = unsafe_load (pnt)
186186 if (align &= CHUNKMSK) != 0
187187 v &= ~ _mask_bytes (align)
192192
193193is_latin (str:: Str{UTF8CSE} ) =
194194 (siz = sizeof (str)) == 0 ? true :
195- @preserve str _check_latin_utf8_al (reinterpret (Ptr{UInt64 }, pointer (str)), siz)
195+ @preserve str _check_latin_utf8_al (reinterpret (Ptr{UInt }, pointer (str)), siz)
196196
197197is_latin (str:: SubString{<:Str{UTF8CSE}} ) =
198198 (cnt = sizeof (str)) == 0 ? true : @preserve str _check_latin_utf8_ul (pointer (str), cnt)
224224
225225@inline function _check_bmp_utf8_ul (beg, cnt)
226226 align = reinterpret (UInt, beg)
227- pnt = reinterpret (Ptr{UInt64 }, align & ~ CHUNKMSK)
227+ pnt = reinterpret (Ptr{UInt }, align & ~ CHUNKMSK)
228228 v = unsafe_load (pnt)
229229 if (align &= CHUNKMSK) != 0
230230 v &= ~ _mask_bytes (align)
235235
236236is_bmp (str:: Str{UTF8CSE} ) =
237237 (cnt = sizeof (str)) == 0 ? true :
238- @preserve str _check_bmp_utf8_al (reinterpret (Ptr{UInt64 }, pointer (str)), cnt)
238+ @preserve str _check_bmp_utf8_al (reinterpret (Ptr{UInt }, pointer (str)), cnt)
239239
240240is_bmp (str:: SubString{<:Str{UTF8CSE}} ) =
241241 (cnt = sizeof (str)) == 0 ? true : @preserve str _check_bmp_utf8_ul (pointer (str), cnt)
0 commit comments