22
33import Base: iterate, lastindex, getindex, sizeof, length, ncodeunits, codeunit, isvalid, read, write, setindex!, string, convert
44
"""
    Characters{N} <: AbstractString

Fixed-width string whose contents are stored inline as an `NTuple{N,UInt8}`.

# Constructors
- `Characters{N}(v::Vector{UInt8})`: convert `v` with `NTuple{N,UInt8}(v)`.
- `Characters{N}(itr)`: return `missing` when `itr` is empty; otherwise right-pad `itr`
  to `N` with `rpad` and convert each element of the padded result to a `UInt8`.
"""
struct Characters{N} <: AbstractString
    data::NTuple{N,UInt8}

    function Characters{N}(v::Vector{UInt8}) where {N}
        return new(NTuple{N,UInt8}(v))
    end

    function Characters{N}(itr) where {N}
        # Empty input is represented by `missing`, not by an all-blank value.
        isempty(itr) && return missing
        return new(NTuple{N,UInt8}(rpad(itr, N)))
    end
end
1515
16- function Characters {N, M } (v:: Vector{UInt8} , v2) where N where M
16+ function Characters {N} (v:: Vector{UInt8} , v2) where N
1717
1818 @simd for i in 1 : min (N, length (v))
1919 @inbounds v2[i] = v[i]
@@ -22,122 +22,74 @@ function Characters{N, M}(v::Vector{UInt8}, v2) where N where M
2222 @inbounds v2[i] = 0x20
2323 end
2424
25- Characters {N, M } (v2)
25+ Characters {N} (v2)
2626end
"""
    Characters{N}(v::Vector{UInt8}, v2, st, en)

Build a `Characters{N}` from the bytes of `v` starting at index `st`, using `v2` as a
scratch buffer.

`min(N, en - st + 1)` bytes are copied from `v` into the front of `v2`; the remaining
positions up to `N` are set to `0x20` (ASCII space). `v2` is overwritten and then passed
to `Characters{N}(v2)`.
"""
function Characters{N}(v::Vector{UInt8}, v2, st, en) where {N}
    ncopy = min(N, en - st + 1)
    copyto!(v2, 1, v, st, ncopy)
    # Bounds-checked padding: building the view raises a `BoundsError` when `v2` is
    # shorter than `N`, instead of writing past the end of the buffer under `@inbounds`.
    fill!(view(v2, (ncopy + 1):N), 0x20)
    return Characters{N}(v2)
end
4336
# Converting a `Characters` value to `Characters` returns the same object.
Characters(s::Characters) = s
4538
"""
    Characters(s::AbstractString)

Return `missing` for an empty string; otherwise build a `Characters{ncodeunits(s)}` from
the collected code units of `s`.

The width parameter is taken from a runtime value, so the concrete return type is not
inferrable from the argument type alone.
"""
function Characters(s::AbstractString)
    isempty(s) && return missing
    return Characters{ncodeunits(s)}(collect(codeunits(s)))
end
5844
"""
    @c_str(str)

String macro for `c"..."` literals: calls `Characters(str)` when the macro is expanded
and returns the result.
"""
macro c_str(str)
    return Characters(str)
end
6248
"""
    String(s::Characters)

Copy the first `ncodeunits(s)` bytes of `s` into a newly allocated `String`.
"""
function Base.String(s::T) where {T<:Characters}
    len = ncodeunits(s)
    out = Base._string_n(len)
    # Put the value in a `Ref` so there is an address to copy its bytes from.
    ref = Ref{T}(s)
    # Keep both objects rooted while raw pointers to them are in use.
    GC.@preserve ref out begin
        ptr = convert(Ptr{UInt8}, Base.unsafe_convert(Ptr{T}, ref))
        unsafe_copyto!(pointer(out), ptr, len)
    end
    return out
end

"""
    print(io::IO, s::Characters)

Print `s` to `io` by converting it to a `String` first.
"""
function Base.print(io::IO, s::T) where {T<:Characters}
    return print(io, String(s))
end
# `string` of a single `Characters` value is its `String` conversion.
Base.string(s::Characters) = String(s)
7165
"""
    ==(s1::Characters, s2::Characters)

Two `Characters` values are equal when `cmp(s1, s2)` is zero.
"""
function Base.:(==)(s1::Characters, s2::Characters)
    return cmp(s1, s2) == 0
end
8969
"""
    ==(s1::Characters, s2::AbstractString)

Compare the first `length(s1)` code units of `s1` with the code units of `s2`.
"""
function Base.:(==)(s1::Characters, s2::AbstractString)
    return view(codeunits(s1), 1:length(s1)) == codeunits(s2)
end
# Symmetric case: delegate to the `(Characters, AbstractString)` method.
Base.:(==)(s1::AbstractString, s2::Characters) = s2 == s1
9975
# Same criterion as `==` for two `Characters` values: `cmp` returns zero.
Base.isequal(s1::Characters, s2::Characters) = cmp(s1, s2) == 0
"""
    isequal(s1::Characters, s2::AbstractString)

Apply `isequal` to the first `length(s1)` code units of `s1` and the code units of `s2`.
"""
function Base.isequal(s1::Characters, s2::AbstractString)
    return isequal(view(codeunits(s1), 1:length(s1)), codeunits(s2))
end
# Symmetric case: delegate to the `(Characters, AbstractString)` method.
Base.isequal(s1::AbstractString, s2::Characters) = isequal(s2, s1)
11182
"""
    isless(s1::Characters, s2::Characters)

`s1` sorts before `s2` when `cmp(s1, s2)` is negative.
"""
function Base.isless(s1::Characters, s2::Characters)
    return cmp(s1, s2) < 0
end
12486
12587
"""
    isless(s1::Characters, s2::AbstractString)

Order by applying `isless` to the first `length(s1)` code units of `s1` and the code
units of `s2`.
"""
function Base.isless(s1::Characters, s2::AbstractString)
    return isless(view(codeunits(s1), 1:length(s1)), codeunits(s2))
end
"""
    isless(s1::AbstractString, s2::Characters)

Order by applying `isless` to the code units of `s1` and the first `length(s2)` code
units of `s2`.
"""
function Base.isless(s1::AbstractString, s2::Characters)
    return isless(codeunits(s1), view(codeunits(s2), 1:length(s2)))
end
14294
14395function iterate (s:: Characters{N} , i:: Int = 1 ) where N
@@ -161,11 +113,10 @@ function length(s::Characters)
161113end
162114
163115
# The number of code units is defined as `length(s)` for `Characters`.
ncodeunits(s::Characters) = length(s)
165117
# The code unit type of `Characters` is always `UInt8`, for the type and for instances.
codeunit(::Type{Characters{N}}) where {N} = UInt8
codeunit(::Characters) = UInt8
# The `i`-th code unit is the `i`-th stored byte.
codeunit(s::Characters, i::Integer) = s.data[i]
170121
# An index is valid exactly when it passes `checkbounds(Bool, s, i)`.
isvalid(s::Characters, i::Int) = checkbounds(Bool, s, i)
@@ -174,18 +125,16 @@ Characters(s::Symbol) = Characters(string(s))
174125
# `missing` passes through both the untyped and the width-typed constructor.
Characters(::Missing) = missing
Characters{N}(::Missing) where {N} = missing
178128
"""
    read(io::IO, T::Type{Characters{N}})

Read one `Characters{N}` value from `io` by filling a `Ref{T}` with `read!` and
returning its contents.
"""
function read(io::IO, T::Type{Characters{N}}) where {N}
    return read!(io, Ref{T}())[]::T
end
182132
"""
    write(io::IO, s::Characters{N})

Write `s` to `io` by wrapping it in a `Ref` and returning the result of `write(io, Ref(s))`.
"""
function write(io::IO, s::Characters{N}) where {N}
    return write(io, Ref(s))
end
186136
187- # TODO I don't know how I should do this for UInt16
188- function Base. hash (s:: Characters{N, UInt8} , h:: UInt ) where N
137+ function Base. hash (s:: Characters{N} , h:: UInt ) where N
189138 h += Base. memhash_seed
190139 ref = Ref (s. data)
191140 ccall (Base. memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32), ref, length (s), h % UInt32) + h
0 commit comments