@@ -7,6 +7,144 @@ Licensed under MIT License, see LICENSE.md
77Based initially on julia/test/strings/util.jl
88=#
99
"""
    _concat(T, a, b)

Copy the code units of `a` followed by those of `b` into a newly allocated
buffer and return that buffer.

`T` is the code unit type handed to `_allocate`; `a` and `b` must support
`ncodeunits` and `pointer`. The value returned is the first element of the
pair produced by `_allocate`, not a wrapped string.
"""
function _concat(T, a, b)
    la = ncodeunits(a)
    lb = ncodeunits(b)
    buf, out = _allocate(T, la + lb)
    @preserve a unsafe_copyto!(out, pointer(a), la)
    # Adding an integer to a `Ptr` moves it by that many *bytes*, whereas `la`
    # counts code units, so the offset must be scaled by the code unit size.
    # NOTE(review): assumes `out` is a `Ptr{T}` as returned by `_allocate` — confirm.
    @preserve b unsafe_copyto!(out + la * sizeof(T), pointer(b), lb)
    return buf
end
18+
"""
    _string(T, a, b, rest)

Copy the code units of `a`, `b` and every element of `rest`, in that order,
into a newly allocated buffer and return that buffer.

`T` is the code unit type handed to `_allocate`. `a`, `b` and the elements of
`rest` must support `ncodeunits` and `pointer`; `rest` is iterated twice
(once to size the buffer, once to fill it).
"""
function _string(T, a, b, rest)
    la = ncodeunits(a)
    lb = ncodeunits(b)
    total = la + lb
    @inbounds for str in rest
        total += ncodeunits(str)
    end
    buf, out = _allocate(T, total)
    # Adding an integer to a `Ptr` moves it by that many *bytes*, so every
    # advance (counted in code units) is scaled by the code unit size.
    # NOTE(review): assumes `out` is a `Ptr{T}` as returned by `_allocate` — confirm.
    unit = sizeof(T)
    @preserve a unsafe_copyto!(out, pointer(a), la)
    out += la * unit
    @preserve b unsafe_copyto!(out, pointer(b), lb)
    out += lb * unit
    @inbounds for str in rest
        n = ncodeunits(str)
        @preserve str unsafe_copyto!(out, pointer(str), n)
        out += n * unit
    end
    return buf
end
38+
"""
    _string(T, coll)

Copy the code units of every element of `coll`, in iteration order, into a
newly allocated buffer and return that buffer.

`T` is the code unit type handed to `_allocate`. Elements of `coll` must
support `ncodeunits` and `pointer`; `coll` is iterated twice (once to size
the buffer, once to fill it).
"""
function _string(T, coll)
    total = 0
    @inbounds for str in coll
        total += ncodeunits(str)
    end
    buf, out = _allocate(T, total)
    # Adding an integer to a `Ptr` moves it by that many *bytes*, so every
    # advance (counted in code units) is scaled by the code unit size.
    # NOTE(review): assumes `out` is a `Ptr{T}` as returned by `_allocate` — confirm.
    unit = sizeof(T)
    @inbounds for str in coll
        n = ncodeunits(str)
        @preserve str unsafe_copyto!(out, pointer(str), n)
        out += n * unit
    end
    return buf
end
52+
# Handle concatenation where all strings share the same CSE, and all characters share the same character set
54+ #=
55+ """
56+ WIP: this is rather tricky.
57+ It really should handle any type of Chr / Str / CSE, not just the ones defined
58+ in CharSetEncodings, ChrBase and StrBase
Ideally, it could also handle mixes with String and Char (or other AbstractString / AbstractChar
types).
61+ It may need to do two or even three passes, one to determine the correct type to be output,
62+ another to determine the output length, and finally another to copy the strings / characters into
63+ the buffer.
64+ The result type should be based on promotion rules, i.e. outputting UCS2Str if only ASCII, Latin, UCS2 characters and strings are in the list.
65+ This is difficult to do in a way that will still be type stable.
66+ """
67+
68+ function _string_chr(a::Union{<:Chr{CS,T}, <:Str{C}, SubString{<:Str{C}}}...
69+ ) where {CS<:CharSet,T,C<:CSE{CS}}
70+ len = 0
71+ for v in a
72+ if v isa Chr
73+ len += 1
74+ else
75+ len += ncodeunits(v)
76+ end
77+ end
78+ buf, out = _allocate(T, len)
79+ for v in a
80+ len = ncodeunits(str)
81+ @preserve str unsafe_copyto!(out, pointer(str), len)
82+ out += len
83+ end
84+ buf
85+ end
86+ =#
87+
# `string` for `Str` values and substrings of them.
#
# A single argument is returned as-is. The concatenating methods below require
# at least two arguments: a signature made only of `x::T...` also matches an
# empty argument list, and several such methods with unrelated element types
# are then ambiguous for a zero-argument call (and overlap with the
# single-argument method above them).
#
# Each method gathers the code units with `_string` and wraps the buffer in the
# encoding named in its `Str(...)` call.
#
# NOTE(review): two or more all-ASCII arguments match both the Latin and the
# UTF-8 method, and neither is more specific. Unless an ASCII-only method is
# defined elsewhere, that call is ambiguous — confirm and add one if missing.
string(c::MaybeSub{<:Str}) = c

string(a::MaybeSub{<:Str{<:Union{ASCIICSE,Latin_CSEs}}},
       b::MaybeSub{<:Str{<:Union{ASCIICSE,Latin_CSEs}}},
       rest::MaybeSub{<:Str{<:Union{ASCIICSE,Latin_CSEs}}}...) =
    Str(LatinCSE, _string(UInt8, a, b, rest))

string(a::MaybeSub{<:Str{<:Union{ASCIICSE,UTF8CSE}}},
       b::MaybeSub{<:Str{<:Union{ASCIICSE,UTF8CSE}}},
       rest::MaybeSub{<:Str{<:Union{ASCIICSE,UTF8CSE}}}...) =
    Str(UTF8CSE, _string(UInt8, a, b, rest))

string(a::MaybeSub{<:Str{<:UCS2_CSEs}},
       b::MaybeSub{<:Str{<:UCS2_CSEs}},
       rest::MaybeSub{<:Str{<:UCS2_CSEs}}...) =
    Str(UCS2CSE, _string(UInt16, a, b, rest))

string(a::MaybeSub{<:Str{<:Union{UCS2_CSEs,UTF16CSE}}},
       b::MaybeSub{<:Str{<:Union{UCS2_CSEs,UTF16CSE}}},
       rest::MaybeSub{<:Str{<:Union{UCS2_CSEs,UTF16CSE}}}...) =
    Str(UTF16CSE, _string(UInt16, a, b, rest))

string(a::MaybeSub{<:Str{<:UTF32_CSEs}},
       b::MaybeSub{<:Str{<:UTF32_CSEs}},
       rest::MaybeSub{<:Str{<:UTF32_CSEs}}...) =
    Str(UTF32CSE, _string(UInt32, a, b, rest))
94+
95+ #=
96+ const MS_Str{C} = MaybeSub{<:Str{C}}
97+ string(a::MS_Str{C}, b::MS_Str{C}) where {C<:CSE} = Str(C, _concat(codeunit(C), a, b))
98+ string(a::MS_Str{C}, b::MS_Str{C}, c::MS_Str{C}...) where {C<:CSE} =
99+ Str(C, _string(codeunit(C), a, b, c))
100+
101+ string(a::T, b::T) where {T<:MS_Str{ASCIICSE}} = string(ASCIICSE, _concat(UInt8, a, b))
102+ string(a::T, b::T) where {T<:MS_Str{ASCIICSE}} = string(ASCIICSE, _concat(UInt8, a, b))
103+ string(a::T, b::T) where {T<:MS_Str{ASCIICSE}} = string(ASCIICSE, _concat(UInt8, a, b))
104+
105+ const MS_AL = MS_Str{<:Union{ASCIICSE,Latin_CSEs}}
106+ string(a::MS_AL, b::MS_AL) = Str(LatinCSE, _concat(UInt8, a, b))
107+ string(a::MS_AL, b::MS_AL, c::MS_AL...) = Str(LatinCSE, _string(UInt8, a, b, c))
108+
109+ const MS_AU = MS_Str{<:Union{ASCIICSE,UTF8CSE}}
110+ string(a::MS_AU, b::MS_AU) = Str(UTF8CSE, _concat(UInt8, a, b))
111+ string(a::MS_AU, b::MS_AU, c::MS_AU...) = Str(UTF8CSE, _string(UInt8, a, b, c))
112+
113+ const MS_U2 = MS_Str{<:UCS2_CSEs}
114+ string(a::MS_U2, b::MS_U2) = Str(UCS2CSE, _concat(UInt16, a, b))
115+ string(a::MS_U2, b::MS_U2, c::MS_U2...) = Str(UCS2CSE, _string(UInt16, a, b, c))
116+
117+ const MS_UT = MS_Str{<:Union{UCS2_CSEs,UTF16CSE}}
118+ string(a::MS_UT, b::MS_UT) = Str(UTF16CSE, _concat(UInt16, a, b))
119+ string(a::MS_UT, b::MS_UT, c::MS_UT...) = Str(UTF16CSE, _string(UInt16, a, b, c))
120+
121+ const MS_U4 = MS_Str{<:UTF32_CSEs}
122+ string(a::MS_U4, b::MS_U4) = Str(UTF32CSE, _concat(UInt32, a, b))
123+ string(a::MS_U4, b::MS_U4, c::MS_U4...) = Str(UTF32CSE, _string(UInt32, a, b, c))
124+ =#
125+
126+ #=
127+ string(c::MaybeSub{<:Str{<:Union{ASCIICSE,Latin_CSEs}}}...) =
128+ length(c) == 1 ? c[1] : Str(LatinCSE, _string(UInt8, c))
129+
130+ string(c::MaybeSub{<:Str{<:Union{ASCIICSE,UTF8CSE}}}...) =
131+ length(c) == 1 ? c[1] : Str(UTF8CSE, _string(UInt8, c))
132+
133+ string(c::MaybeSub{<:Str{<:UCS2_CSEs}}...) =
134+ length(c) == 1 ? c[1] : Str(UCS2CSE, _string(UInt16, c))
135+
136+ string(c::MaybeSub{<:Str{<:Union{UCS2_CSEs,UTF16CSE}}}...) =
137+ length(c) == 1 ? c[1] : Str(UTF16CSE, _string(UInt16, c))
138+
139+ string(c::MaybeSub{<:Str{<:UTF32_CSEs}}...) =
140+ length(c) == 1 ? c[1] : Str(UTF32CSE, _string(UInt32, c))
141+ =#
# The `string` concatenation methods for Latin, UTF-8, UCS-2, UTF-16 and UTF-32
# `Str` arguments are defined once, earlier in this file. A second copy at this
# point only redefined methods that already existed, so it has been removed.
147+
10148# starts with and ends with predicates
11149
12150starts_with (a:: MaybeSub{<:Str{C}} , b:: MaybeSub{<:Str{C}} ) where {C<: CSE } =
0 commit comments