@@ -34,6 +34,9 @@ class ArrowStringArrayMixin:
def __init__(self, *args, **kwargs) -> None:
    """Placeholder constructor that always raises ``NotImplementedError``.

    Any positional or keyword arguments are accepted and ignored; this
    implementation performs no initialisation of its own.
    """
    raise NotImplementedError()
3636
def _from_pyarrow_array(self, pa_array) -> Self:
    """Wrap a pyarrow array in a new array of this object's type.

    Placeholder hook: this base implementation always raises
    ``NotImplementedError``. The string methods of this mixin hand their
    pyarrow compute results to this hook and return whatever it returns.

    Parameters
    ----------
    pa_array
        The pyarrow result to wrap.
    """
    raise NotImplementedError()
39+
def _convert_bool_result(self, result, na=lib.no_default, method_name=None):
    """Convert a bool-dtype result to the appropriate result type.

    Placeholder hook: this base implementation always raises
    ``NotImplementedError``.

    NOTE(review): how ``na`` and ``method_name`` are interpreted is not
    visible here -- confirm against the concrete implementations.
    """
    raise NotImplementedError()
@@ -50,31 +53,31 @@ def _str_len(self):
5053 return self ._convert_int_result (result )
5154
def _str_lower(self) -> Self:
    """Return a new array with every string lowercased.

    Runs pyarrow's ``utf8_lower`` kernel over the backing array and wraps
    the result with ``_from_pyarrow_array``.
    """
    lowered = pc.utf8_lower(self._pa_array)
    return self._from_pyarrow_array(lowered)
5457
def _str_upper(self) -> Self:
    """Return a new array with every string uppercased.

    Runs pyarrow's ``utf8_upper`` kernel over the backing array and wraps
    the result with ``_from_pyarrow_array``.
    """
    uppered = pc.utf8_upper(self._pa_array)
    return self._from_pyarrow_array(uppered)
5760
def _str_strip(self, to_strip=None) -> Self:
    """Trim every string using pyarrow's ``utf8_trim`` kernels.

    Parameters
    ----------
    to_strip : optional
        Forwarded as ``characters`` to ``utf8_trim``. When ``None``,
        ``utf8_trim_whitespace`` is used instead.

    Returns
    -------
    Self
        The trimmed values wrapped with ``_from_pyarrow_array``.
    """
    values = self._pa_array
    trimmed = (
        pc.utf8_trim_whitespace(values)
        if to_strip is None
        else pc.utf8_trim(values, characters=to_strip)
    )
    return self._from_pyarrow_array(trimmed)
6467
def _str_lstrip(self, to_strip=None) -> Self:
    """Left-trim every string using pyarrow's ``utf8_ltrim`` kernels.

    Parameters
    ----------
    to_strip : optional
        Forwarded as ``characters`` to ``utf8_ltrim``. When ``None``,
        ``utf8_ltrim_whitespace`` is used instead.

    Returns
    -------
    Self
        The trimmed values wrapped with ``_from_pyarrow_array``.
    """
    if to_strip is None:
        return self._from_pyarrow_array(pc.utf8_ltrim_whitespace(self._pa_array))
    return self._from_pyarrow_array(
        pc.utf8_ltrim(self._pa_array, characters=to_strip)
    )
7174
def _str_rstrip(self, to_strip=None) -> Self:
    """Right-trim every string using pyarrow's ``utf8_rtrim`` kernels.

    Parameters
    ----------
    to_strip : optional
        Forwarded as ``characters`` to ``utf8_rtrim``. When ``None``,
        ``utf8_rtrim_whitespace`` is used instead.

    Returns
    -------
    Self
        The trimmed values wrapped with ``_from_pyarrow_array``.
    """
    source = self._pa_array
    if to_strip is not None:
        trimmed = pc.utf8_rtrim(source, characters=to_strip)
    else:
        trimmed = pc.utf8_rtrim_whitespace(source)
    return self._from_pyarrow_array(trimmed)
7881
7982 def _str_pad (
8083 self ,
@@ -104,7 +107,9 @@ def _str_pad(
104107 raise ValueError (
105108 f"Invalid side: { side } . Side must be one of 'left', 'right', 'both'"
106109 )
107- return type (self )(pa_pad (self ._pa_array , width = width , padding = fillchar ))
110+ return self ._from_pyarrow_array (
111+ pa_pad (self ._pa_array , width = width , padding = fillchar )
112+ )
108113
109114 def _str_get (self , i : int ) -> Self :
110115 lengths = pc .utf8_length (self ._pa_array )
@@ -124,15 +129,17 @@ def _str_get(self, i: int) -> Self:
124129 )
125130 null_value = pa .scalar (None , type = self ._pa_array .type )
126131 result = pc .if_else (not_out_of_bounds , selected , null_value )
127- return type ( self ) (result )
132+ return self . _from_pyarrow_array (result )
128133
129134 def _str_slice (
130135 self , start : int | None = None , stop : int | None = None , step : int | None = None
131136 ) -> Self :
132137 if pa_version_under13p0 :
133138 # GH#59724
134139 result = self ._apply_elementwise (lambda val : val [start :stop :step ])
135- return type (self )(pa .chunked_array (result , type = self ._pa_array .type ))
140+ return self ._from_pyarrow_array (
141+ pa .chunked_array (result , type = self ._pa_array .type )
142+ )
136143 if start is None :
137144 if step is not None and step < 0 :
138145 # GH#59710
@@ -141,7 +148,7 @@ def _str_slice(
141148 start = 0
142149 if step is None :
143150 step = 1
144- return type ( self ) (
151+ return self . _from_pyarrow_array (
145152 pc .utf8_slice_codeunits (self ._pa_array , start = start , stop = stop , step = step )
146153 )
147154
@@ -154,7 +161,9 @@ def _str_slice_replace(
154161 start = 0
155162 if stop is None :
156163 stop = np .iinfo (np .int64 ).max
157- return type (self )(pc .utf8_replace_slice (self ._pa_array , start , stop , repl ))
164+ return self ._from_pyarrow_array (
165+ pc .utf8_replace_slice (self ._pa_array , start , stop , repl )
166+ )
158167
159168 def _str_replace (
160169 self ,
@@ -181,32 +190,32 @@ def _str_replace(
181190 replacement = repl ,
182191 max_replacements = pa_max_replacements ,
183192 )
184- return type ( self ) (result )
193+ return self . _from_pyarrow_array (result )
185194
def _str_capitalize(self) -> Self:
    """Apply pyarrow's ``utf8_capitalize`` kernel to every string.

    The kernel output is wrapped with ``_from_pyarrow_array`` and returned.
    """
    capitalized = pc.utf8_capitalize(self._pa_array)
    return self._from_pyarrow_array(capitalized)
188197
def _str_title(self) -> Self:
    """Apply pyarrow's ``utf8_title`` kernel to every string.

    The kernel output is wrapped with ``_from_pyarrow_array`` and returned.
    """
    titled = pc.utf8_title(self._pa_array)
    return self._from_pyarrow_array(titled)
191200
def _str_swapcase(self) -> Self:
    """Apply pyarrow's ``utf8_swapcase`` kernel to every string.

    The kernel output is wrapped with ``_from_pyarrow_array`` and returned.
    """
    swapped = pc.utf8_swapcase(self._pa_array)
    return self._from_pyarrow_array(swapped)
194203
def _str_removeprefix(self, prefix: str) -> Self:
    """Remove ``prefix`` from the start of every string that begins with it.

    Strings that do not start with ``prefix`` are passed through unchanged.

    Parameters
    ----------
    prefix : str
        The leading text to remove.

    Returns
    -------
    Self
        The resulting values wrapped with ``_from_pyarrow_array``.
    """
    if not pa_version_under13p0:
        # Vectorised path: slice off len(prefix) code units, but keep the
        # original value wherever the string does not start with the prefix.
        starts_with = pc.starts_with(self._pa_array, pattern=prefix)
        removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
        result = pc.if_else(starts_with, removed, self._pa_array)
        return self._from_pyarrow_array(result)

    # Fallback when ``pa_version_under13p0`` is set: apply
    # ``str.removeprefix`` element by element.
    def strip_prefix(val):
        return val.removeprefix(prefix)

    result = self._apply_elementwise(strip_prefix)
    # Pass the type explicitly (as the ``_str_slice`` fallback does) so that
    # pyarrow does not have to infer it: inference yields a ``null`` type for
    # all-null chunks and cannot build an array from an empty chunk list.
    # NOTE(review): assumes ``_apply_elementwise`` returns one list per chunk
    # -- confirm.
    return self._from_pyarrow_array(
        pa.chunked_array(result, type=self._pa_array.type)
    )
204213
def _str_removesuffix(self, suffix: str) -> Self:
    """Remove ``suffix`` from the end of every string that ends with it.

    Strings that do not end with ``suffix`` are passed through unchanged, and
    an empty ``suffix`` leaves every value unchanged (matching
    ``str.removesuffix``).

    Parameters
    ----------
    suffix : str
        The trailing text to remove.

    Returns
    -------
    Self
        The resulting values wrapped with ``_from_pyarrow_array``.
    """
    if not suffix:
        # With an empty suffix, ``stop=-len(suffix)`` below would be 0 and
        # the slice would reduce matching strings to "". There is nothing
        # to remove, so return the values as they are.
        return self._from_pyarrow_array(self._pa_array)
    ends_with = pc.ends_with(self._pa_array, pattern=suffix)
    removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
    # Keep the original value wherever the string does not end with suffix.
    result = pc.if_else(ends_with, removed, self._pa_array)
    return self._from_pyarrow_array(result)
210219
211220 def _str_startswith (
212221 self , pat : str | tuple [str , ...], na : Scalar | lib .NoDefault = lib .no_default
0 commit comments