@@ -20,19 +20,30 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
2020
2121 Py_XINCREF (na_object );
2222 ((StringDTypeObject * )new )-> na_object = na_object ;
23+ npy_packed_static_string packed_na_name = * NPY_EMPTY_STRING ;
24+ npy_packed_static_string packed_default_string = * NPY_EMPTY_STRING ;
2325 int hasnull = na_object != NULL ;
2426 int has_nan_na = 0 ;
2527 int has_string_na = 0 ;
26- ss default_string = EMPTY_STRING ;
2728 if (hasnull ) {
2829 // first check for a string
2930 if (PyUnicode_Check (na_object )) {
3031 has_string_na = 1 ;
3132 Py_ssize_t size = 0 ;
3233 const char * buf = PyUnicode_AsUTF8AndSize (na_object , & size );
33- default_string .len = size ;
34- // discards const, how to avoid?
35- default_string .buf = (char * )buf ;
34+ int res = npy_string_newsize (buf , (size_t )size ,
35+ & packed_default_string );
36+ if (res == -1 ) {
37+ PyErr_NoMemory ();
38+ Py_DECREF (new );
39+ return NULL ;
40+ }
41+ else if (res == -2 ) {
42+ // this should never happen
43+ assert (0 );
44+ Py_DECREF (new );
45+ return NULL ;
46+ }
3647 }
3748 else {
3849 // treat as nan-like if != comparison returns a object whose truth
@@ -53,15 +64,50 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
5364 }
5465 Py_DECREF (eq );
5566 }
67+ PyObject * na_pystr = PyObject_Str (na_object );
68+ if (na_pystr == NULL ) {
69+ Py_DECREF (new );
70+ return NULL ;
71+ }
72+
73+ Py_ssize_t size = 0 ;
74+ const char * utf8_ptr = PyUnicode_AsUTF8AndSize (na_pystr , & size );
75+ // utf8_ptr is passed as const char * (no cast needed); NOTE(review): a NULL result from PyUnicode_AsUTF8AndSize is not checked here
76+ int res = npy_string_newsize (utf8_ptr , (size_t )size , & packed_na_name );
77+ if (res == -1 ) {
78+ PyErr_NoMemory ();
79+ Py_DECREF (new );
80+ return NULL ;
81+ }
82+ else if (res == -2 ) {
83+ // this should never happen
84+ assert (0 );
85+ Py_DECREF (new );
86+ return NULL ;
87+ }
88+ Py_DECREF (na_pystr );
5689 }
57- ((StringDTypeObject * )new )-> has_nan_na = has_nan_na ;
58- ((StringDTypeObject * )new )-> has_string_na = has_string_na ;
59- ((StringDTypeObject * )new )-> default_string = default_string ;
60- ((StringDTypeObject * )new )-> coerce = coerce ;
90+
91+ StringDTypeObject * snew = (StringDTypeObject * )new ;
92+
93+ snew -> has_nan_na = has_nan_na ;
94+ snew -> has_string_na = has_string_na ;
95+ snew -> packed_default_string = packed_default_string ;
96+ snew -> packed_na_name = packed_na_name ;
97+ snew -> coerce = coerce ;
98+
99+ npy_static_string default_string = {0 , NULL };
100+ npy_load_string (& snew -> packed_default_string , & default_string );
101+
102+ npy_static_string na_name = {0 , NULL };
103+ npy_load_string (& snew -> packed_na_name , & na_name );
104+
105+ snew -> na_name = na_name ;
106+ snew -> default_string = default_string ;
61107
62108 PyArray_Descr * base = (PyArray_Descr * )new ;
63- base -> elsize = sizeof (ss );
64- base -> alignment = _Alignof(ss );
109+ base -> elsize = sizeof (npy_static_string );
110+ base -> alignment = _Alignof(npy_static_string );
65111 base -> flags |= NPY_NEEDS_INIT ;
66112 base -> flags |= NPY_LIST_PICKLE ;
67113 base -> flags |= NPY_ITEM_REFCOUNT ;
@@ -161,20 +207,19 @@ string_discover_descriptor_from_pyobject(PyTypeObject *NPY_UNUSED(cls),
161207int
162208stringdtype_setitem (StringDTypeObject * descr , PyObject * obj , char * * dataptr )
163209{
164- ss * sdata = (ss * )dataptr ;
210+ npy_packed_static_string * sdata = (npy_packed_static_string * )dataptr ;
165211
166212 // free if dataptr holds preexisting string data,
167- // ssfree does a NULL check
168- ssfree (sdata );
213+ // npy_string_free does a NULL check and checks for small strings
214+ npy_string_free (sdata );
169215
170216 // borrow reference
171217 PyObject * na_object = descr -> na_object ;
172218
173219 // setting NA *must* check pointer equality since NA types might not
174220 // allow equality
175221 if (na_object != NULL && obj == na_object ) {
176- // do nothing, ssfree already NULLed the struct ssdata points to
177- // so it already contains a NA value
222+ * sdata = * NPY_NULL_STRING ;
178223 }
179224 else {
180225 PyObject * val_obj = get_value (obj , descr -> coerce );
@@ -190,8 +235,7 @@ stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, char **dataptr)
190235 return -1 ;
191236 }
192237
193- // copies contents of val into item_val->buf
194- int res = ssnewlen (val , length , sdata );
238+ int res = npy_string_newsize (val , length , sdata );
195239
196240 if (res == -1 ) {
197241 PyErr_NoMemory ();
@@ -213,10 +257,11 @@ static PyObject *
213257stringdtype_getitem (StringDTypeObject * descr , char * * dataptr )
214258{
215259 PyObject * val_obj = NULL ;
216- ss * sdata = (ss * )dataptr ;
260+ npy_packed_static_string * psdata = (npy_packed_static_string * )dataptr ;
261+ npy_static_string sdata = {0 , NULL };
217262 int hasnull = descr -> na_object != NULL ;
218263
219- if (ss_isnull ( sdata )) {
264+ if (npy_load_string ( psdata , & sdata )) {
220265 if (hasnull ) {
221266 PyObject * na_object = descr -> na_object ;
222267 Py_INCREF (na_object );
@@ -227,9 +272,7 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
227272 }
228273 }
229274 else {
230- char * data = sdata -> buf ;
231- size_t len = sdata -> len ;
232- val_obj = PyUnicode_FromStringAndSize (data , len );
275+ val_obj = PyUnicode_FromStringAndSize (sdata .buf , sdata .size );
233276 if (val_obj == NULL ) {
234277 return NULL ;
235278 }
@@ -254,7 +297,7 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
// Reports whether the string element stored at `data` is non-empty.
// The second argument is marked NPY_UNUSED and is not read.
254297npy_bool
255298nonzero (void * data , void * NPY_UNUSED (arr ))
256299{
// removed (-) line: read the len field of the old `ss` struct directly
257- return (( ss * )data )-> len != 0 ;
// added (+) line: compares the value returned by npy_string_size for the
// packed string against zero instead of touching a struct field
300+ return npy_string_size (( npy_packed_static_string * )data ) != 0 ;
258301}
259302
260303// Implementation of PyArray_CompareFunc.
@@ -278,11 +321,13 @@ _compare(void *a, void *b, StringDTypeObject *descr)
278321 return 0 ;
279322 }
280323 }
281- const ss * default_string = & descr -> default_string ;
282- const ss * ss_a = (ss * )a ;
283- const ss * ss_b = (ss * )b ;
284- int a_is_null = ss_isnull (ss_a );
285- int b_is_null = ss_isnull (ss_b );
324+ npy_static_string * default_string = & descr -> default_string ;
325+ const npy_packed_static_string * ps_a = (npy_packed_static_string * )a ;
326+ npy_static_string s_a = {0 , NULL };
327+ int a_is_null = npy_load_string (ps_a , & s_a );
328+ const npy_packed_static_string * ps_b = (npy_packed_static_string * )b ;
329+ npy_static_string s_b = {0 , NULL };
330+ int b_is_null = npy_load_string (ps_b , & s_b );
286331 if (NPY_UNLIKELY (a_is_null || b_is_null )) {
287332 if (hasnull && !has_string_na ) {
288333 if (has_nan_na ) {
@@ -303,22 +348,22 @@ _compare(void *a, void *b, StringDTypeObject *descr)
303348 }
304349 else {
305350 if (a_is_null ) {
306- ss_a = default_string ;
351+ s_a = * default_string ;
307352 }
308353 if (b_is_null ) {
309- ss_b = default_string ;
354+ s_b = * default_string ;
310355 }
311356 }
312357 }
313- return sscmp ( ss_a , ss_b );
358+ return npy_string_cmp ( & s_a , & s_b );
314359}
315360
316361// PyArray_ArgFunc
317362// The max element is the one with the highest unicode code point.
318363int
319364argmax (void * data , npy_intp n , npy_intp * max_ind , void * arr )
320365{
321- ss * dptr = (ss * )data ;
366+ npy_packed_static_string * dptr = (npy_packed_static_string * )data ;
322367 * max_ind = 0 ;
323368 for (int i = 1 ; i < n ; i ++ ) {
324369 if (compare (& dptr [i ], & dptr [* max_ind ], arr ) > 0 ) {
@@ -333,7 +378,7 @@ argmax(void *data, npy_intp n, npy_intp *max_ind, void *arr)
333378int
334379argmin (void * data , npy_intp n , npy_intp * min_ind , void * arr )
335380{
336- ss * dptr = (ss * )data ;
381+ npy_packed_static_string * dptr = (npy_packed_static_string * )data ;
337382 * min_ind = 0 ;
338383 for (int i = 1 ; i < n ; i ++ ) {
339384 if (compare (& dptr [i ], & dptr [* min_ind ], arr ) < 0 ) {
@@ -358,8 +403,8 @@ stringdtype_clear_loop(void *NPY_UNUSED(traverse_context),
358403{
359404 while (size -- ) {
360405 if (data != NULL ) {
361- ssfree (( ss * )data );
362- memset (data , 0 , sizeof (ss ));
406+ npy_string_free (( npy_packed_static_string * )data );
407+ memset (data , 0 , sizeof (npy_packed_static_string ));
363408 }
364409 data += stride ;
365410 }
@@ -388,9 +433,7 @@ stringdtype_fill_zero_loop(void *NPY_UNUSED(traverse_context),
388433 NpyAuxData * NPY_UNUSED (auxdata ))
389434{
390435 while (size -- ) {
391- if (ssnewlen ("" , 0 , (ss * )(data )) < 0 ) {
392- return -1 ;
393- }
436+ * (npy_packed_static_string * )(data ) = * NPY_EMPTY_STRING ;
394437 data += stride ;
395438 }
396439 return 0 ;
@@ -538,6 +581,9 @@ stringdtype_new(PyTypeObject *NPY_UNUSED(cls), PyObject *args, PyObject *kwds)
// Deallocator for a StringDTypeObject: drops the descriptor's reference to
// its na_object, frees the two packed strings stored on the descriptor, and
// then chains to PyArrayDescr_Type's tp_dealloc.
538581static void
539582stringdtype_dealloc (StringDTypeObject * self )
540583{
// Py_XDECREF is used because na_object may be NULL (no NA object configured)
584+ Py_XDECREF (self -> na_object );
// NOTE(review): the unpacked default_string / na_name fields are presumably
// views loaded from these packed strings, so they should not be used after
// this point -- confirm against npy_load_string semantics
585+ npy_string_free (& self -> packed_default_string );
586+ npy_string_free (& self -> packed_na_name );
// hand the object to the base descriptor type's deallocator last
541587 PyArrayDescr_Type .tp_dealloc ((PyObject * )self );
542588}
543589
0 commit comments