22
33#include "static_string.h"
44
5- // defined this way so NPY_EMPTY_STRING has an in-memory representation that is
6- // distinct from a zero-filled struct, allowing us to use a NPY_NULL_STRING
7- // to represent a sentinel value
8- const npy_static_string NPY_EMPTY_STRING = {0 , "\0" };
9- const npy_static_string NPY_NULL_STRING = {0 , NULL };
5+ // Since this has no flags set, technically this is a heap-allocated string
6+ // with size zero. In practice that doesn't matter, because we always do size
7+ // checks before accessing heap data, but it may be confusing. The nice part
8+ // of this choice is that a calloc'd array buffer (e.g. from np.empty) is
9+ // filled with empty elements for free.
10+ const npy_static_string NPY_EMPTY_STRING = {
11+ .base = {.direct_buffer = {.flags_and_size = 0 , .buf = {0 }}}};
12+ // zero-filled, but with the NPY_STRING_MISSING flag set to distinguish it from the empty string
13+ const npy_static_string NPY_NULL_STRING = {
14+ .base = {.direct_buffer = {.flags_and_size = NPY_STRING_MISSING ,
15+ .buf = {0 }}}};
16+
17+ int
18+ is_short_string (const npy_static_string * s )
19+ {
20+ unsigned char high_byte = s -> base .direct_buffer .flags_and_size ;
21+ return (high_byte & NPY_STRING_SHORT ) == NPY_STRING_SHORT ;
22+ }
23+
24+ int
25+ npy_string_isnull (const npy_static_string * s )
26+ {
27+ unsigned char high_byte = s -> base .direct_buffer .flags_and_size ;
28+ return (high_byte & NPY_STRING_MISSING ) == NPY_STRING_MISSING ;
29+ }
30+
31+ int
32+ is_not_a_vstring (const npy_static_string * s )
33+ {
34+ return is_short_string (s ) || npy_string_isnull (s );
35+ }
1036
1137int
1238npy_string_newsize (const char * init , size_t size , npy_static_string * to_init )
1339{
14- if (( to_init == NULL ) || (to_init -> buf != NULL ) ||
15- ( npy_string_size ( to_init ) != 0 ) ) {
40+ if (to_init == NULL || npy_string_size (to_init ) != 0 ||
41+ size > MAX_STRING_SIZE ) {
1642 return -2 ;
1743 }
1844
@@ -21,114 +47,128 @@ npy_string_newsize(const char *init, size_t size, npy_static_string *to_init)
2147 return 0 ;
2248 }
2349
24- char * ret_buf = (char * )PyMem_RawMalloc (sizeof (char ) * size );
25-
26- if (ret_buf == NULL ) {
27- return -1 ;
28- }
29-
30- to_init -> size = size ;
50+ if (size > NPY_SHORT_STRING_MAX_SIZE ) {
51+ char * ret_buf = (char * )PyMem_RawMalloc (sizeof (char ) * size );
3152
32- memcpy (ret_buf , init , size );
53+ if (ret_buf == NULL ) {
54+ return -1 ;
55+ }
3356
34- to_init -> buf = ret_buf ;
57+ to_init -> base . vstring . size = size ;
3558
36- return 0 ;
37- }
59+ memcpy (ret_buf , init , size );
3860
39- void
40- npy_string_free (npy_static_string * str )
41- {
42- if (str -> buf != NULL && str -> buf != NPY_EMPTY_STRING .buf ) {
43- PyMem_RawFree (str -> buf );
44- str -> buf = NULL ;
45- }
46- str -> size = 0 ;
47- }
48-
49- int
50- npy_string_dup (const npy_static_string * in , npy_static_string * out )
51- {
52- if (npy_string_isnull (in )) {
53- out -> size = 0 ;
54- out -> buf = NULL ;
55- return 0 ;
61+ to_init -> base .vstring .buf = ret_buf ;
5662 }
5763 else {
58- return npy_string_newsize (in -> buf , in -> size , out );
64+ // size can be no longer than 7 or 15, depending on CPU architecture
65+ // in either case, the size data is in at most the least significant 4
66+ // bits of the byte so it's safe to | with one of 0x10, 0x20, 0x40, or
67+ // 0x80.
68+ to_init -> base .direct_buffer .flags_and_size = NPY_STRING_SHORT | size ;
69+ memcpy (& (to_init -> base .direct_buffer .buf ), init , size );
5970 }
71+
72+ return 0 ;
6073}
6174
6275int
6376npy_string_newemptysize (size_t size , npy_static_string * out )
6477{
65- if (out -> size != 0 || out -> buf != NULL ) {
78+ if (out == NULL || npy_string_size ( out ) != 0 || size > MAX_STRING_SIZE ) {
6679 return -2 ;
6780 }
6881
69- out -> size = size ;
70-
7182 if (size == 0 ) {
7283 * out = NPY_EMPTY_STRING ;
7384 return 0 ;
7485 }
7586
76- char * buf = (char * )PyMem_RawMalloc (sizeof (char ) * size );
87+ if (size > NPY_SHORT_STRING_MAX_SIZE ) {
88+ char * buf = (char * )PyMem_RawMalloc (sizeof (char ) * size );
7789
78- if (buf == NULL ) {
79- return -1 ;
80- }
90+ if (buf == NULL ) {
91+ return -1 ;
92+ }
8193
82- out -> buf = buf ;
94+ out -> base .vstring .buf = buf ;
95+ out -> base .vstring .size = size ;
96+ }
97+ else {
98+ out -> base .direct_buffer .flags_and_size = NPY_STRING_SHORT | size ;
99+ }
83100
84101 return 0 ;
85102}
86103
104+ void
105+ npy_string_free (npy_static_string * str )
106+ {
107+ if (is_not_a_vstring (str )) {
108+ // zero out
109+ memcpy (str , & NPY_EMPTY_STRING , sizeof (npy_static_string ));
110+ }
111+ else {
112+ if (str -> base .vstring .size != 0 ) {
113+ PyMem_RawFree (str -> base .vstring .buf );
114+ }
115+ str -> base .vstring .buf = NULL ;
116+ str -> base .vstring .size = 0 ;
117+ }
118+ }
119+
120+ int
121+ npy_string_dup (const npy_static_string * in , npy_static_string * out )
122+ {
123+ if (npy_string_isnull (in )) {
124+ * out = NPY_NULL_STRING ;
125+ return 0 ;
126+ }
127+
128+ return npy_string_newsize (npy_string_buf (in ), npy_string_size (in ), out );
129+ }
130+
87131int
88132npy_string_cmp (const npy_static_string * s1 , const npy_static_string * s2 )
89133{
90- size_t minsize = s1 -> size < s2 -> size ? s1 -> size : s2 -> size ;
134+ size_t s1_size = npy_string_size (s1 );
135+ size_t s2_size = npy_string_size (s2 );
136+
137+ char * s1_buf = npy_string_buf (s1 );
138+ char * s2_buf = npy_string_buf (s2 );
91139
92- int cmp = strncmp (s1 -> buf , s2 -> buf , minsize );
140+ size_t minsize = s1_size < s2_size ? s1_size : s2_size ;
141+
142+ int cmp = strncmp (s1_buf , s2_buf , minsize );
93143
94144 if (cmp == 0 ) {
95- if (s1 -> size > minsize ) {
145+ if (s1_size > minsize ) {
96146 return 1 ;
97147 }
98- if (s2 -> size > minsize ) {
148+ if (s2_size > minsize ) {
99149 return -1 ;
100150 }
101151 }
102152
103153 return cmp ;
104154}
105155
106- int
107- npy_string_isnull (const npy_static_string * in )
108- {
109- if (in -> size == 0 && in -> buf == NULL ) {
110- return 1 ;
111- }
112- return 0 ;
113- }
114-
115156size_t
116157npy_string_size (const npy_static_string * s )
117158{
118- return s -> size ;
159+ if (is_short_string (s )) {
160+ unsigned char high_byte = s -> base .direct_buffer .flags_and_size ;
161+ return high_byte & NPY_SHORT_STRING_SIZE_MASK ;
162+ }
163+ return s -> base .vstring .size ;
119164}
120165
121166char *
122167npy_string_buf (const npy_static_string * s )
123168{
124- return s -> buf ;
125- }
126-
127- int
128- npy_string_size_and_buf (const npy_static_string * s , size_t * size , char * * buf )
129- {
130- * size = s -> size ;
131- * buf = s -> buf ;
132-
133- return 0 ;
169+ if (is_short_string (s )) {
170+ // the cast drops const, is there a better way?
171+ return (char * )& s -> base .direct_buffer .buf [0 ];
172+ }
173+ return s -> base .vstring .buf ;
134174}
0 commit comments