@@ -49,6 +49,9 @@ typedef union _npy_static_string_u {
4949#define NPY_SHORT_STRING_SIZE_MASK 0x0F // 0000 1111
5050#define NPY_SHORT_STRING_MAX_SIZE \
5151 (sizeof(npy_static_string) - 1) // 15 or 7 depending on arch
52+ // the high bit of the one-byte size prefix flags a medium string, leaving seven bits for the size
53+ #define NPY_MEDIUM_STRING_MAX_SIZE 0x7F // 0111 1111 or 127
54+ #define NPY_MEDIUM_STRING_FLAG 0x80 // 1000 0000
5255
5356// Since this has no flags set, technically this is a heap-allocated string
5457// with size zero. Practically, that doesn't matter because we always do size
@@ -110,9 +113,13 @@ npy_string_arena_malloc(npy_string_arena *arena, npy_string_realloc_func r,
110113 size_t size )
111114{
112115 // reserve room for a size prefix ahead of the data: one char for medium strings, one size_t otherwise
113- size_t string_storage_size = size + sizeof (size_t );
114- // expand size to nearest multiple of 8 bytes to ensure 64 bit alignment
115- string_storage_size += (8 - string_storage_size % 8 );
116+ size_t string_storage_size ;
117+ if (size <= NPY_MEDIUM_STRING_MAX_SIZE ) {
118+ string_storage_size = size + sizeof (char );
119+ }
120+ else {
121+ string_storage_size = size + sizeof (size_t );
122+ }
116123 if ((arena -> size - arena -> cursor ) <= string_storage_size ) {
117124 // realloc the buffer so there is enough room
118125 // first guess is to double the size of the buffer
@@ -130,7 +137,7 @@ npy_string_arena_malloc(npy_string_arena *arena, npy_string_realloc_func r,
130137 // doubling the current size isn't enough
131138 newsize = 2 * (arena -> cursor + size );
132139 }
133- // realloc passed a NULL pointer acts like malloc
140+ // passing a NULL buffer to realloc is the same as malloc
134141 char * newbuf = r (arena -> buffer , newsize );
135142 if (newbuf == NULL ) {
136143 return NULL ;
@@ -139,9 +146,17 @@ npy_string_arena_malloc(npy_string_arena *arena, npy_string_realloc_func r,
139146 arena -> buffer = newbuf ;
140147 arena -> size = newsize ;
141148 }
142- size_t * size_loc = (size_t * )& arena -> buffer [arena -> cursor ];
143- * size_loc = size ;
144- char * ret = & arena -> buffer [arena -> cursor + sizeof (size_t )];
149+ char * ret ;
150+ if (size <= NPY_MEDIUM_STRING_MAX_SIZE ) {
151+ char * size_loc = (char * )& arena -> buffer [arena -> cursor ];
152+ * size_loc = size | NPY_MEDIUM_STRING_FLAG ;
153+ ret = & arena -> buffer [arena -> cursor + sizeof (char )];
154+ }
155+ else {
156+ size_t * size_ptr = (size_t * )& arena -> buffer [arena -> cursor ];
157+ memcpy (size_ptr , & size , sizeof (size_t ));
158+ ret = & arena -> buffer [arena -> cursor + sizeof (size_t )];
159+ }
145160 arena -> cursor += string_storage_size ;
146161 return ret ;
147162}
@@ -207,6 +222,12 @@ is_short_string(const npy_packed_static_string *s)
207222 return has_short_flag && !has_on_heap_flag ;
208223}
209224
225+ int
226+ is_medium_string (const char * buf )
227+ {
228+ return ((buf [0 ] & NPY_MEDIUM_STRING_FLAG ) != 0 );
229+ }
230+
210231int
211232npy_string_isnull (const npy_packed_static_string * s )
212233{
@@ -286,7 +307,15 @@ heap_or_arena_allocate(npy_string_allocator *allocator,
286307 if (buf == NULL ) {
287308 return NULL ;
288309 }
289- size_t alloc_size = * ((size_t * )(buf - 1 ));
310+ size_t alloc_size ;
311+ if (is_medium_string (buf )) {
312+ // stored in a char so direct access is OK
313+ alloc_size = (size_t ) * (buf - 1 ) & ~NPY_MEDIUM_STRING_FLAG ;
314+ }
315+ else {
316+ // not necessarily memory-aligned, so need to use memcpy
317+ memcpy (& alloc_size , ((size_t * )buf - 1 ), sizeof (size_t ));
318+ }
290319 if (size <= alloc_size ) {
291320 // we have room!
292321 * flags = NPY_STRING_ARENA_FREED ;
0 commit comments