@@ -44,11 +44,14 @@ typedef union _npy_static_string_u {
4444#define NPY_STRING_SHORT 0x40 // 0100 0000
4545#define NPY_STRING_ARENA_FREED 0x20 // 0010 0000
4646#define NPY_STRING_ON_HEAP 0x10 // 0001 0000
47+ #define NPY_STRING_MEDIUM 0x08 // 0000 1000; lies inside NPY_SHORT_STRING_SIZE_MASK, so only meaningful when NPY_STRING_SHORT is clear
48+ #define NPY_STRING_FLAG_MASK 0xF8 // 1111 1000; the top five bits, which include SHORT, ARENA_FREED, ON_HEAP and MEDIUM
4749
4850// short string sizes fit in a 4-bit integer
4951#define NPY_SHORT_STRING_SIZE_MASK 0x0F // 0000 1111
5052#define NPY_SHORT_STRING_MAX_SIZE \
51- (sizeof(npy_static_string) - 1) // 15 or 7 depending on arch
53+ (sizeof(npy_static_string) - 1) // 15 or 7 depending on arch
54+ #define NPY_MEDIUM_STRING_MAX_SIZE 0xFF // 255: largest size that fits in the one-byte arena size prefix
5255
5356// Since this has no flags set, technically this is a heap-allocated string
5457// with size zero. Practically, that doesn't matter because we always do size
@@ -86,8 +89,7 @@ struct npy_string_allocator {
8689void
8790set_vstring_size (_npy_static_string_u * str , size_t size ) // store `size` in str->vstring.size, leaving flags_and_size at its prior value
8891{
89- unsigned char * flags = & str -> direct_buffer .flags_and_size ;
90- unsigned char current_flags = * flags & ~NPY_SHORT_STRING_SIZE_MASK ;
92+ unsigned char current_flags = str -> direct_buffer .flags_and_size ; // save the whole byte (no masking) before the size write
9193 str -> vstring .size = size ;
9294 str -> direct_buffer .flags_and_size = current_flags ; // restore the saved byte; presumably the size store can overwrite it because both are members of the same union -- confirm layout
9395}
@@ -110,9 +112,13 @@ npy_string_arena_malloc(npy_string_arena *arena, npy_string_realloc_func r,
110112 size_t size )
111113{
112114 // reserve room for a size prefix ahead of the string: one byte for sizes up to NPY_MEDIUM_STRING_MAX_SIZE, a full size_t otherwise
113- size_t string_storage_size = size + sizeof (size_t );
114- // expand size to nearest multiple of 8 bytes to ensure 64 bit alignment
115- string_storage_size += (8 - string_storage_size % 8 );
115+ size_t string_storage_size ;
116+ if (size <= NPY_MEDIUM_STRING_MAX_SIZE ) {
117+ string_storage_size = size + sizeof (unsigned char );
118+ }
119+ else {
120+ string_storage_size = size + sizeof (size_t );
121+ }
116122 if ((arena -> size - arena -> cursor ) <= string_storage_size ) {
117123 // realloc the buffer so there is enough room
118124 // first guess is to double the size of the buffer
@@ -130,7 +136,7 @@ npy_string_arena_malloc(npy_string_arena *arena, npy_string_realloc_func r,
130136 // doubling the current size isn't enough
131137 newsize = 2 * (arena -> cursor + size );
132138 }
133- // realloc passed a NULL pointer acts like malloc
139+ // passing a NULL buffer to realloc is the same as malloc
134140 char * newbuf = r (arena -> buffer , newsize );
135141 if (newbuf == NULL ) {
136142 return NULL ;
@@ -139,9 +145,18 @@ npy_string_arena_malloc(npy_string_arena *arena, npy_string_realloc_func r,
139145 arena -> buffer = newbuf ;
140146 arena -> size = newsize ;
141147 }
142- size_t * size_loc = (size_t * )& arena -> buffer [arena -> cursor ];
143- * size_loc = size ;
144- char * ret = & arena -> buffer [arena -> cursor + sizeof (size_t )];
148+ char * ret ;
149+ if (size <= NPY_MEDIUM_STRING_MAX_SIZE ) {
150+ unsigned char * size_loc =
151+ (unsigned char * )& arena -> buffer [arena -> cursor ];
152+ * size_loc = size ;
153+ ret = & arena -> buffer [arena -> cursor + sizeof (char )];
154+ }
155+ else {
156+ char * size_ptr = (char * )& arena -> buffer [arena -> cursor ];
157+ memcpy (size_ptr , & size , sizeof (size_t ));
158+ ret = & arena -> buffer [arena -> cursor + sizeof (size_t )];
159+ }
145160 arena -> cursor += string_storage_size ;
146161 return ret ;
147162}
@@ -207,6 +222,15 @@ is_short_string(const npy_packed_static_string *s)
207222 return has_short_flag && !has_on_heap_flag ;
208223}
209224
225+ int // returns 1 when the MEDIUM flag is set and the SHORT flag is clear, otherwise 0
226+ is_medium_string (const _npy_static_string_u * s )
227+ {
228+ unsigned char high_byte = s -> direct_buffer .flags_and_size ;
229+ int has_short_flag = (high_byte & NPY_STRING_SHORT );
230+ int has_medium_flag = (high_byte & NPY_STRING_MEDIUM );
231+ return (!has_short_flag && has_medium_flag ); // SHORT must be clear: bit 0x08 also falls inside NPY_SHORT_STRING_SIZE_MASK (0x0F)
232+ }
233+
210234int
211235npy_string_isnull (const npy_packed_static_string * s )
212236{
@@ -286,10 +310,19 @@ heap_or_arena_allocate(npy_string_allocator *allocator,
286310 if (buf == NULL ) {
287311 return NULL ;
288312 }
289- size_t alloc_size = * ((size_t * )(buf - 1 ));
313+ size_t alloc_size ;
314+ if (is_medium_string (to_init_u )) {
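316+ // stored in a single char, so direct (unaligned) access is OK; NOTE(review): if buf is a plain char * and char is signed, sizes above 127 would sign-extend in the size_t cast -- confirm and read via unsigned char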
316+ alloc_size = (size_t ) * (buf - 1 );
317+ }
318+ else {
319+ // not necessarily memory-aligned, so need to use memcpy
320+ size_t * size_loc = (size_t * )((uintptr_t )buf - sizeof (size_t ));
321+ memcpy (& alloc_size , size_loc , sizeof (size_t ));
322+ }
290323 if (size <= alloc_size ) {
291324 // we have room!
292- * flags = NPY_STRING_ARENA_FREED ;
325+ * flags &= ~ NPY_STRING_ARENA_FREED ;
293326 return buf ;
294327 }
295328 else {
@@ -316,8 +349,12 @@ heap_or_arena_allocate(npy_string_allocator *allocator,
316349 if (arena == NULL ) {
317350 return NULL ;
318351 }
319- return npy_string_arena_malloc (arena , allocator -> realloc ,
320- sizeof (char ) * size );
352+ char * ret = npy_string_arena_malloc (arena , allocator -> realloc ,
353+ sizeof (char ) * size );
354+ if (size < NPY_MEDIUM_STRING_MAX_SIZE ) {
355+ * flags |= NPY_STRING_MEDIUM ;
356+ }
357+ return ret ;
321358}
322359
323360int
0 commit comments