From 5ac5eade65dcf3d6c2616fa19d44e7d395b9320f Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+ndossche@users.noreply.github.com> Date: Thu, 6 Nov 2025 21:17:58 +0100 Subject: [PATCH] [PoC] support sorting packed arrays without converting to hashed arrays first --- Zend/zend_hash.c | 99 +++++++++++++++++++++++++++----------------- ext/standard/array.c | 6 +-- 2 files changed, 65 insertions(+), 40 deletions(-) diff --git a/Zend/zend_hash.c b/Zend/zend_hash.c index 6064b4221832..51db806944cb 100644 --- a/Zend/zend_hash.c +++ b/Zend/zend_hash.c @@ -2966,19 +2966,13 @@ ZEND_API void zend_hash_bucket_renum_swap(Bucket *p, Bucket *q) q->val = val; } -ZEND_API void zend_hash_bucket_packed_swap(Bucket *p, Bucket *q) +ZEND_API void zend_hash_packed_renum_swap(zval *p, zval *q) { zval val; - zend_ulong h; - - val = p->val; - h = p->h; - p->val = q->val; - p->h = q->h; - - q->val = val; - q->h = h; + val = *p; + *p = *q; + *q = val; } static void zend_hash_sort_internal(HashTable *ht, sort_func_t sort, bucket_compare_func_t compar, bool renumber) @@ -2993,30 +2987,47 @@ static void zend_hash_sort_internal(HashTable *ht, sort_func_t sort, bucket_comp return; } - if (HT_IS_PACKED(ht)) { - zend_hash_packed_to_hash(ht); // TODO: ??? - } - if (HT_IS_WITHOUT_HOLES(ht)) { /* Store original order of elements in extra space to allow stable sorting. */ - for (i = 0; i < ht->nNumUsed; i++) { - Z_EXTRA(ht->arData[i].val) = i; + if (HT_IS_PACKED(ht)) { + for (i = 0; i < ht->nNumUsed; i++) { + Z_EXTRA(ht->arPacked[i]) = i; + } + } else { + for (i = 0; i < ht->nNumUsed; i++) { + Z_EXTRA(ht->arData[i].val) = i; + } } } else { /* Remove holes and store original order. */ - for (j = 0, i = 0; j < ht->nNumUsed; j++) { - p = ht->arData + j; - if (UNEXPECTED(Z_TYPE(p->val) == IS_UNDEF)) continue; - if (i != j) { - ht->arData[i] = *p; + if (HT_IS_PACKED(ht)) { + for (j = 0, i = 0; j < ht->nNumUsed; j++) { + zval *zv = ht->arPacked + j; + if (UNEXPECTED(Z_TYPE_P(zv) == IS_UNDEF)) continue; + if (i != j) { + ht->arPacked[i] = *zv; + } + Z_EXTRA(ht->arPacked[i]) = i; + i++; + } + } else { + for (j = 0, i = 0; j < ht->nNumUsed; j++) { + p = ht->arData + j; + if (UNEXPECTED(Z_TYPE(p->val) == IS_UNDEF)) continue; + if (i != j) { + ht->arData[i] = *p; + } + Z_EXTRA(ht->arData[i].val) = i; + i++; } - Z_EXTRA(ht->arData[i].val) = i; - i++; } ht->nNumUsed = i; } - if (!HT_IS_PACKED(ht)) { + if (HT_IS_PACKED(ht)) { + sort((void *)ht->arPacked, ht->nNumUsed, sizeof(zval), (compare_func_t) compar, + (swap_func_t)zend_hash_packed_renum_swap); + } else { /* We broke the hash collisions chains overriding Z_NEXT() by Z_EXTRA(). * Reset the hash headers table as well to avoid possible inconsistent * access on recursive data structures. @@ -3024,21 +3035,22 @@ static void zend_hash_sort_internal(HashTable *ht, sort_func_t sort, bucket_comp * See Zend/tests/bug63882_2.phpt */ HT_HASH_RESET(ht); - } - sort((void *)ht->arData, ht->nNumUsed, sizeof(Bucket), (compare_func_t) compar, - (swap_func_t)(renumber? zend_hash_bucket_renum_swap : - (HT_IS_PACKED(ht) ? zend_hash_bucket_packed_swap : zend_hash_bucket_swap))); + sort((void *)ht->arData, ht->nNumUsed, sizeof(Bucket), (compare_func_t) compar, + (swap_func_t)(renumber? zend_hash_bucket_renum_swap : zend_hash_bucket_swap)); + } ht->nInternalPointer = 0; - +// TODO: simpify some conditions prolly if (renumber) { - for (j = 0; j < i; j++) { - p = ht->arData + j; - p->h = j; - if (p->key) { - zend_string_release(p->key); - p->key = NULL; + if (!HT_IS_PACKED(ht)) { + for (j = 0; j < i; j++) { + p = ht->arData + j; + p->h = j; + if (p->key) { + zend_string_release(p->key); + p->key = NULL; + } } } @@ -3046,7 +3058,16 @@ static void zend_hash_sort_internal(HashTable *ht, sort_func_t sort, bucket_comp } if (HT_IS_PACKED(ht)) { if (!renumber) { + /* Necessary to allow CoW violation due to the refcount increase in zend_array_sort() */ +#if ZEND_DEBUG + uint32_t keep_flags = HT_FLAGS(ht) & HASH_FLAG_ALLOW_COW_VIOLATION; + HT_ALLOW_COW_VIOLATION(ht); +#endif zend_hash_packed_to_hash(ht); +#if ZEND_DEBUG + HT_FLAGS(ht) &= ~HASH_FLAG_ALLOW_COW_VIOLATION; + HT_FLAGS(ht) |= keep_flags; +#endif } } else { if (renumber) { @@ -3076,6 +3097,11 @@ static void zend_hash_sort_internal(HashTable *ht, sort_func_t sort, bucket_comp ZEND_API void ZEND_FASTCALL zend_hash_sort_ex(HashTable *ht, sort_func_t sort, bucket_compare_func_t compar, bool renumber) { HT_ASSERT_RC1(ht); + + if (!renumber && HT_IS_PACKED(ht)) { + zend_hash_packed_to_hash(ht); + } + zend_hash_sort_internal(ht, sort, compar, renumber); } @@ -3083,8 +3109,7 @@ ZEND_API void ZEND_FASTCALL zend_array_sort_ex(HashTable *ht, sort_func_t sort, { HT_ASSERT_RC1(ht); - /* Unpack the array early to avoid RCn assertion failures. */ - if (HT_IS_PACKED(ht)) { + if (!renumber && HT_IS_PACKED(ht)) { zend_hash_packed_to_hash(ht); } diff --git a/ext/standard/array.c b/ext/standard/array.c index 9f7709092dde..28c5ed3c482d 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -285,15 +285,15 @@ static zend_always_inline int php_array_key_compare_string_locale_unstable_i(Buc static zend_always_inline int php_array_data_compare_unstable_i(Bucket *f, Bucket *s) /* {{{ */ { - int result = zend_compare(&f->val, &s->val); + zval *lhs = (zval *) f; + zval *rhs = (zval *) s; + int result = zend_compare(lhs, rhs); /* Special enums handling for array_unique. We don't want to add this logic to zend_compare as * that would be observable via comparison operators. */ - zval *rhs = &s->val; ZVAL_DEREF(rhs); if (UNEXPECTED(Z_TYPE_P(rhs) == IS_OBJECT) && result == ZEND_UNCOMPARABLE && (Z_OBJCE_P(rhs)->ce_flags & ZEND_ACC_ENUM)) { - zval *lhs = &f->val; ZVAL_DEREF(lhs); if (Z_TYPE_P(lhs) == IS_OBJECT && (Z_OBJCE_P(lhs)->ce_flags & ZEND_ACC_ENUM)) { // Order doesn't matter, we just need to group the same enum values