Skip to content

Design primitive layout

Yonggang Luo edited this page Nov 27, 2024 · 19 revisions

Design primitive layout

  • Optimize lit_is_utf8_string_pair_magic by suffix-array
  • Optimize ecma_string_t with something like QuickJS atoms
#include <assert.h>
#include <stdint.h>

/** Hash value type for strings: a 32-bit unsigned integer, stored in ecma_string_header_t. */
typedef uint32_t lit_string_hash_t;

/**
 * Kinds of string storage. The values are numbered explicitly; there are
 * five of them, so they fit a 3-bit field.
 */
typedef enum
{
  /**
   * Small ASCII string, 0 <= size < 128.
   * NOTE(review): the original notes disagree here -- one says the whole
   * content is resident in ecma_string_t, the other says the data is on the
   * heap. Confirm which one is intended.
   */
  ECMA_STRING_CONTAINER_SIZE8_ASCII = 0,

  /** ASCII string, 128 <= size < 2^32, actual data is on the heap */
  ECMA_STRING_CONTAINER_SIZE32_ASCII = 1,

  /** UTF16 string, 128 <= size < 2^32, actual data is on the heap */
  ECMA_STRING_CONTAINER_SIZE32_UTF16 = 2,

  /** ASCII string of any size, actual data is allocated externally */
  ECMA_STRING_CONTAINER_EXTERNAL_ASCII = 3,

  /** UTF16 string of any size, actual data is allocated externally */
  ECMA_STRING_CONTAINER_EXTERNAL_UTF16 = 4
} ecma_string_container_t;
/**
 * Header placed at the start of ecma_string_t and ecma_external_string_t.
 * The two bit-fields add up to 32 bits (3 + 29), followed by the hash.
 */
typedef struct
{
  /** Storage kind; presumably an ecma_string_container_t value -- TODO confirm */
  uint32_t container : 3;
  /** Reference counter for the string */
  uint32_t refs : 29;
  lit_string_hash_t hash; /**< hash of the ASCII/UTF16 string */
} ecma_string_header_t;

/**
 * String descriptor: a common header followed by a union whose active
 * member depends on the header's 'container' field.
 */
typedef struct
{
  ecma_string_header_t header;

  /**
   * Actual data or its place in the container (depending on 'container' field)
   */
  union
  {
    /* Variant with an 8-bit size. */
    struct
    {
      uint8_t size;
      /* 7 bytes of storage inside the descriptor.
       * NOTE(review): the enum comment allows sizes up to 127 for the SIZE8
       * container; confirm whether the data is meant to continue past the
       * end of this struct. */
      uint8_t ascii[7];
    } size8;

    /* Variant with a 32-bit size. */
    struct
    {
      uint32_t size;
      /* 4 bytes of storage inside the descriptor, viewed either as four
       * 8-bit units or as two 16-bit units. */
      union
      {
        uint8_t ascii[4];
        uint16_t wide[2];
      } u;
    } size32;
  } u;
} ecma_string_t;

/*
 * ecma_string_t contains no pointer-sized members (only fixed-width integer
 * fields), so the expected size is 16 bytes on both 32-bit and 64-bit
 * targets. A single unconditional check therefore covers both cases.
 */
static_assert (sizeof (ecma_string_t) == 16, "ecma_string_t must be 16 bytes");

/**
 * Body of an externally allocated string: a pointer to the character data
 * plus an opaque user pointer.
 */
typedef struct
{
  /* Pointer to the string data, viewed as 8-bit or 16-bit units; which view
   * applies presumably follows the EXTERNAL_ASCII / EXTERNAL_UTF16 container
   * kind -- TODO confirm. */
  union
  {
    uint8_t *ascii;
    uint16_t *wide;
  } u;
  void *user_p; /**< user pointer passed to the callback when the string is freed */
} ecma_external_string_body_t;

/**
 * Descriptor of a string whose data is allocated externally.
 *
 * When uintptr_t is wider than 32 bits the body is embedded directly in the
 * descriptor; otherwise only a pointer to a separately stored body is kept.
 */
typedef struct
{
  ecma_string_header_t header;

  /**
   * Size of the string, stored in a pointer-sized integer.
   * NOTE(review): the unit (bytes vs. code units) is not stated here.
   */
  uintptr_t size;
#if UINTPTR_MAX > UINT32_MAX
  ecma_external_string_body_t body;
#else
  ecma_external_string_body_t *body_ptr;
#endif
} ecma_external_string_t;

/*
 * Size checks for ecma_external_string_t:
 *  - 64-bit: 8-byte header + 8-byte size + 16-byte embedded body = 32 bytes
 *  - 32-bit: 8-byte header + 4-byte size + 4-byte body pointer   = 16 bytes
 *
 * Both branches must check ecma_external_string_t; checking ecma_string_t in
 * the 32-bit branch would leave the external layout unverified there.
 */
#if UINTPTR_MAX > UINT32_MAX
static_assert (sizeof (ecma_external_string_t) == 32, "ecma_external_string_t must be 32 bytes on 64-bit targets");
#else
static_assert (sizeof (ecma_external_string_t) == 16, "ecma_external_string_t must be 16 bytes on 32-bit targets");
#endif
/**
 * @brief Compile-only test commands for the 32-bit and 64-bit targets:
 * clang-cl --target=i686-pc-windows-msvc -c test-api.c
 * clang-cl --target=x86_64-pc-windows-msvc -c test-api.c
 */
  • 8 ECMA_TYPE_INTEGER
    • directly encoded number value (31-bit signed integer)
  • 2 ECMA_TYPE_FLOAT
    • pointer to a 64 bit floating point number (8 byte aligned)
  • 2 ECMA_TYPE_POINTER
    • pointer to (object, function, bigint, exception, extend point for future) (8 byte aligned)
  • 1 ECMA_TYPE_STRING
    • pointer to the description of a string (an ecma_string_t) whose length is > 3 (16-byte aligned)
  • 1 ECMA_TYPE_SYMBOL
    • pointer to the description of a symbol (an ecma_string_t) whose length is > 3 (16-byte aligned)
  • 1 ECMA_TYPE_DIRECT
    • directly encoded string/symbol value (0 <= size <= 3); one bit indicates whether it is an integer string (size <= 3)
    • directly encoded magic symbol/string value (size > 3)
    • directly encoded simple value
  • 1 ECMA_TYPE_BIGINT
    • directly encoded bigint value (28-bit signed integer)

ecma_value_direct_string_t

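Checking whether an ecma_value_direct_string_t is an ASCII string can be done with v & 0x808080: the string is ASCII when the result is zero (no character byte has its high bit set).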

/**
 * Directly encoded string/symbol value holding up to three CESU8 bytes
 * together with its size and tag bits (32 bits in total).
 *
 * Declared with 'typedef' so that ecma_value_direct_string_t names a type;
 * without it the declaration would define a variable of an anonymous struct
 * type instead.
 */
typedef struct
{
  uint8_t c1; /* first CESU8 byte */
  uint8_t c2; /* second CESU8 byte */
  uint8_t c3; /* third CESU8 byte */
  uint8_t size : 2; /* size of CESU8 string */
  /* UTF16 code point count can be calculated with
   * size - ((popcount(v & 0x808080) + 1) >> 1),
   * where v is presumably the whole 32-bit encoded value -- TODO confirm */
  uint8_t is_integer : 1; /* set when the string is an integer string */
  uint8_t is_symbol : 1; /* set for a symbol; presumably clear for a plain string */
  uint8_t type : 4; /* value type tag */
} ecma_value_direct_string_t;
/**
 * Directly encoded magic string/symbol value: a 26-bit identifier plus tag
 * bits (32 bits in total).
 *
 * Declared with 'typedef' so that ecma_value_magic_string_t names a type;
 * without it the declaration would define a variable of an anonymous struct
 * type instead.
 */
typedef struct
{
  uint32_t id : 26; /* magic string identifier */
  /* Original note: "When is_ascii==true && size < 0, means it's integer string".
   * NOTE(review): this struct has no 'size' member; confirm which size the
   * note refers to. */
  uint32_t is_ascii : 1;
  uint32_t is_symbol : 1; /* set for a symbol; presumably clear for a plain string */
  uint32_t type : 4; /* value type tag */
} ecma_value_magic_string_t;

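Code point count calculation (columns: byte pattern of c1 c2 c3; (popcount(v & 0x808080) + 1) >> 1; resulting code point count)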

0yyyzzzz0yyyzzzz0yyyzzzz (0+1)>>1=0 3
0yyyzzzz110xxxyy10yyzzzz (2+1)>>1=1 2
110xxxyy10yyzzzz0yyyzzzz (2+1)>>1=1 2
1110wwww10xxxxyy10yyzzzz (3+1)>>1=2 1


0yyyzzzz0yyyzzzz00000000 (0+1)>>1=0 2
110xxxyy10yyzzzz00000000 (2+1)>>1=1 1

Clone this wiki locally