Skip to content

Design primitive layout

Yonggang Luo edited this page Nov 28, 2024 · 19 revisions

Design primitive layout

  • Optimize lit_is_utf8_string_pair_magic by using a suffix array
  • Optimize ecma_string_t with a QuickJS-style atom mechanism
#include <assert.h>
#include <stdint.h>

/**
 * Build-time switch between the 32-bit and the 16-bit variants of the
 * types declared below. Falls back to 0 when the build does not set it.
 */
#if !defined(JERRY_CPOINTER_32_BIT)
#define JERRY_CPOINTER_32_BIT 0
#endif /* !defined(JERRY_CPOINTER_32_BIT) */

#if !JERRY_CPOINTER_32_BIT
/** Type of the 'hash' members of the string descriptors (16-bit variant). */
typedef uint16_t lit_string_hash_t;
/** Declared type of the string header bit-fields (16-bit variant). */
typedef uint16_t ecma_ref_t;
#else /* JERRY_CPOINTER_32_BIT */
/** Type of the 'hash' members of the string descriptors (32-bit variant). */
typedef uint32_t lit_string_hash_t;
/** Declared type of the string header bit-fields (32-bit variant). */
typedef uint32_t ecma_ref_t;
#endif /* !JERRY_CPOINTER_32_BIT */

typedef struct
{
#if JERRY_CPOINTER_32_BIT
  ecma_ref_t type : 2;
  ecma_ref_t ascii : 1;
  ecma_ref_t refs : 29;
#else
  ecma_ref_t type : 2;
  ecma_ref_t ascii : 1;
  ecma_ref_t refs : 13;
#endif
} ecma_string_t;

#if JERRY_CPOINTER_32_BIT
static_assert (sizeof (ecma_string_t) == 4, "");
#else
static_assert (sizeof (ecma_string_t) == 2, "");
#endif

/**
 * Kind of string container. Four values, so it fits the 2-bit 'type'
 * bit-field of the string header.
 */
typedef enum
{
  ECMA_STRING_CONTAINER_SIZE8 = 0, /**< 0 <= size <= 4, ASCII/UTF16 string, actual data is on the heap */
  ECMA_STRING_CONTAINER_SIZE16 = 1, /**< 4 < size < 2^16, ASCII/UTF16 string, actual data is on the heap */
  ECMA_STRING_CONTAINER_SIZE32 = 2, /**< 2^16 <= size < 2^32, ASCII/UTF16 string, actual data is on the heap */
  ECMA_STRING_CONTAINER_EXTERNAL = 3, /**< any size, ASCII/UTF16 string, actual data is allocated externally */
} ecma_string_container_t;

/**
 * Smallest string descriptor: header plus a union, 8 bytes in total
 * (enforced by the static_assert below).
 *
 * NOTE(review): presumably the descriptor used for
 * ECMA_STRING_CONTAINER_SIZE8, with the characters stored inline in 'u' -
 * confirm.
 */
typedef struct
{
  /** type : 2 bit : ecma_string_container_t
      ascii : 1 bit : 1 means narrow string
      refs : 13 / 29 bit (max 8190 / 536870910) */
  ecma_string_t base;
  union
  {
    lit_string_hash_t hash; /**< hash view; shares its storage with the members below */
    struct
    {
#if JERRY_CPOINTER_32_BIT
      /* size and str[0..2] occupy the same 4 bytes as the union-level 'hash' */
      uint8_t size;
      uint8_t str[3];
#else
      /* The byte holding the size/hash bit-fields and str[0] occupy the same
         2 bytes as the union-level 'hash' */
      uint8_t size : 3;
      uint8_t hash : 5;
      uint8_t str[5];
#endif
    } ascii; /**< view with 8-bit string units */
    struct
    {
      /* str[0..1] (32-bit hash) or str[0] (16-bit hash) occupies the same
         bytes as the union-level 'hash' */
      /* size is always 1 */
      uint16_t str[2];
    } utf16; /**< view with 16-bit string units */
  } u;
} ecma_string_size8_t;

/* Both configurations must produce an 8-byte descriptor. */
static_assert (sizeof (ecma_string_size8_t) == 8, "");

typedef struct
{
  ecma_string_t header;
  lit_string_hash_t hash;

  uint16_t size;
  /**
   * Actual data of it's place in container (depending on 'ecma_string_container_t' field)
   */
  union
  {
#if JERRY_CPOINTER_32_BIT
    uint8_t ascii[6];
    uint16_t utf16[3];
#else
    uint8_t ascii[10];
    uint16_t utf16[5];
#endif
  } u;
} ecma_string_size16_t;
static_assert (sizeof (ecma_string_size16_t) == 16, "");

typedef struct
{
  ecma_string_t header;
  lit_string_hash_t hash;

  uint32_t size;
  /**
   * Actual data of it's place in container (depending on 'container' field)
   */
  union
  {
#if JERRY_CPOINTER_32_BIT
    uint8_t ascii[4];
    uint16_t utf16[2];
#else
    uint8_t ascii[8];
    uint16_t utf16[4];
#endif
  } u;
} ecma_string_size32_t;
static_assert (sizeof (ecma_string_size32_t) == 16, "");

/**
 * Body of an external string: a pointer to the string data plus a user pointer.
 */
typedef struct
{
  /** Pointer to the string data, typed for 8-bit or 16-bit units.
      NOTE(review): which member is valid presumably follows the 'ascii' bit
      of the string header - confirm. */
  union
  {
    uint8_t *ascii; /**< data viewed as 8-bit units */
    uint16_t *wide; /**< data viewed as 16-bit units */
  } u;
  void *user_p; /**< user pointer passed to the callback when the string is freed */
} ecma_external_string_body_t;

/**
 * Descriptor of a string whose data is held through an
 * ecma_external_string_body_t.
 *
 * The body is embedded, except when both JERRY_CPOINTER_32_BIT is set and
 * pointers are 32 bits wide: embedding it there would push the descriptor
 * past the 16 bytes asserted below, so only a pointer to the body is kept.
 */
typedef struct
{
  ecma_string_t header; /**< common string header */
  lit_string_hash_t hash; /**< hash value */
  uintptr_t size; /**< size of the string */
#if !JERRY_CPOINTER_32_BIT || (UINTPTR_MAX != UINT32_MAX)
  ecma_external_string_body_t body; /**< embedded body */
#else /* JERRY_CPOINTER_32_BIT && (UINTPTR_MAX == UINT32_MAX) */
  ecma_external_string_body_t *body_ptr; /**< pointer to a separately stored body */
#endif /* !JERRY_CPOINTER_32_BIT || (UINTPTR_MAX != UINT32_MAX) */
} ecma_external_string_t;

/* 32 bytes when pointers are wider than 32 bits, 16 bytes otherwise. */
static_assert (sizeof (ecma_external_string_t) == ((UINTPTR_MAX > UINT32_MAX) ? 32 : 16), "");

/**
 * Tracked UTF-8 string reference.
 * NOTE(review): the field meanings below are inferred from the names only - confirm.
 */
typedef struct
{
  void *origin; /**< presumably the object that owns or tracks 'str' - TODO confirm */
  uint8_t *str; /**< pointer to 8-bit units (presumably UTF-8 encoded) */
  uint32_t len; /**< length; the unit (bytes vs. code points) is not visible here - TODO confirm */
} ecma_string_tracked_utf8_t;

/**
 * Tracked CESU-8 string reference.
 * NOTE(review): the field meanings below are inferred from the names only - confirm.
 */
typedef struct
{
  void *origin; /**< presumably the object that owns or tracks 'str' - TODO confirm */
  uint8_t *str; /**< pointer to 8-bit units (presumably CESU-8 encoded) */
  uint32_t len; /**< length; the unit (bytes vs. code points) is not visible here - TODO confirm */
} ecma_string_tracked_cesu8_t;

/**
 * Tracked UTF-16 string reference.
 * NOTE(review): the field meanings below are inferred from the names only - confirm.
 */
typedef struct
{
  void *origin; /**< presumably the object that owns or tracks 'str' - TODO confirm */
  uint16_t *str; /**< pointer to 16-bit units (presumably UTF-16 encoded) */
  uint32_t len; /**< length; the unit (bytes vs. 16-bit units) is not visible here - TODO confirm */
} ecma_string_tracked_utf16_t;

/**
 * Kind of an atom. Four values, so it fits the 2-bit 'type' bit-field of
 * ecma_atom_t.
 */
typedef enum
{
  ECMA_ATOM_TYPE_SYMBOL = 0, /**< symbol atom */
  ECMA_ATOM_TYPE_GLOBAL_SYMBOL = 1, /**< global symbol atom */
  ECMA_ATOM_TYPE_STRING = 2, /**< string atom */
  ECMA_ATOM_TYPE_PRIVATE = 3 /**< private atom */
} ecma_atom_type_t;

/**
 * Atom packed into a single 32-bit word.
 *
 * All three bit-fields are declared with the same type on purpose. Under the
 * MSVC bit-field layout, adjacent bit-fields whose declared types differ in
 * size are not merged into one storage unit, so declaring 'type' with a
 * 16-bit type would make the struct larger than 4 bytes on those targets.
 */
typedef struct
{
  uint32_t index : 29; /**< atom index */
  uint32_t type : 2; /**< ecma_atom_type_t */
  uint32_t is_string : 1; /**< NOTE(review): presumably set when the atom denotes a string - confirm */
} ecma_atom_t;

static_assert (sizeof (ecma_atom_t) == 4, "ecma_atom_t must occupy exactly 32 bits");

ecma_atom_t

  • 4 ECMA_ATOM_NUMBER 31 bit unsigned integer

  • 1 ECMA_ATOM_SYMBOL

  • 1 ECMA_ATOM_GLOBAL_SYMBOL

  • 1 ECMA_ATOM_STRING

  • 1 ECMA_ATOM_PRIVATE

  • 1 ECMA_TYPE_INTEGER

    • directly encoded number value 30 bit signed integer
  • 1 ECMA_TYPE_DIRECT

    • directly encoded simple value, extend point for future
  • 1 ECMA_TYPE_FLOAT

    • pointer to a 64 bit floating point number (8 byte aligned)
  • 1 ECMA_TYPE_OBJECT

    • pointer to (object, function, extend point for future) (8 byte aligned)
  • 1 ECMA_TYPE_STRING

    • pointer to description of a string, that is ecma_string_t (8 byte aligned)
  • 1 ECMA_TYPE_SYMBOL

    • pointer to description of a symbol that is ecma_symbol_t (8 byte aligned)
  • 1 ECMA_TYPE_BIG_NUMBER

    • pointer to description of a bignumber that is ecma_bignumber_t (8 byte aligned)
      • The bignumber may be a bigint, bigfloat or bigdecimal
  • 1 ECMA_TYPE_ERROR

    • pointer to description of an error reference(exception) (only supported by C API)

ecma_value_direct_string_t

Checking whether an ecma_value_direct_string_t is an ASCII string can be done with v & 0x808080 (a zero result means every byte has its high bit clear)

Code point count calculation

0yyyzzzz0yyyzzzz0yyyzzzz (0+1)>>1=0 3
0yyyzzzz110xxxyy10yyzzzz (2+1)>>1=1 2
110xxxyy10yyzzzz0yyyzzzz (2+1)>>1=1 2
1110wwww10xxxxyy10yyzzzz (3+1)>>1=2 1


0yyyzzzz0yyyzzzz00000000 (0+1)>>1=0 2
110xxxyy10yyzzzz00000000 (2+1)>>1=1 1

Brief testing commands

clang-cl --target=i686-pc-windows-msvc -DJERRY_CPOINTER_32_BIT=0 -c test-api.c
clang-cl --target=x86_64-pc-windows-msvc -DJERRY_CPOINTER_32_BIT=0 -c test-api.c
clang-cl --target=i686-pc-windows-msvc -DJERRY_CPOINTER_32_BIT=1 -c test-api.c
clang-cl --target=x86_64-pc-windows-msvc -DJERRY_CPOINTER_32_BIT=1 -c test-api.c

Clone this wiki locally