Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ ggml-metal.metal
News*.json
sqlite-lembed.h
dist/

tests/__pycache__/
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ include_directories(${SQLITE_AMALGAMATION_DIR})

add_library(sqlite_lembed SHARED sqlite-lembed.c)
add_dependencies(sqlite_lembed sqlite_amalgamation)
target_link_libraries(sqlite_lembed ggml_static llama)
target_link_libraries(sqlite_lembed ggml llama)
target_include_directories(sqlite_lembed PRIVATE ${LLAMA_CPP_DIR})
set_target_properties(sqlite_lembed PROPERTIES PREFIX "")
set_target_properties(sqlite_lembed PROPERTIES OUTPUT_NAME "lembed0")
Expand Down
7 changes: 7 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ CFLAGS+=-framework Accelerate -framework Foundation -framework Metal -framework
LLAMA_CMAKE_FLAGS+=-DLLAMA_METAL=0
endif

ifeq ($(shell uname -sm),Darwin x86_64)
LLAMA_CMAKE_FLAGS+=-DCMAKE_OSX_ARCHITECTURES=x86_64
endif
ifeq ($(shell uname -sm),Darwin arm64)
LLAMA_CMAKE_FLAGS+=-DCMAKE_OSX_ARCHITECTURES=arm64
endif

ifdef CONFIG_LINUX
LOADABLE_EXTENSION=so
LLAMA_CMAKE_FLAGS+=-DCMAKE_POSITION_INDEPENDENT_CODE=ON
Expand Down
60 changes: 28 additions & 32 deletions sqlite-lembed.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ static void normalize(float *vec, float *out, int n) {
int tokenize(struct llama_model *model, const char *input, size_t input_length,
int *token_count, llama_token **tokens) {
int input_token_count_estimate =
llama_tokenize(model, input, input_length, NULL, 0, true, true);
llama_tokenize(llama_model_get_vocab(model), input, input_length, NULL, 0, true, true);
if (input_token_count_estimate >= 0) {
return SQLITE_ERROR;
}
Expand All @@ -41,7 +41,7 @@ int tokenize(struct llama_model *model, const char *input, size_t input_length,
return SQLITE_NOMEM;
}
int input_token_count =
llama_tokenize(model, input, input_length, *tokens,
llama_tokenize(llama_model_get_vocab(model), input, input_length, *tokens,
abs(input_token_count_estimate), true, true);
if (input_token_count != abs(input_token_count_estimate)) {
sqlite3_free(*tokens);
Expand All @@ -59,7 +59,7 @@ int embed_single(struct llama_model *model, struct llama_context *context,
/** Output embedding length (n dimensions) */
int *out_dimensions) {
int n_batch = 512;
int n_ctx_train = llama_n_ctx_train(model);
int n_ctx_train = llama_model_n_ctx_train(model);
int n_ctx = llama_n_ctx(context);

llama_token *tokens;
Expand All @@ -85,14 +85,14 @@ int embed_single(struct llama_model *model, struct llama_context *context,
batch.n_tokens++;
}

int dimensions = llama_n_embd(model);
int dimensions = llama_model_n_embd(model);
float *output_embedding = sqlite3_malloc(sizeof(float) * dimensions);
if(!output_embedding) {
llama_batch_free(batch);
return SQLITE_NOMEM;
}

llama_kv_cache_clear(context); // KV not needed for embeddings?
llama_memory_clear(llama_get_memory(context), false); // KV not needed for embeddings?
rc = llama_decode(context, batch);
if(rc != 0) {
sqlite3_free(output_embedding);
Expand Down Expand Up @@ -143,8 +143,9 @@ void api_free(void *p) {
typedef struct lembed_model_options lembed_model_options;
struct lembed_model_options {
int32_t n_gpu_layers;
uint32_t seed;

int8_t defined[1];
int8_t defined[2];
};
static char *POINTER_NAME_MODEL = "lembed_model";
static char *POINTER_NAME_MODEL_OPTIONS = "lembed_model_options";
Expand Down Expand Up @@ -174,6 +175,11 @@ static void lembed_model_options_(sqlite3_context *context, int argc,
if (sqlite3_stricmp(k, "n_gpu_layers") == 0) {
o->n_gpu_layers = sqlite3_value_int(value);
o->defined[0] = 1;
} else if (sqlite3_stricmp("seed", k) == 0) {
sqlite3_int64 v = sqlite3_value_int64(value);
assert(v > 0);
o->seed = v;
o->defined[1] = 1;
} else {
abort();
}
Expand All @@ -183,12 +189,11 @@ static void lembed_model_options_(sqlite3_context *context, int argc,

typedef struct lembed_context_options lembed_context_options;
struct lembed_context_options {
uint32_t seed;
uint32_t n_ctx;
enum llama_rope_scaling_type rope_scaling_type;
float rope_freq_scale;

int8_t defined[4];
int8_t defined[3];
};
static char *POINTER_NAME_CONTEXT_OPTIONS = "lembed_context_options";

Expand All @@ -205,16 +210,11 @@ static void lembed_context_options_(sqlite3_context *context, int argc,
sqlite3_value *value = argv[i + 1];
assert(sqlite3_value_type(key) == SQLITE_TEXT);
const char *k = (const char *)sqlite3_value_text(key);
if (sqlite3_stricmp("seed", k) == 0) {
sqlite3_int64 v = sqlite3_value_int64(value);
assert(v > 0);
o->seed = v;
o->defined[0] = 1;
} else if (sqlite3_stricmp("n_ctx", k) == 0) {
if (sqlite3_stricmp("n_ctx", k) == 0) {
sqlite3_int64 v = sqlite3_value_int64(value);
assert(v > 0);
o->n_ctx = v;
o->defined[1] = 1;
o->defined[0] = 1;
} else if (sqlite3_stricmp("rope_scaling_type", k) == 0) {
const char *v = (const char *)sqlite3_value_text(value);
if (sqlite3_stricmp(v, "none")) {
Expand All @@ -227,10 +227,10 @@ static void lembed_context_options_(sqlite3_context *context, int argc,
abort();
}

o->defined[2] = 1;
o->defined[1] = 1;
} else if (sqlite3_stricmp(k, "rope_freq_scale") == 0) {
o->rope_freq_scale = sqlite3_value_double(value);
o->defined[3] = 1;
o->defined[2] = 1;
} else {
abort();
}
Expand All @@ -249,8 +249,7 @@ static void lembed_model_from_file(sqlite3_context *context, int argc,
}


static void _static_text_func(sqlite3_context *context, int argc,
sqlite3_value **argv) {
static void _static_text_func(sqlite3_context *context, int argc, sqlite3_value **argv) {
UNUSED_PARAMETER(argc);
UNUSED_PARAMETER(argv);
sqlite3_result_text(context, sqlite3_user_data(context), -1, SQLITE_STATIC);
Expand Down Expand Up @@ -347,7 +346,7 @@ static void lembed_token_score(sqlite3_context *context, int argc,

int32_t token = sqlite3_value_int(argv[1]);

float score = llama_token_get_score(model, token);
float score = llama_vocab_get_score(llama_model_get_vocab(model), token);
sqlite3_result_double(context, score);
}
static void lembed_token_to_piece_(sqlite3_context *context, int argc,
Expand All @@ -360,7 +359,7 @@ static void lembed_token_to_piece_(sqlite3_context *context, int argc,
int32_t token = sqlite3_value_int(argv[1]);
#define BUFLEN 256
char buf[BUFLEN];
int n = llama_token_to_piece(model, token, buf, BUFLEN, false);
int n = llama_token_to_piece(llama_model_get_vocab(model), token, buf, BUFLEN, 0, false);
if (n) {
sqlite3_result_text(context, buf, n, SQLITE_TRANSIENT);
} else {
Expand Down Expand Up @@ -470,7 +469,7 @@ static int lembed_modelsUpdate(sqlite3_vtab *pVTab, int argc,
mparams.n_gpu_layers = modelOptions->n_gpu_layers;
}

model = llama_load_model_from_file(modelPath, mparams);
model = llama_model_load_from_file(modelPath, mparams);
if (!model) {
return SQLITE_ERROR;
}
Expand All @@ -480,22 +479,19 @@ static int lembed_modelsUpdate(sqlite3_vtab *pVTab, int argc,
cparams.embeddings = 1;
if (contextOptions) {
if (contextOptions->defined[0]) {
cparams.seed = contextOptions->seed;
}
if (contextOptions->defined[1]) {
cparams.n_ctx = contextOptions->n_ctx;
}
if (contextOptions->defined[2]) {
if (contextOptions->defined[1]) {
cparams.rope_scaling_type = contextOptions->rope_scaling_type;
}
if (contextOptions->defined[3]) {
if (contextOptions->defined[2]) {
cparams.rope_freq_scale = contextOptions->rope_freq_scale;
}
}

ctx = llama_new_context_with_model(model, cparams);
ctx = llama_init_from_model(model, cparams);
if (!ctx) {
llama_free_model(model);
llama_model_free(model);
return SQLITE_ERROR;
}
p->api->models[idx].model = model;
Expand Down Expand Up @@ -742,7 +738,7 @@ static int lembed_chunksFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
for (int j = 0; j < chunk_size; j++) {
int32_t token = tokens[i * chunk_size + j];
int32_t piece_len_neg =
llama_token_to_piece(model, token, NULL, 0, false);
llama_token_to_piece(llama_model_get_vocab(model), token, NULL, 0, 0, false);
// printf("%d\n", piece_len_neg);
// assert(piece_len_neg < 0);
int32_t piece_len = abs(piece_len_neg);
Expand All @@ -753,7 +749,7 @@ static int lembed_chunksFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,

char *piece = sqlite3_malloc(piece_len);
assert(piece);
llama_token_to_piece(model, token, piece, piece_len, false);
llama_token_to_piece(llama_model_get_vocab(model), token, piece, piece_len, 0, false);
// printf("'%.*s' %d ", piece_len, piece, tokens[i*chunk_size + j]);

char *begin = ptr;
Expand Down Expand Up @@ -918,4 +914,4 @@ __declspec(dllexport)
sqlite3_create_module_v2(db, "lembed_chunks", &lembed_chunksModule, a, NULL);
sqlite3_create_module_v2(db, "lembed_models", &lembed_modelsModule, a, NULL);
return SQLITE_OK;
}
}
4 changes: 2 additions & 2 deletions tests/test-loadable.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def test_lembed():
a = lembed("aaa", "alex garcia")
assert len(a) == (384 * 4)
assert struct.unpack("1f", a[0:4])[0] == pytest.approx(
-0.09205757826566696, rel=1e-2
0.002983473241329193, rel=1e-2
)

with _raises(
Expand All @@ -138,7 +138,7 @@ def test_lembed():
a = lembed("alex garcia")
assert len(a) == (384 * 4)
assert struct.unpack("1f", a[0:4])[0] == pytest.approx(
-0.09205757826566696, rel=1e-2
0.002983473241329193, rel=1e-2
)


Expand Down
2 changes: 1 addition & 1 deletion vendor/llama.cpp