From ed6d22d4589eeda8eb3fb973bc66957fda6092a0 Mon Sep 17 00:00:00 2001 From: ngxson Date: Sun, 12 May 2024 11:27:38 +0200 Subject: [PATCH 1/2] gguf: update README --- packages/gguf/README.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/packages/gguf/README.md b/packages/gguf/README.md index 260d602983..c322ecab77 100644 --- a/packages/gguf/README.md +++ b/packages/gguf/README.md @@ -18,6 +18,8 @@ npm install @huggingface/gguf ## Usage +### Basic usage + ```ts import { GGMLQuantizationType, gguf } from "@huggingface/gguf"; @@ -56,6 +58,44 @@ console.log(tensorInfos); ``` +### Reading a local file + +```ts +// Reading a local file. (Not supported on browser) +const { metadata, tensorInfos } = await gguf( + './my_model.gguf', + { allowLocalFile: true }, +); +``` + +### Strictly typed + +By default, known fields in `metadata` are typed. This includes various fields found in [llama.cpp](https://github.com/ggerganov/llama.cpp), [whisper.cpp](https://github.com/ggerganov/whisper.cpp) and [ggml](https://github.com/ggerganov/ggml). + +```ts +const { metadata, tensorInfos } = await gguf(URL_MODEL); + +// Type check for model architecture at runtime +if (metadata["general.architecture"] === "llama") { + + // "llama.attention.head_count" is a valid key for llama architecture + console.log(metadata["llama.attention.head_count"]); + + // "mamba.ssm.conv_kernel" is an invalid key, because it requires model architecture to be mamba + console.log(metadata["mamba.ssm.conv_kernel"]); // error +} +``` + +### Disable strictly typed + +Because GGUF format can be used to store tensors, we can technically use it for other usages. For example, storing [control vectors](https://github.com/ggerganov/llama.cpp/pull/5970), [lora weights](https://github.com/ggerganov/llama.cpp/pull/2632),... 
+ +In case you want to use your own GGUF metadata structure, you can disable strict type by casting the parse output to `GGUFParseOutput<{ strict: false }>`: + +```ts +const { metadata, tensorInfos }: GGUFParseOutput<{ strict: false }> = await gguf(URL_LLAMA); +``` + ## Hugging Face Hub The Hub supports all file formats and has built-in features for GGUF format. From a6e27890ab069daa4fd97e1a2f282ac16451ea9c Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 13 May 2024 22:08:43 +0200 Subject: [PATCH 2/2] Apply suggestions from code review Co-authored-by: Julien Chaumond Co-authored-by: Mishig --- packages/gguf/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/gguf/README.md b/packages/gguf/README.md index c322ecab77..763b494708 100644 --- a/packages/gguf/README.md +++ b/packages/gguf/README.md @@ -78,7 +78,7 @@ const { metadata, tensorInfos } = await gguf(URL_MODEL); // Type check for model architecture at runtime if (metadata["general.architecture"] === "llama") { - // "llama.attention.head_count" is a valid key for llama architecture + // "llama.attention.head_count" is a valid key for llama architecture, this is typed as a number console.log(metadata["llama.attention.head_count"]); - // "mamba.ssm.conv_kernel" is an invalid key, because it requires model architecture to be mamba console.log(metadata["mamba.ssm.conv_kernel"]); // error @@ -88,9 +88,9 @@ if (metadata["general.architecture"] === "llama") { ### Disable strictly typed -Because GGUF format can be used to store tensors, we can technically use it for other usages. For example, storing [control vectors](https://github.com/ggerganov/llama.cpp/pull/5970), [lora weights](https://github.com/ggerganov/llama.cpp/pull/2632),... +Because GGUF format can be used to store tensors, we can technically use it for other usages. For example, storing [control vectors](https://github.com/ggerganov/llama.cpp/pull/5970), [lora weights](https://github.com/ggerganov/llama.cpp/pull/2632), etc. 
-In case you want to use your own GGUF metadata structure, you can disable strict type by casting the parse output to `GGUFParseOutput<{ strict: false }>`: +In case you want to use your own GGUF metadata structure, you can disable strict typing by casting the parse output to `GGUFParseOutput<{ strict: false }>`: ```ts const { metadata, tensorInfos }: GGUFParseOutput<{ strict: false }> = await gguf(URL_LLAMA);