Commit 30d4b20

Local apps: Add RyzenAI models to Lemonade (#1729)
# Description

This PR adds models labeled `ryzenai-hybrid` and `ryzenai-npu` to the Lemonade app instructions. This makes Lemonade the first Hugging Face app to offer instructions on how to run AMD NPU-accelerated models.

Once this PR is merged, we intend to add the proposed tags to all AMD-compatible models. Currently, two models are labeled to test this integration: `amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-fp16-onnx-hybrid` and `amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix`.

## Note on implementation

@krampstudio, during our last PR you made a change called "multi line snippet and format", as shown [here](cafd218). Breaking the lines with `\` as you proposed caused the instructions not to work for Windows users (they had to remove those slashes manually). I reverted this change here. Please let me know if that is OK, or if those line breaks are absolutely needed for the content to be properly displayed.

## How it looks

### Hybrid model example

```
1. Pull the model
-----------------
Setup:
# Download Lemonade from https://lemonade-server.ai/
Content:
lemonade-server pull user.Phi-3-mini-4k-instruct-Hybrid --checkpoint amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-fp16-onnx-hybrid --recipe oga-hybrid
# Note: If you installed from source, use the lemonade-server-dev command instead.

2. Run and chat with the model (requires RyzenAI 300 series)
------------------------------------------------------------
Content:
lemonade-server run user.Phi-3-mini-4k-instruct-Hybrid

3. List all available models
----------------------------
Content:
lemonade-server list
```

### NPU model example

```
1. Pull the model
-----------------
Setup:
# Download Lemonade from https://lemonade-server.ai/
Content:
lemonade-server pull user.Phi-3-mini-4k-instruct-NPU --checkpoint amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix --recipe oga-npu
# Note: If you installed from source, use the lemonade-server-dev command instead.

2. Run and chat with the model (requires RyzenAI 300 series)
------------------------------------------------------------
Content:
lemonade-server run user.Phi-3-mini-4k-instruct-NPU

3. List all available models
----------------------------
Content:
lemonade-server list
```

### GGUF model example

```
1. Pull the model
-----------------
Setup:
# Download Lemonade from https://lemonade-server.ai/
Content:
lemonade-server pull user.gpt-oss-20b-GGUF --checkpoint unsloth/gpt-oss-20b-GGUF:{{QUANT_TAG}} --recipe llamacpp
# Note: If you installed from source, use the lemonade-server-dev command instead.

2. Run and chat with the model
------------------------------
Content:
lemonade-server run user.gpt-oss-20b-GGUF

3. List all available models
----------------------------
Content:
lemonade-server list
```

@Vaibhavs10 Please review :)

---------

Co-authored-by: Bertrand CHEVRIER <bertrand@huggingface.co>
1 parent cacf243 commit 30d4b20

File tree

1 file changed: +28 −7 lines changed

packages/tasks/src/local-apps.ts

Lines changed: 28 additions & 7 deletions
```diff
@@ -90,6 +90,10 @@ function isLlamaCppGgufModel(model: ModelData) {
 	return !!model.gguf?.context_length;
 }
 
+function isAmdRyzenModel(model: ModelData) {
+	return model.tags.includes("ryzenai-hybrid") || model.tags.includes("ryzenai-npu");
+}
+
 function isMlxModel(model: ModelData) {
 	return model.tags.includes("mlx");
 }
@@ -317,21 +321,38 @@ const snippetDockerModelRunner = (model: ModelData, filepath?: string): string =
 
 const snippetLemonade = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
 	const tagName = getQuantTag(filepath);
-	const modelName = model.id.split("/")[1];
+	const modelName = model.id.includes("/") ? model.id.split("/")[1] : model.id;
+
+	// Get recipe according to model type
+	let simplifiedModelName: string;
+	let recipe: string;
+	let checkpoint: string;
+	let requirements: string;
+	if (model.tags.some((tag) => ["ryzenai-npu", "ryzenai-hybrid"].includes(tag))) {
+		recipe = model.tags.includes("ryzenai-npu") ? "oga-npu" : "oga-hybrid";
+		checkpoint = model.id;
+		requirements = " (requires RyzenAI 300 series)";
+		simplifiedModelName = modelName.split("-awq-")[0];
+		simplifiedModelName += recipe === "oga-npu" ? "-NPU" : "-Hybrid";
+	} else {
+		recipe = "llamacpp";
+		checkpoint = `${model.id}${tagName}`;
+		requirements = "";
+		simplifiedModelName = modelName;
+	}
 
 	return [
 		{
 			title: "Pull the model",
 			setup: "# Download Lemonade from https://lemonade-server.ai/",
 			content: [
-				`lemonade-server pull user.${modelName} \\
-	--checkpoint ${model.id}${tagName} \\
-	--recipe llamacpp`,
+				`lemonade-server pull user.${simplifiedModelName} --checkpoint ${checkpoint} --recipe ${recipe}`,
 				"# Note: If you installed from source, use the lemonade-server-dev command instead.",
 			].join("\n"),
 		},
 		{
-			title: "Run and chat with the model",
-			content: `lemonade-server run user.${modelName}`,
+			title: `Run and chat with the model${requirements}`,
+			content: `lemonade-server run user.${simplifiedModelName}`,
 		},
 		{
 			title: "List all available models",
@@ -521,7 +542,7 @@ export const LOCAL_APPS = {
 		prettyLabel: "Lemonade",
 		docsUrl: "https://lemonade-server.ai",
 		mainTask: "text-generation",
-		displayOnModelPage: isLlamaCppGgufModel,
+		displayOnModelPage: (model) => isLlamaCppGgufModel(model) || isAmdRyzenModel(model),
 		snippet: snippetLemonade,
 	},
 } satisfies Record<string, LocalApp>;
```
