Commit 30d4b20

Local apps: Add RyzenAI models to Lemonade (#1729)
# Description

This PR adds models labeled `ryzenai-hybrid` and `ryzenai-npu` to the Lemonade app instructions. This makes Lemonade the first Hugging Face app to offer instructions on how to run AMD NPU-accelerated models.

Once this PR is merged, we intend to add the proposed tags to all AMD-compatible models. Currently, two models are labeled to test this integration: `amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-fp16-onnx-hybrid` and `amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix`.

## Note on implementation

@krampstudio, during our last PR you made a change called "multi line snippet and format", as shown [here](cafd218). Breaking the lines with `\` as you proposed caused the instructions not to work for Windows users (they had to remove those slashes manually). I reverted this change here. Please let me know if that is OK, or if those line breaks are absolutely needed for the content to be properly displayed.

## How it looks

### Hybrid model example

```
1. Pull the model
-----------------
Setup:
# Download Lemonade from https://lemonade-server.ai/
Content:
lemonade-server pull user.Phi-3-mini-4k-instruct-Hybrid --checkpoint amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-fp16-onnx-hybrid --recipe oga-hybrid
# Note: If you installed from source, use the lemonade-server-dev command instead.

2. Run and chat with the model (requires RyzenAI 300 series)
------------------------------------------------------------
Content:
lemonade-server run user.Phi-3-mini-4k-instruct-Hybrid

3. List all available models
----------------------------
Content:
lemonade-server list
```

### NPU model example

```
1. Pull the model
-----------------
Setup:
# Download Lemonade from https://lemonade-server.ai/
Content:
lemonade-server pull user.Phi-3-mini-4k-instruct-NPU --checkpoint amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix --recipe oga-npu
# Note: If you installed from source, use the lemonade-server-dev command instead.

2. Run and chat with the model (requires RyzenAI 300 series)
------------------------------------------------------------
Content:
lemonade-server run user.Phi-3-mini-4k-instruct-NPU

3. List all available models
----------------------------
Content:
lemonade-server list
```

### GGUF model example

```
1. Pull the model
-----------------
Setup:
# Download Lemonade from https://lemonade-server.ai/
Content:
lemonade-server pull user.gpt-oss-20b-GGUF --checkpoint unsloth/gpt-oss-20b-GGUF:{{QUANT_TAG}} --recipe llamacpp
# Note: If you installed from source, use the lemonade-server-dev command instead.

2. Run and chat with the model
------------------------------
Content:
lemonade-server run user.gpt-oss-20b-GGUF

3. List all available models
----------------------------
Content:
lemonade-server list
```

@Vaibhavs10 Please review :)

---------

Co-authored-by: Bertrand CHEVRIER <bertrand@huggingface.co>
1 parent cacf243 commit 30d4b20

File tree

1 file changed: +28 −7 lines changed

packages/tasks/src/local-apps.ts

Lines changed: 28 additions & 7 deletions
```diff
@@ -90,6 +90,10 @@ function isLlamaCppGgufModel(model: ModelData) {
 	return !!model.gguf?.context_length;
 }
 
+function isAmdRyzenModel(model: ModelData) {
+	return model.tags.includes("ryzenai-hybrid") || model.tags.includes("ryzenai-npu");
+}
+
 function isMlxModel(model: ModelData) {
 	return model.tags.includes("mlx");
 }
@@ -317,21 +321,38 @@ const snippetDockerModelRunner = (model: ModelData, filepath?: string): string =
 
 const snippetLemonade = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
 	const tagName = getQuantTag(filepath);
-	const modelName = model.id.split("/")[1];
+	const modelName = model.id.includes("/") ? model.id.split("/")[1] : model.id;
+
+	// Get recipe according to model type
+	let simplifiedModelName: string;
+	let recipe: string;
+	let checkpoint: string;
+	let requirements: string;
+	if (model.tags.some((tag) => ["ryzenai-npu", "ryzenai-hybrid"].includes(tag))) {
+		recipe = model.tags.includes("ryzenai-npu") ? "oga-npu" : "oga-hybrid";
+		checkpoint = model.id;
+		requirements = " (requires RyzenAI 300 series)";
+		simplifiedModelName = modelName.split("-awq-")[0];
+		simplifiedModelName += recipe === "oga-npu" ? "-NPU" : "-Hybrid";
+	} else {
+		recipe = "llamacpp";
+		checkpoint = `${model.id}${tagName}`;
+		requirements = "";
+		simplifiedModelName = modelName;
+	}
 
 	return [
 		{
 			title: "Pull the model",
 			setup: "# Download Lemonade from https://lemonade-server.ai/",
 			content: [
-				`lemonade-server pull user.${modelName} \\
-	--checkpoint ${model.id}${tagName} \\
-	--recipe llamacpp`,
+				`lemonade-server pull user.${simplifiedModelName} --checkpoint ${checkpoint} --recipe ${recipe}`,
 				"# Note: If you installed from source, use the lemonade-server-dev command instead.",
 			].join("\n"),
 		},
 		{
-			title: "Run and chat with the model",
-			content: `lemonade-server run user.${modelName}`,
+			title: `Run and chat with the model${requirements}`,
+			content: `lemonade-server run user.${simplifiedModelName}`,
 		},
 		{
 			title: "List all available models",
@@ -521,7 +542,7 @@ export const LOCAL_APPS = {
 		prettyLabel: "Lemonade",
 		docsUrl: "https://lemonade-server.ai",
 		mainTask: "text-generation",
-		displayOnModelPage: isLlamaCppGgufModel,
+		displayOnModelPage: (model) => isLlamaCppGgufModel(model) || isAmdRyzenModel(model),
 		snippet: snippetLemonade,
 	},
 } satisfies Record<string, LocalApp>;
```
