@@ -1699,6 +1699,32 @@ image = sana(
 )`,
 ];
 
+export const vibevoice = (model: ModelData): string[] => [
+	`import torch, soundfile as sf, librosa
+from vibevoice.processor.vibevoice_processor import VibeVoiceProcessor
+from vibevoice.modular.modeling_vibevoice_inference import VibeVoiceForConditionalGenerationInference
+
+# Load the reference voice sample (must end up as 24 kHz mono)
+voice, sr = sf.read("path/to/voice_sample.wav")
+if voice.ndim > 1: voice = voice.mean(axis=1)  # downmix stereo to mono
+if sr != 24000: voice = librosa.resample(voice, orig_sr=sr, target_sr=24000)
+
+processor = VibeVoiceProcessor.from_pretrained("${model.id}")
+model = VibeVoiceForConditionalGenerationInference.from_pretrained(
+    "${model.id}", torch_dtype=torch.bfloat16
+).to("cuda").eval()
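+# Fewer DDPM decoding steps run faster; more steps can improve audio quality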
+model.set_ddpm_inference_steps(5)
+
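+# The transcript marks turns with "Speaker N:" labels; voice_samples holds the reference voices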
+inputs = processor(text=["Speaker 0: Hello!\\nSpeaker 1: Hi there!"],
+                   voice_samples=[[voice]], return_tensors="pt")
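+# cfg_scale controls classifier-free guidance strength (higher follows the text more closely)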
+audio = model.generate(**inputs, cfg_scale=1.3,
+                       tokenizer=processor.tokenizer).speech_outputs[0]
+sf.write("output.wav", audio.float().cpu().numpy().squeeze(), 24000)`,
+];
+
 export const videoprism = (model: ModelData): string[] => [
 	`# Install from https://github.com/google-deepmind/videoprism
 import jax