
Commit 1a65394

feat: Add Image to Video Task to fal provider (#1657)
Adds support for the Image to Video task on the fal provider, enabling the new LTX Video and Wan 2.2 models.

Co-authored-by: Celina Hanouti <hanouticelina@gmail.com>
Parent: 6b48ebc · Commit: 1a65394

File tree: 3 files changed, +81 -1 lines changed

packages/inference/src/lib/getProviderHelper.ts
packages/inference/src/providers/fal-ai.ts
packages/inference/test/InferenceClient.spec.ts

packages/inference/src/lib/getProviderHelper.ts

Lines changed: 1 addition & 0 deletions
@@ -68,6 +68,7 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
     "image-to-image": new FalAI.FalAIImageToImageTask(),
     "automatic-speech-recognition": new FalAI.FalAIAutomaticSpeechRecognitionTask(),
     "image-segmentation": new FalAI.FalAIImageSegmentationTask(),
+    "image-to-video": new FalAI.FalAIImageToVideoTask(),
   },
   "featherless-ai": {
     conversational: new FeatherlessAI.FeatherlessAIConversationalTask(),
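
For orientation, the hunk above extends a provider-to-task registry whose entries resolve to task helper objects. The sketch below is a self-contained, hypothetical illustration of that lookup pattern; none of its identifiers (Provider, Task, TaskHelper, REGISTRY, lookupHelper) belong to @huggingface/inference.

```ts
// Hypothetical stand-in for the PROVIDERS registry pattern extended in this commit.
type Provider = "fal-ai" | "featherless-ai";
type Task = "image-to-video" | "image-to-image" | "conversational";

interface TaskHelper {
  makeRoute(model: string): string;
}

const REGISTRY: Record<Provider, Partial<Record<Task, TaskHelper>>> = {
  "fal-ai": {
    // After this commit, "image-to-video" maps to a Fal queue-based helper.
    "image-to-video": { makeRoute: (model) => `/${model}?_subdomain=queue` },
  },
  "featherless-ai": {},
};

function lookupHelper(provider: Provider, task: Task): TaskHelper {
  const helper = REGISTRY[provider]?.[task];
  if (!helper) {
    throw new Error(`Task "${task}" is not supported by provider "${provider}"`);
  }
  return helper;
}

// Resolves the newly registered helper and builds its queue route.
console.log(lookupHelper("fal-ai", "image-to-video").makeRoute("fal-ai/some-model"));
```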

packages/inference/src/providers/fal-ai.ts

Lines changed: 69 additions & 1 deletion
@@ -27,6 +27,7 @@ import {
   TaskProviderHelper,
   type TextToImageTaskHelper,
   type TextToVideoTaskHelper,
+  type ImageToVideoTaskHelper,
 } from "./providerHelper.js";
 import { HF_HUB_URL } from "../config.js";
 import type { AutomaticSpeechRecognitionArgs } from "../tasks/audio/automaticSpeechRecognition.js";

@@ -35,7 +36,7 @@ import {
   InferenceClientProviderApiError,
   InferenceClientProviderOutputError,
 } from "../errors.js";
-import type { ImageToImageArgs } from "../tasks/index.js";
+import type { ImageToImageArgs, ImageToVideoArgs } from "../tasks/index.js";
 import type { ImageSegmentationArgs } from "../tasks/cv/imageSegmentation.js";

 export interface FalAiQueueOutput {

@@ -329,6 +330,73 @@ export class FalAITextToVideoTask extends FalAiQueueTask implements TextToVideoT
   }
 }

+export class FalAIImageToVideoTask extends FalAiQueueTask implements ImageToVideoTaskHelper {
+  task: InferenceTask;
+
+  constructor() {
+    super("https://queue.fal.run");
+    this.task = "image-to-video";
+  }
+
+  /** Same queue routing rule as the other Fal queue tasks */
+  override makeRoute(params: UrlParams): string {
+    return params.authMethod !== "provider-key" ? `/${params.model}?_subdomain=queue` : `/${params.model}`;
+  }
+
+  /** Synchronous case – caller already gave us base64 or a URL */
+  override preparePayload(params: BodyParams): Record<string, unknown> {
+    return {
+      ...omit(params.args, ["inputs", "parameters"]),
+      ...(params.args.parameters as Record<string, unknown>),
+      // args.inputs is expected to be a base64 data URI or a URL
+      image_url: params.args.inputs,
+    };
+  }
+
+  /** Asynchronous helper – caller gave us a Blob */
+  async preparePayloadAsync(args: ImageToVideoArgs): Promise<RequestArgs> {
+    const mimeType = args.inputs instanceof Blob ? args.inputs.type : "image/png";
+    return {
+      ...omit(args, ["inputs", "parameters"]),
+      image_url: `data:${mimeType};base64,${base64FromBytes(
+        new Uint8Array(args.inputs instanceof ArrayBuffer ? args.inputs : await (args.inputs as Blob).arrayBuffer())
+      )}`,
+      ...args.parameters,
+    };
+  }
+
+  /** Queue polling + final download – mirrors Text-to-Video */
+  override async getResponse(
+    response: FalAiQueueOutput,
+    url?: string,
+    headers?: Record<string, string>
+  ): Promise<Blob> {
+    const result = await this.getResponseFromQueueApi(response, url, headers);
+
+    if (
+      typeof result === "object" &&
+      result !== null &&
+      "video" in result &&
+      typeof result.video === "object" &&
+      result.video !== null &&
+      "url" in result.video &&
+      typeof result.video.url === "string" &&
+      isUrl(result.video.url)
+    ) {
+      const urlResponse = await fetch(result.video.url);
+      return await urlResponse.blob();
+    }
+
+    throw new InferenceClientProviderOutputError(
+      `Received malformed response from Fal.ai image-to-video API: expected { video: { url: string } }, got: ${JSON.stringify(
+        result
+      )}`
+    );
+  }
+}
+
 export class FalAIAutomaticSpeechRecognitionTask extends FalAITask implements AutomaticSpeechRecognitionTaskHelper {
   override prepareHeaders(params: HeaderParams, binary: boolean): Record<string, string> {
     const headers = super.prepareHeaders(params, binary);
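
To make the asynchronous path concrete, the sketch below shows the payload shape that preparePayloadAsync builds: the input image becomes a base64 data URI under the image_url key, with parameters such as prompt spread alongside it. This is an illustration only; toBase64DataUri and buildExamplePayload are invented names, and the conversion uses the standard btoa instead of the library's base64FromBytes helper.

```ts
// Sketch of the request body ultimately POSTed to the Fal queue endpoint.
// toBase64DataUri and buildExamplePayload are hypothetical, not library exports.
async function toBase64DataUri(image: Blob): Promise<string> {
  const bytes = new Uint8Array(await image.arrayBuffer());
  let binary = "";
  for (const byte of bytes) {
    binary += String.fromCharCode(byte);
  }
  // btoa is available in browsers and in Node.js >= 16.
  return `data:${image.type || "image/png"};base64,${btoa(binary)}`;
}

async function buildExamplePayload(image: Blob, prompt: string): Promise<Record<string, unknown>> {
  return {
    image_url: await toBase64DataUri(image),
    prompt,
  };
}

void (async () => {
  const image = new Blob([new Uint8Array([137, 80, 78, 71])], { type: "image/png" });
  const payload = await buildExamplePayload(image, "The cats are jumping around in a playful manner");
  console.log(Object.keys(payload)); // ["image_url", "prompt"]
})();
```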

packages/inference/test/InferenceClient.spec.ts

Lines changed: 11 additions & 0 deletions
@@ -1100,6 +1100,17 @@ describe.skip("InferenceClient", () => {
         text: " he has grave doubts whether sir frederick leighton's work is really greek after all and can discover in it but little of rocky ithaca",
       });
     });
+    it("imageToVideo - fal-ai", async () => {
+      const res = await client.imageToVideo({
+        model: "fal-ai/ltxv-13b-098-distilled/image-to-video",
+        provider: "fal-ai",
+        inputs: new Blob([readTestFile("cats.png")], { type: "image/png" }),
+        parameters: {
+          prompt: "The cats are jumping around in a playful manner",
+        },
+      });
+      expect(res).toBeInstanceOf(Blob);
+    });
   },
   TIMEOUT
 );
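
Outside the test suite, the same call can be made with the public client and the resulting Blob written to disk. A minimal Node sketch, assuming @huggingface/inference is installed, the InferenceClient constructor accepts an access token, HF_TOKEN is set in the environment, a local cats.png exists, and the model id matches the one in the test above; the .mp4 extension is also an assumption, since the container format depends on the model.

```ts
import { readFile, writeFile } from "node:fs/promises";
import { InferenceClient } from "@huggingface/inference";

const client = new InferenceClient(process.env.HF_TOKEN);

const video = await client.imageToVideo({
  model: "fal-ai/ltxv-13b-098-distilled/image-to-video",
  provider: "fal-ai",
  inputs: new Blob([await readFile("cats.png")], { type: "image/png" }),
  parameters: {
    prompt: "The cats are jumping around in a playful manner",
  },
});

// imageToVideo resolves to a video Blob once the Fal queue job completes; persist it locally.
await writeFile("cats.mp4", Buffer.from(await video.arrayBuffer()));
```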
