
Commit 1a65394

feat: Add Image to Video Task to fal provider (#1657)
Adds support for the Image to Video task on the fal provider, enabling the new LTX Video and Wan 2.2 models.

Co-authored-by: Celina Hanouti <hanouticelina@gmail.com>
Parent: 6b48ebc · Commit: 1a65394

File tree: 3 files changed, +81 -1 lines changed

packages/inference/src/lib/getProviderHelper.ts
packages/inference/src/providers/fal-ai.ts
packages/inference/test/InferenceClient.spec.ts

packages/inference/src/lib/getProviderHelper.ts

Lines changed: 1 addition & 0 deletions
@@ -68,6 +68,7 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
     "image-to-image": new FalAI.FalAIImageToImageTask(),
     "automatic-speech-recognition": new FalAI.FalAIAutomaticSpeechRecognitionTask(),
     "image-segmentation": new FalAI.FalAIImageSegmentationTask(),
+    "image-to-video": new FalAI.FalAIImageToVideoTask(),
   },
   "featherless-ai": {
     conversational: new FeatherlessAI.FeatherlessAIConversationalTask(),
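
For orientation, the hunk above extends a provider-to-task registry whose entries resolve to task helper objects. The sketch below is a self-contained, hypothetical illustration of that lookup pattern; none of its identifiers (Provider, Task, TaskHelper, REGISTRY, lookupHelper) belong to @huggingface/inference.

```ts
// Hypothetical stand-in for the PROVIDERS registry pattern extended in this commit.
type Provider = "fal-ai" | "featherless-ai";
type Task = "image-to-video" | "image-to-image" | "conversational";

interface TaskHelper {
  makeRoute(model: string): string;
}

const REGISTRY: Record<Provider, Partial<Record<Task, TaskHelper>>> = {
  "fal-ai": {
    // After this commit, "image-to-video" maps to a Fal queue-based helper.
    "image-to-video": { makeRoute: (model) => `/${model}?_subdomain=queue` },
  },
  "featherless-ai": {},
};

function lookupHelper(provider: Provider, task: Task): TaskHelper {
  const helper = REGISTRY[provider]?.[task];
  if (!helper) {
    throw new Error(`Task "${task}" is not supported by provider "${provider}"`);
  }
  return helper;
}

// Resolves the newly registered helper and builds its queue route.
console.log(lookupHelper("fal-ai", "image-to-video").makeRoute("fal-ai/some-model"));
```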

packages/inference/src/providers/fal-ai.ts

Lines changed: 69 additions & 1 deletion
@@ -27,6 +27,7 @@ import {
   TaskProviderHelper,
   type TextToImageTaskHelper,
   type TextToVideoTaskHelper,
+  type ImageToVideoTaskHelper,
 } from "./providerHelper.js";
 import { HF_HUB_URL } from "../config.js";
 import type { AutomaticSpeechRecognitionArgs } from "../tasks/audio/automaticSpeechRecognition.js";

@@ -35,7 +36,7 @@ import {
   InferenceClientProviderApiError,
   InferenceClientProviderOutputError,
 } from "../errors.js";
-import type { ImageToImageArgs } from "../tasks/index.js";
+import type { ImageToImageArgs, ImageToVideoArgs } from "../tasks/index.js";
 import type { ImageSegmentationArgs } from "../tasks/cv/imageSegmentation.js";

 export interface FalAiQueueOutput {

@@ -329,6 +330,73 @@ export class FalAITextToVideoTask extends FalAiQueueTask implements TextToVideoT
   }
 }

+export class FalAIImageToVideoTask extends FalAiQueueTask implements ImageToVideoTaskHelper {
+  task: InferenceTask;
+
+  constructor() {
+    super("https://queue.fal.run");
+    this.task = "image-to-video";
+  }
+
+  /** Same queue routing rule as the other Fal queue tasks */
+  override makeRoute(params: UrlParams): string {
+    return params.authMethod !== "provider-key" ? `/${params.model}?_subdomain=queue` : `/${params.model}`;
+  }
+
+  /** Synchronous case – caller already gave us base64 or a URL */
+  override preparePayload(params: BodyParams): Record<string, unknown> {
+    return {
+      ...omit(params.args, ["inputs", "parameters"]),
+      ...(params.args.parameters as Record<string, unknown>),
+      // args.inputs is expected to be a base64 data URI or a URL
+      image_url: params.args.inputs,
+    };
+  }
+
+  /** Asynchronous helper – caller gave us a Blob */
+  async preparePayloadAsync(args: ImageToVideoArgs): Promise<RequestArgs> {
+    const mimeType = args.inputs instanceof Blob ? args.inputs.type : "image/png";
+    return {
+      ...omit(args, ["inputs", "parameters"]),
+      image_url: `data:${mimeType};base64,${base64FromBytes(
+        new Uint8Array(args.inputs instanceof ArrayBuffer ? args.inputs : await (args.inputs as Blob).arrayBuffer())
+      )}`,
+      ...args.parameters,
+    };
+  }
+
+  /** Queue polling + final download – mirrors Text-to-Video */
+  override async getResponse(
+    response: FalAiQueueOutput,
+    url?: string,
+    headers?: Record<string, string>
+  ): Promise<Blob> {
+    const result = await this.getResponseFromQueueApi(response, url, headers);
+
+    if (
+      typeof result === "object" &&
+      result !== null &&
+      "video" in result &&
+      typeof result.video === "object" &&
+      result.video !== null &&
+      "url" in result.video &&
+      typeof result.video.url === "string" &&
+      isUrl(result.video.url)
+    ) {
+      const urlResponse = await fetch(result.video.url);
+      return await urlResponse.blob();
+    }
+
+    throw new InferenceClientProviderOutputError(
+      `Received malformed response from Fal.ai image-to-video API: expected { video: { url: string } }, got: ${JSON.stringify(
+        result
+      )}`
+    );
+  }
+}
+
 export class FalAIAutomaticSpeechRecognitionTask extends FalAITask implements AutomaticSpeechRecognitionTaskHelper {
   override prepareHeaders(params: HeaderParams, binary: boolean): Record<string, string> {
     const headers = super.prepareHeaders(params, binary);
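
To make the asynchronous path concrete, the sketch below shows the payload shape that preparePayloadAsync builds: the input image becomes a base64 data URI under the image_url key, with parameters such as prompt spread alongside it. This is an illustration only; toBase64DataUri and buildExamplePayload are invented names, and the conversion uses the standard btoa instead of the library's base64FromBytes helper.

```ts
// Sketch of the request body ultimately POSTed to the Fal queue endpoint.
// toBase64DataUri and buildExamplePayload are hypothetical, not library exports.
async function toBase64DataUri(image: Blob): Promise<string> {
  const bytes = new Uint8Array(await image.arrayBuffer());
  let binary = "";
  for (const byte of bytes) {
    binary += String.fromCharCode(byte);
  }
  // btoa is available in browsers and in Node.js >= 16.
  return `data:${image.type || "image/png"};base64,${btoa(binary)}`;
}

async function buildExamplePayload(image: Blob, prompt: string): Promise<Record<string, unknown>> {
  return {
    image_url: await toBase64DataUri(image),
    prompt,
  };
}

void (async () => {
  const image = new Blob([new Uint8Array([137, 80, 78, 71])], { type: "image/png" });
  const payload = await buildExamplePayload(image, "The cats are jumping around in a playful manner");
  console.log(Object.keys(payload)); // ["image_url", "prompt"]
})();
```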

packages/inference/test/InferenceClient.spec.ts

Lines changed: 11 additions & 0 deletions
@@ -1100,6 +1100,17 @@ describe.skip("InferenceClient", () => {
         text: " he has grave doubts whether sir frederick leighton's work is really greek after all and can discover in it but little of rocky ithaca",
       });
     });
+    it("imageToVideo - fal-ai", async () => {
+      const res = await client.imageToVideo({
+        model: "fal-ai/ltxv-13b-098-distilled/image-to-video",
+        provider: "fal-ai",
+        inputs: new Blob([readTestFile("cats.png")], { type: "image/png" }),
+        parameters: {
+          prompt: "The cats are jumping around in a playful manner",
+        },
+      });
+      expect(res).toBeInstanceOf(Blob);
+    });
   },
   TIMEOUT
 );
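
Outside the test suite, the same call can be made with the public client and the resulting Blob written to disk. A minimal Node sketch, assuming @huggingface/inference is installed, the InferenceClient constructor accepts an access token, HF_TOKEN is set in the environment, a local cats.png exists, and the model id matches the one in the test above; the .mp4 extension is also an assumption, since the container format depends on the model.

```ts
import { readFile, writeFile } from "node:fs/promises";
import { InferenceClient } from "@huggingface/inference";

const client = new InferenceClient(process.env.HF_TOKEN);

const video = await client.imageToVideo({
  model: "fal-ai/ltxv-13b-098-distilled/image-to-video",
  provider: "fal-ai",
  inputs: new Blob([await readFile("cats.png")], { type: "image/png" }),
  parameters: {
    prompt: "The cats are jumping around in a playful manner",
  },
});

// imageToVideo resolves to a video Blob once the Fal queue job completes; persist it locally.
await writeFile("cats.mp4", Buffer.from(await video.arrayBuffer()));
```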
