openvinotoolkit · likholat · Jul 11, 2025 · Jul 21, 2025 · Jul 21, 2025 · Jul 21, 2025
diff --git a/ltx-video.py b/ltx-video.py
@@ -0,0 +1,56 @@
+import argparse
+import torch
+import openvino
+import numpy as np
+from optimum.intel.openvino import OVLTXPipeline  # OVDiffusionPipeline
+from diffusers import LTXPipeline
+from diffusers.utils import export_to_video
+
+
+def generate(pipeline, frame_rate):  # Run one fixed-prompt generation through `pipeline` and return the resulting frames.
+    # Alternative short prompt: "Will Smith eating spaghetti"
+    prompt = "A woman with long brown hair and light skin smiles at another woman...A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage."
+    negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
+    ltx_pipeline_output = pipeline(
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        height=128,
+        width=128,
+        num_frames=65,
+        frame_rate=frame_rate,
+        num_inference_steps=15,
+        generator=torch.Generator(device="cpu").manual_seed(42),  # fixed seed so every call starts from the same RNG state
+        guidance_scale=3,
+    )
+    return ltx_pipeline_output.frames[0]  # first entry of `frames`; a single prompt is passed, so presumably the only video - confirm
+
+
+def main():  # Load an OVLTXPipeline from the given model directory, generate a video and export it to optimum_video.mp4.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('model_dir', help='Path to the model directory')
+    args = parser.parse_args()
+    frame_rate = 25  # passed to the pipeline and reused as the fps of the exported video
+
+    ov_pipe = OVLTXPipeline.from_pretrained(
+        args.model_dir,
+        device='CPU',
+        load_in_8bit=False,
+        ov_config={openvino.properties.hint.inference_precision: openvino.Type.f32},  # request f32 inference precision
+    )
+    ov_video = generate(ov_pipe, frame_rate)
+    print(ov_video)  # writes the returned frames object to stdout
+    export_to_video(ov_video, "optimum_video.mp4", fps=frame_rate)
+    # Disabled reference check: the commented-out code below would run diffusers' LTXPipeline through generate() and compare its frames with ov_video.
+    # diffusers_pipeline = LTXPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.float32)
+    # diffusers_video = generate(diffusers_pipeline, frame_rate)
+    # export_to_video(diffusers_video, "diffusers_video.mp4", fps=frame_rate)
+
+    # max_diff = np.abs(
+    #     np.stack(ov_video, dtype=np.int16) - np.stack(diffusers_video, dtype=np.int16)
+    # ).max()
+    # print(max_diff)
+    # assert max_diff <= 9
+
+
+if "__main__" == __name__:  # run main() only when executed as a script
+    main()
diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
@@ -4,6 +4,7 @@
 if(ENABLE_SAMPLES)
     add_subdirectory(cpp/text_generation)
     add_subdirectory(cpp/image_generation)
+    add_subdirectory(cpp/video_generation)
     add_subdirectory(cpp/speech_generation)
     add_subdirectory(cpp/visual_language_chat)
     add_subdirectory(cpp/whisper_speech_recognition)
@@ -23,6 +24,7 @@ install(FILES
 install(DIRECTORY
             cpp/text_generation
             cpp/image_generation
+            cpp/video_generation
             cpp/speech_generation
             cpp/visual_language_chat
             cpp/whisper_speech_recognition
@@ -32,6 +34,7 @@ install(DIRECTORY
 install(DIRECTORY
             python/text_generation
             python/image_generation
+            python/video_generation
             python/speech_generation
             python/visual_language_chat
             python/whisper_speech_recognition

diff --git a/samples/cpp/image_generation/progress_bar.hpp b/samples/cpp/image_generation/progress_bar.hpp
@@ -4,6 +4,7 @@
 #include <optional>
 
 #include "indicators/progress_bar.hpp"
+#include <openvino/runtime/tensor.hpp>
 
 bool progress_bar(size_t step, size_t num_steps, ov::Tensor& /* latent */) {
     using namespace indicators;
@@ -21,7 +22,7 @@ bool progress_bar(size_t step, size_t num_steps, ov::Tensor& /* latent */) {
     }
 
     std::stringstream stream;
-    stream << "Image generation step " << (step + 1) << " / " << num_steps;
+    stream << "Generation step " << (step + 1) << " / " << num_steps;
 
     bar->set_option(option::PostfixText{stream.str()});
     bar->set_progress((100 * (step + 1)) / num_steps);

diff --git a/samples/cpp/video_generation/CMakeLists.txt b/samples/cpp/video_generation/CMakeLists.txt
@@ -0,0 +1,41 @@
+# Copyright (C) 2023-2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+find_package(OpenVINOGenAI REQUIRED
+    PATHS
+        "${CMAKE_BINARY_DIR}"  # Reuse the package from the build.
+        ${OpenVINO_DIR}  # GenAI may be installed alongside OpenVINO.
+    NO_CMAKE_FIND_ROOT_PATH
+)
+
+file(DOWNLOAD https://raw.githubusercontent.com/nothings/stb/f75e8d1cad7d90d72ef7a4661f1b994ef78b4e31/stb_image_write.h ${CMAKE_BINARY_DIR}/stb_image_write.h
+     EXPECTED_HASH MD5=845b8b43d7d941890a57a477455558ad)  # header from a pinned commit, saved to the build dir and verified by hash
+
+include(FetchContent)
+
+if(POLICY CMP0135)  # set the policy only on CMake versions that know it
+    cmake_policy(SET CMP0135 NEW)
+endif()
+
+FetchContent_Declare(indicators
+    URL https://github.com/p-ranav/indicators/archive/refs/tags/v2.3.tar.gz
+    URL_HASH SHA256=70da7a693ff7a6a283850ab6d62acf628eea17d386488af8918576d0760aef7b)
+FetchContent_MakeAvailable(indicators)
+
+# Create the text2video sample executable.
+
+add_executable(text2video text2video.cpp)
+# Include paths: the build dir (downloaded stb_image_write.h), the image_generation sample dir, and the repository's src/cpp/src tree.
+target_include_directories(text2video PRIVATE ${CMAKE_BINARY_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/../image_generation/" "${CMAKE_CURRENT_SOURCE_DIR}/../../../src/cpp/src/")
+target_link_libraries(text2video PRIVATE openvino::genai indicators::indicators)
+
+target_sources(text2video PRIVATE imwrite_video.cpp)
+
+set_target_properties(text2video PROPERTIES
+    # Ensure out of box LC_RPATH on macOS with SIP
+    INSTALL_RPATH_USE_LINK_PATH ON)
+
+install(TARGETS text2video
+        RUNTIME DESTINATION samples_bin/
+        COMPONENT samples_bin
+        EXCLUDE_FROM_ALL)