
Commit 0281d1b

Commit message: + tests
1 parent f9b317c commit 0281d1b

File tree: 7 files changed, +128 -82 lines changed

tests/python_tests/samples/conftest.py

Lines changed: 20 additions & 0 deletions
@@ -387,3 +387,23 @@ def run_gc_after_test():
     """
     yield
     gc.collect()
+
+PROMPT_REPO_URL = "https://github.com/intel-innersource/frameworks.ai.openvino.llm.prompts/tree/master"
+
+@pytest.fixture(scope="session")
+def download_test_video():
+    github_raw_url = f"{PROMPT_REPO_URL}/multimodal/video/spinning-earth-480.mp4"
+    response = requests.get(github_raw_url, stream=True)
+    response.raise_for_status()
+
+    temp_dir = tempfile.mkdtemp()
+    video_path = os.path.join(temp_dir, "spinning-earth-480.mp4")
+    with open(video_path, 'wb') as f:
+        for chunk in response.iter_content(chunk_size=8192):
+            f.write(chunk)
+    yield video_path
+
+    if os.path.exists(video_path):
+        os.remove(video_path)
+    os.rmdir(temp_dir)
+
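As context for this hunk: a session-scoped pytest fixture is injected by naming it as a test argument, and the code after yield runs once as teardown at the end of the session. A minimal illustrative consumer (the test below is a sketch, not part of this commit):

import os

def test_uses_downloaded_video(download_test_video):
    # pytest resolves the session-scoped fixture once and passes in the local file path
    assert os.path.isfile(download_test_video)
    assert download_test_video.endswith(".mp4")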

tests/python_tests/samples/test_tools_llm_benchmark.py

Lines changed: 86 additions & 62 deletions
@@ -5,44 +5,52 @@
 import pytest
 import sys
 
+import requests
+from pathlib import Path
+import tempfile
+
 from test_utils import run_sample
 from data.models import get_gguf_model_list
 from utils.hugging_face import download_gguf_model
-from conftest import SAMPLES_PY_DIR, convert_model, download_test_content
+from conftest import SAMPLES_PY_DIR, convert_model, download_test_content, download_test_video
 from utils.hugging_face import download_and_convert_embeddings_models, download_and_convert_model
 
 convert_draft_model = convert_model
 download_mask_image = download_test_content
 
-image_generation_prompt = "side profile centered painted portrait, Gandhi rolling a blunt, Gloomhaven, matte painting concept art, art nouveau, 8K HD Resolution, beautifully background"
+image_generation_prompt = \
+    "side profile centered painted portrait, Gandhi rolling a blunt, "\
+    "Gloomhaven, matte painting concept art, art nouveau, "\
+    "8K HD Resolution, beautifully background"
 image_generation_json = [
     {"steps": 30, "width": 64, "height": 128, "guidance_scale": 1.0, "prompt": image_generation_prompt},
     {"steps": 4, "width": 64, "height": 32, "guidance_scale": 7.0, "prompt": image_generation_prompt}
 ]
-image_generation_inpainting_json = [
-    {"steps": 30, "width": 64, "height": 128, "guidance_scale": 1.0, "strength": "0.8", "media": "overture-creations.png", "mask_image": "overture-creations-mask.png", "prompt": image_generation_prompt},
-]
-image_generation_i2i_prompt = "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k"
-image_generation_i2i_json = [
-    {"steps": 30, "width": 64, "height": 128, "guidance_scale": 1.0, "strength": "0.8", "media": "cat.png", "prompt": image_generation_i2i_prompt},
-]
+image_generation_inpainting_json = [{
+    "steps": 30, "width": 64, "height": 128, "guidance_scale": 1.0, "strength": "0.8",
+    "media": "overture-creations.png", "prompt": image_generation_prompt,
+    "mask_image": "overture-creations-mask.png"
+}]
+image_generation_i2i_json = [{
+    "steps": 30, "width": 64, "height": 128, "guidance_scale": 1.0, "strength": "0.8", "media": "cat.png",
+    "prompt": "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k"
+}]
+
+
 
 class TestBenchmarkLLM:
+
     @pytest.mark.samples
-    @pytest.mark.parametrize(
-        "download_model, sample_args",
-        [
-            pytest.param("tiny-dummy-qwen2", ["-d", "cpu", "-n", "1", "-f", "pt", "-ic", "20"]),
-        ],
-        indirect=["download_model"],
-    )
+    @pytest.mark.parametrize("download_model, sample_args", [
+        pytest.param("tiny-dummy-qwen2", ["-d", "cpu", "-n", "1", "-f", "pt", "-ic", "20"]),
+    ], indirect=["download_model"])
     def test_python_tool_llm_benchmark_download_model(self, download_model, sample_args):
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [sys.executable, benchmark_script, "-m" , download_model] + sample_args
         run_sample(benchmark_py_command)
-
-
+
+
     @pytest.mark.samples
     @pytest.mark.parametrize(
         "convert_model, sample_args",
@@ -54,31 +62,27 @@ def test_python_tool_llm_benchmark_download_model(self, download_model, sample_a
             pytest.param("tiny-random-llava", [ "-ic", "4", "--optimum", "-pf", os.path.join(SAMPLES_PY_DIR, "llm_bench/prompts/llava-1.5-7b.jsonl")]),
             pytest.param("tiny-random-latent-consistency", [ "-d", "cpu", "-n", "1", "--num_steps", "4", "--static_reshape", "-p", "'an astronaut riding a horse on mars'"]),
             pytest.param("tiny-random-latent-consistency", [ "-d", "cpu", "-n", "1", "--num_steps", "4", "--static_reshape", "-p", "'an astronaut riding a horse on mars'", "--optimum"]),
-        ],
-        indirect=["convert_model"],
-    )
+        ], indirect=["convert_model"])
     def test_python_tool_llm_benchmark_convert_model(self, convert_model, sample_args):
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [sys.executable, benchmark_script, "-m" , convert_model] + sample_args
-        run_sample(benchmark_py_command)
-
-
+        run_sample(benchmark_py_command)
+
+
     @pytest.mark.samples
     @pytest.mark.parametrize(
         "convert_model, sample_args",
         [
             pytest.param("tiny-random-llava", [ "-ic", "20", "--prompt", "'What is unusual on this image?'"]),
             pytest.param("tiny-random-llava", [ "-ic", "20", "--optimum", "--prompt", "'What is unusual on this image?'"]),
-        ],
-        indirect=["convert_model"],
-    )
+        ], indirect=["convert_model"])
     @pytest.mark.parametrize("download_test_content", ["cat"], indirect=True)
     def test_python_tool_llm_benchmark_convert_model_media(self, convert_model, download_test_content, sample_args):
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [sys.executable, benchmark_script, "-m" , convert_model, "--media", download_test_content] + sample_args
-        run_sample(benchmark_py_command)
+        run_sample(benchmark_py_command)
 
 
     @pytest.mark.samples
@@ -102,7 +106,7 @@ def test_python_tool_llm_benchmark_speculative(self, convert_model, convert_draf
 
 
     @pytest.mark.samples
-    @pytest.mark.parametrize("sample_args",
+    @pytest.mark.parametrize("sample_args",
         [
             ["-d", "cpu", "-n", "1", "--num_steps", "4", "--optimum"],
             ["-d", "cpu", "-n", "1", "--num_steps", "4"],
@@ -117,51 +121,51 @@ def test_python_tool_llm_benchmark_jsonl(self, convert_model, generate_image_gen
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
-            "-m", convert_model,
-            "-pf", generate_image_generation_jsonl,
+            sys.executable,
+            benchmark_script,
+            "-m", convert_model,
+            "-pf", generate_image_generation_jsonl,
         ] + sample_args
         run_sample(benchmark_py_command)
-
-
+
+
     @pytest.mark.samples
     @pytest.mark.parametrize("sample_args", [["-d", "cpu", "-n", "1", "--num_steps", "4"], ["-d", "cpu", "-n", "1", "--num_steps", "4", "--empty_lora"]])
     @pytest.mark.parametrize("convert_model", ["tiny-random-latent-consistency"], indirect=True)
     @pytest.mark.parametrize("download_model", ["tiny-random-latent-consistency-lora"], indirect=True)
     @pytest.mark.parametrize("generate_image_generation_jsonl", [("image_generation.jsonl", image_generation_json)], indirect=True)
     def test_python_tool_llm_benchmark_jsonl_lora(self, request, convert_model, download_model, generate_image_generation_jsonl, sample_args):
         model_name = request.node.callspec.params['download_model']
-
+
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
-            "-m", convert_model,
+            sys.executable,
+            benchmark_script,
+            "-m", convert_model,
             "-pf", generate_image_generation_jsonl,
             "--lora", f'{download_model}/{model_name}.safetensors',
         ] + sample_args
         run_sample(benchmark_py_command)
-
-
+
+
     @pytest.mark.samples
     @pytest.mark.parametrize("sample_args", [["-d", "cpu", "-n", "1", "--num_steps", "4", "--task", "inpainting"]])
     @pytest.mark.parametrize("convert_model", ["tiny-random-latent-consistency"], indirect=True)
     @pytest.mark.parametrize("download_test_content", ["overture-creations.png"], indirect=True)
     @pytest.mark.parametrize("download_mask_image", ["overture-creations-mask.png"], indirect=True)
     @pytest.mark.parametrize("generate_image_generation_jsonl", [("image_generation_inpainting.jsonl", image_generation_inpainting_json)], indirect=True)
     def test_python_tool_llm_benchmark_inpainting(self, convert_model, download_test_content, download_mask_image, generate_image_generation_jsonl, sample_args):
-
+
         # to use the relative media and mask_image paths
         os.chdir(os.path.dirname(download_test_content))
 
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
-            "-m", convert_model,
+            sys.executable,
+            benchmark_script,
+            "-m", convert_model,
             "-pf", generate_image_generation_jsonl,
         ] + sample_args
         run_sample(benchmark_py_command)
@@ -173,31 +177,33 @@ def test_python_tool_llm_benchmark_inpainting(self, convert_model, download_test
     @pytest.mark.parametrize("download_test_content", ["cat.png"], indirect=True)
     @pytest.mark.parametrize("generate_image_generation_jsonl", [("image_generation_i2i.jsonl", image_generation_i2i_json)], indirect=True)
     def test_python_tool_llm_benchmark_i2i(self, convert_model, download_test_content, generate_image_generation_jsonl, sample_args):
-
+
         # to use the relative media and mask_image paths
         os.chdir(os.path.dirname(download_test_content))
 
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
-            "-m", convert_model,
+            sys.executable,
+            benchmark_script,
+            "-m", convert_model,
             "-pf", generate_image_generation_jsonl,
         ] + sample_args
         run_sample(benchmark_py_command)
 
 
     @pytest.mark.samples
-    @pytest.mark.parametrize("sample_args", [["-d", "cpu", "-n", "1", "-p", "'Why is the Sun yellow?'"], ["-d", "cpu", "-n", "1", "-p", "'Why is the Sun yellow?'", "--optimum"]])
+    @pytest.mark.parametrize("sample_args", [
+        ["-d", "cpu", "-n", "1", "-p", "'Why is the Sun yellow?'"],
+        ["-d", "cpu", "-n", "1", "-p", "'Why is the Sun yellow?'", "--optimum"]])
     @pytest.mark.parametrize("convert_model", ["tiny-random-SpeechT5ForTextToSpeech"], indirect=True)
     @pytest.mark.parametrize("download_test_content", ["cmu_us_awb_arctic-wav-arctic_a0001.bin"], indirect=True)
     def test_python_tool_llm_benchmark_tts(self, convert_model, download_test_content, sample_args):
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
+            sys.executable,
+            benchmark_script,
             "-m", convert_model,
             "--speaker_embeddings", download_test_content
         ] + sample_args
@@ -214,9 +220,9 @@ def test_python_tool_llm_benchmark_optimum(self, convert_model, download_test_co
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
-            "-m", convert_model,
+            sys.executable,
+            benchmark_script,
+            "-m", convert_model,
             "--media", media_path,
         ] + sample_args
         run_sample(benchmark_py_command)
@@ -232,9 +238,9 @@ def test_python_tool_llm_benchmark_optimum(self, convert_model, download_test_co
     def test_python_tool_llm_benchmark_text_embeddings(self, convert_model, sample_args):
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
-            "-m", convert_model,
+            sys.executable,
+            benchmark_script,
+            "-m", convert_model,
         ] + sample_args
         run_sample(benchmark_py_command)
 
@@ -249,8 +255,8 @@ def test_python_tool_llm_benchmark_text_embeddings_qwen3(self, download_and_conv
         convert_model, hf_tokenizer, models_path = download_and_convert_embeddings_models
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
+            sys.executable,
+            benchmark_script,
             "-m", models_path,
         ] + sample_args
         run_sample(benchmark_py_command)
@@ -283,8 +289,8 @@ def test_python_tool_llm_benchmark_text_reranking_qwen3(self, model_id, sample_a
         model, hf_tokenizer, models_path = download_and_convert_model(model_id)
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
+            sys.executable,
+            benchmark_script,
             "-m", models_path,
         ] + sample_args
         run_sample(benchmark_py_command)
@@ -305,3 +311,21 @@ def test_python_tool_llm_benchmark_gguf_format(self, sample_args):
             "-m", gguf_full_path,
         ] + sample_args
         run_sample(benchmark_py_command)
+
+    @pytest.mark.samples
+    @pytest.mark.parametrize("convert_model, sample_args", [
+        pytest.param("katuni4ka/tiny-random-qwen2vl", ["-d", "cpu", "-n", "1", "--genai", "-vf", "5"]),
+        pytest.param("katuni4ka/tiny-random-qwen2vl", ["-d", "cpu", "-n", "1", "--genai", "-vf", "-150"]),
+        pytest.param("katuni4ka/tiny-random-qwen2vl", ["-d", "cpu", "-n", "1", "--optimum", "-vf", "5"]),
+    ], indirect=["convert_model"])
+    def test_python_tool_llm_benchmark_video_prompts(self, download_test_video, convert_model, sample_args):
+        benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
+        benchmark_py_command = [
+            sys.executable,
+            benchmark_script,
+            "-m", convert_model,
+            "--video", download_test_video,
+            "--prompt", "What_is_presented_in_the_video?"
+        ]
+        benchmark_py_command.extend(sample_args)
+        run_sample(benchmark_py_command)
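For reference, the new test drives the benchmark CLI roughly like the manual invocation below; the model directory and video path are placeholders for illustration, not values taken from this commit:

import subprocess
import sys

# Placeholder paths for illustration only.
cmd = [
    sys.executable, "tools/llm_bench/benchmark.py",
    "-m", "./models/tiny-random-qwen2vl",      # converted model directory (placeholder)
    "--video", "./spinning-earth-480.mp4",     # locally downloaded test video (placeholder)
    "--prompt", "What_is_presented_in_the_video?",
    "-d", "cpu", "-n", "1", "--genai", "-vf", "5",
]
subprocess.run(cmd, check=True)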

tests/python_tests/test_vlm_pipeline.py

Lines changed: 3 additions & 3 deletions
@@ -304,12 +304,12 @@ def ov_pipe_model(request: pytest.FixtureRequest) -> VlmModelInfo:
     ids=lambda p: f"{p[0]}/{p[1]}",
     indirect=["ov_pipe_model"],
 )
-
+
 @pytest.fixture(scope="module")
 def ov_continious_batching_pipe() -> ContinuousBatchingPipeline:
     models_path = _get_ov_model(MODEL_IDS[0])
     return ContinuousBatchingPipeline(models_path, SchedulerConfig(), "CPU")
-
+
 @pytest.fixture(scope="module")
 def ov_continious_batching_pipe_gemma() -> ContinuousBatchingPipeline:
     models_path = _get_ov_model(MODEL_IDS[8])
@@ -424,7 +424,7 @@ def test_images(request: pytest.FixtureRequest):
 def test_vlm_pipeline(ov_pipe_model: VlmModelInfo, test_images: list[openvino.Tensor]):
     ov_pipe = ov_pipe_model.pipeline
     result_from_streamer = []
-
+
     def streamer(word: str) -> bool:
         nonlocal result_from_streamer
         result_from_streamer.append(word)

tools/llm_bench/benchmark.py

Lines changed: 1 addition & 5 deletions
@@ -231,7 +231,7 @@ def get_argprser():
     parser.add_argument("--vocoder_path", type=str, default=None,
                         help="Path to vocoder for text to speech scenarios")
     parser.add_argument("-vf", "--video_frames", type=int, default=None,
-                        help="controller of video frames to process (required frame number or decymation factor if negative)")
+                        help="controller of video frames to process (required frame number if positive or decymation factor if negative)")
     return parser.parse_args()
 
 
@@ -316,10 +316,6 @@ def main():
         iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case'].task](
             model_path, framework, args.device, args.tokens_len, args.streaming, model_args,
             args.num_iters, memory_data_collector)
-    elif model_args['use_case'].task == "visual_text_gen":
-        iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case'].task](
-            model_path, framework, args.device, model_args, args.num_iters,
-            memory_data_collector, decym_frames=args.video_frames)
     else:
         iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case'].task](
             model_path, framework, args.device, model_args, args.num_iters, memory_data_collector)
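A note on the -vf/--video_frames semantics described by the updated help string: a positive value requests that many frames, while a negative value acts as a decimation factor. The sketch below only illustrates that contract; the helper name and the even-spacing strategy are assumptions, not the llm_bench implementation:

def select_video_frames(total_frames, video_frames):
    # Illustration only: positive = target frame count (roughly evenly spaced),
    # negative = keep every |N|-th frame, None = keep all frames.
    if video_frames is None:
        return list(range(total_frames))
    if video_frames > 0:
        step = max(total_frames // video_frames, 1)
        return list(range(0, total_frames, step))[:video_frames]
    return list(range(0, total_frames, abs(video_frames)))

# select_video_frames(100, 5)    -> [0, 20, 40, 60, 80]
# select_video_frames(100, -150) -> [0]  (a factor larger than the clip keeps only the first frame)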

tools/llm_bench/llm_bench_utils/model_utils.py

Lines changed: 1 addition & 1 deletion
@@ -142,7 +142,7 @@ def analyze_args(args):
     model_args["rerank_texts"] = args.texts
     model_args["rerank_texts_file"] = args.texts_file
     model_args["apply_chat_template"] = args.apply_chat_template
-
+    model_args["video_frames"] = args.video_frames
     optimum = args.optimum
 
     if optimum and args.genai:
