diff --git a/tests/python_tests/samples/conftest.py b/tests/python_tests/samples/conftest.py index b84a6aad80..3aa71fcb3c 100644 --- a/tests/python_tests/samples/conftest.py +++ b/tests/python_tests/samples/conftest.py @@ -26,7 +26,7 @@ # - "name": the model's name or path # - "convert_args": a list of arguments for the conversion command MODELS: Dict[str, Dict[str, Any]] = { - "TinyLlama-1.1B-Chat-v1.0": { + "TinyLlama-1.1B-Chat-v1.0": { "name": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "convert_args": ['--weight-format', 'fp16'] }, @@ -46,7 +46,7 @@ "SmolLM2-360M": { "name": "HuggingFaceTB/SmolLM2-360M", "convert_args": ['--trust-remote-code'] - }, + }, "WhisperTiny": { "name": "openai/whisper-tiny", "convert_args": ['--trust-remote-code', '--weight-format', 'fp16'] @@ -84,11 +84,11 @@ "LCM_Dreamshaper_v7-int8-ov": { "name": "OpenVINO/LCM_Dreamshaper_v7-int8-ov", "convert_args": [] - }, + }, "llava-1.5-7b-hf": { "name": "llava-hf/llava-1.5-7b-hf", "convert_args": ['--trust-remote-code', '--weight-format', 'fp16'] - }, + }, "llava-v1.6-mistral-7b-hf": { "name": "llava-hf/llava-v1.6-mistral-7b-hf", "convert_args": ['--trust-remote-code', '--weight-format', 'fp16'] @@ -129,6 +129,10 @@ "name": "katuni4ka/tiny-random-llava", "convert_args": ["--trust-remote-code", "--task", "image-text-to-text"] }, + "tiny-random-qwen2vl": { + "name": "katuni4ka/tiny-random-qwen2vl", + "convert_args": ["--trust-remote-code", "--task", "image-text-to-text"] + }, "bge-small-en-v1.5": { "name": "BAAI/bge-small-en-v1.5", "convert_args": ["--trust-remote-code"] @@ -148,6 +152,10 @@ "tiny-random-SpeechT5ForTextToSpeech": { "name": "hf-internal-testing/tiny-random-SpeechT5ForTextToSpeech", "convert_args": ["--model-kwargs", json.dumps({"vocoder": "fxmarty/speecht5-hifigan-tiny"})] + }, + "tiny-random-llava-next-video": { + "name": "katuni4ka/tiny-random-llava-next-video", + "convert_args": ["--trust-remote-code", "--task", "image-text-to-text"] } } @@ -164,7 +172,8 @@ "cat.png": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png", "cat": "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11", "3283_1447_000.tar.gz": "https://huggingface.co/datasets/facebook/multilingual_librispeech/resolve/main/data/mls_polish/train/audio/3283_1447_000.tar.gz", - "cmu_us_awb_arctic-wav-arctic_a0001.bin": "https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/cmu_us_awb_arctic-wav-arctic_a0001.bin" + "cmu_us_awb_arctic-wav-arctic_a0001.bin": "https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/cmu_us_awb_arctic-wav-arctic_a0001.bin", + "video0.mp4": "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/Coco%20Walking%20in%20Berkeley.mp4" } SAMPLES_PY_DIR = Path( @@ -182,23 +191,24 @@ ) ) + @pytest.fixture(scope="session", autouse=True) def setup_and_teardown(request, tmp_path_factory): """Fixture to set up and tear down the temporary directories.""" - - ov_cache = get_ov_cache_dir(tmp_path_factory.mktemp("ov_cache")) + + ov_cache = get_ov_cache_dir(tmp_path_factory.mktemp("ov_cache")) downloaded_models_dir = get_ov_cache_downloaded_models_dir() converted_models_dir = get_ov_cache_converted_models_dir() test_data = ov_cache / "test_data" - + logger.info(f"Creating directories: {downloaded_models_dir}, {converted_models_dir}, and {test_data}") test_data.mkdir(parents=True, exist_ok=True) - + request.config.cache.set("OV_CACHE", str(ov_cache)) 
request.config.cache.set("TEST_DATA", str(test_data)) - + yield - + if os.environ.get("CLEANUP_CACHE", "false").lower() != "false": if os.path.exists(ov_cache): logger.info(f"Removing temporary directory: {ov_cache}") @@ -213,9 +223,9 @@ def download_gguf_model(model: Dict[str, Any], model_path: str) -> None: model_name = model["name"] model_gguf_filename = model["gguf_filename"] dest_dir = Path(model_path) - + manager = AtomicDownloadManager(dest_dir) - + def download_to_temp(temp_path: Path) -> None: command = ["huggingface-cli", "download", model_name, model_gguf_filename, "--local-dir", str(temp_path)] logger.info(f"Downloading command: {' '.join(command)}") @@ -325,26 +335,27 @@ def download_to_temp(temp_path: Path) -> None: command = ["huggingface-cli", "download", model_name, "--local-dir", str(temp_path)] logger.info(f"Downloading command: {' '.join(command)}") retry_request(lambda: subprocess.run(command, check=True, capture_output=True, text=True, env=sub_env)) - + manager.execute(download_to_temp) - + yield str(model_path) - + if os.environ.get("CLEANUP_CACHE", "false").lower() == "true": if model_cache.exists(): logger.info(f"Removing downloaded model: {model_cache}") shutil.rmtree(model_cache) + @pytest.fixture(scope="session") def download_test_content(request): """Download the test content from the given URL and return the file path or extracted folder.""" - + test_data = request.config.cache.get("TEST_DATA", None) - + file_name = request.param file_url = TEST_FILES[file_name] file_path = os.path.join(test_data, file_name) - + if not os.path.exists(file_path): logger.info(f"Downloading test content from {file_url} to {file_path}...") os.makedirs(os.path.dirname(file_path), exist_ok=True) @@ -384,9 +395,9 @@ def download_test_content(request): @pytest.fixture(scope="session") def generate_test_content(request): """Generate an image of lines and return the file path.""" - + test_data = request.config.cache.get("TEST_DATA", None) - + file_name = request.param file_path = os.path.join(test_data, file_name) if not os.path.exists(file_path): @@ -412,24 +423,24 @@ def generate_test_content(request): @pytest.fixture(scope="session") def generate_image_generation_jsonl(request): """Generate a JSONL file for image generation prompts.""" - + test_data = request.config.cache.get("TEST_DATA", None) file_name, json_entries = request.param file_path = os.path.join(test_data, file_name) - + if not os.path.exists(file_path): os.makedirs(os.path.dirname(file_path), exist_ok=True) - + with open(file_path, "w", encoding="utf-8") as f: for entry in json_entries: f.write(json.dumps(entry) + "\n") - + logger.info(f"Generated image generation JSONL file at {file_path}") else: logger.info(f"Image generation JSONL file already exists at {file_path}") - + yield file_path - + # Cleanup the JSONL file after tests if os.environ.get("CLEANUP_CACHE", "false").lower() == "true": if os.path.exists(file_path): diff --git a/tests/python_tests/samples/test_tools_llm_benchmark.py b/tests/python_tests/samples/test_tools_llm_benchmark.py index 42f16bd413..4bd1c51a13 100644 --- a/tests/python_tests/samples/test_tools_llm_benchmark.py +++ b/tests/python_tests/samples/test_tools_llm_benchmark.py @@ -14,35 +14,39 @@ convert_draft_model = convert_model download_mask_image = download_test_content -image_generation_prompt = "side profile centered painted portrait, Gandhi rolling a blunt, Gloomhaven, matte painting concept art, art nouveau, 8K HD Resolution, beautifully background" +image_generation_prompt = \ + "side 
profile centered painted portrait, Gandhi rolling a blunt, "\ + "Gloomhaven, matte painting concept art, art nouveau, "\ + "8K HD Resolution, beautifully background" image_generation_json = [ {"steps": 30, "width": 64, "height": 128, "guidance_scale": 1.0, "prompt": image_generation_prompt}, {"steps": 4, "width": 64, "height": 32, "guidance_scale": 7.0, "prompt": image_generation_prompt} ] -image_generation_inpainting_json = [ - {"steps": 30, "width": 64, "height": 128, "guidance_scale": 1.0, "strength": "0.8", "media": "overture-creations.png", "mask_image": "overture-creations-mask.png", "prompt": image_generation_prompt}, -] -image_generation_i2i_prompt = "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k" -image_generation_i2i_json = [ - {"steps": 30, "width": 64, "height": 128, "guidance_scale": 1.0, "strength": "0.8", "media": "cat.png", "prompt": image_generation_i2i_prompt}, -] +image_generation_inpainting_json = [{ + "steps": 30, "width": 64, "height": 128, "guidance_scale": 1.0, "strength": "0.8", + "media": "overture-creations.png", "prompt": image_generation_prompt, + "mask_image": "overture-creations-mask.png" +}] +image_generation_i2i_json = [{ + "steps": 30, "width": 64, "height": 128, "guidance_scale": 1.0, "strength": "0.8", "media": "cat.png", + "prompt": "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k" +}] + + class TestBenchmarkLLM: + @pytest.mark.samples - @pytest.mark.parametrize( - "download_model, sample_args", - [ - pytest.param("tiny-dummy-qwen2", ["-d", "cpu", "-n", "1", "-f", "pt", "-ic", "20"]), - ], - indirect=["download_model"], - ) + @pytest.mark.parametrize("download_model, sample_args", [ + pytest.param("tiny-dummy-qwen2", ["-d", "cpu", "-n", "1", "-f", "pt", "-ic", "20"]), + ], indirect=["download_model"]) def test_python_tool_llm_benchmark_download_model(self, download_model, sample_args): # Run Python benchmark benchmark_script = SAMPLES_PY_DIR / 'llm_bench/benchmark.py' benchmark_py_command = [sys.executable, benchmark_script, "-m" , download_model] + sample_args run_sample(benchmark_py_command) - - + + @pytest.mark.samples @pytest.mark.parametrize( "convert_model, sample_args", @@ -54,31 +58,28 @@ def test_python_tool_llm_benchmark_download_model(self, download_model, sample_a pytest.param("tiny-random-llava", [ "-ic", "4", "--optimum", "-pf", SAMPLES_PY_DIR / "llm_bench/prompts/llava-1.5-7b.jsonl"]), pytest.param("tiny-random-latent-consistency", [ "-d", "cpu", "-n", "1", "--num_steps", "4", "--static_reshape", "-p", "'an astronaut riding a horse on mars'"]), pytest.param("tiny-random-latent-consistency", [ "-d", "cpu", "-n", "1", "--num_steps", "4", "--static_reshape", "-p", "'an astronaut riding a horse on mars'", "--optimum"]), - ], - indirect=["convert_model"], - ) + ], indirect=["convert_model"]) def test_python_tool_llm_benchmark_convert_model(self, convert_model, sample_args): # Run Python benchmark benchmark_script = SAMPLES_PY_DIR / 'llm_bench/benchmark.py' benchmark_py_command = [sys.executable, benchmark_script, "-m" , convert_model] + sample_args - run_sample(benchmark_py_command) - - + run_sample(benchmark_py_command) + + @pytest.mark.samples @pytest.mark.parametrize( "convert_model, sample_args", [ pytest.param("tiny-random-llava", [ "-ic", "20", "--prompt", "'What is unusual on this image?'"]), pytest.param("tiny-random-llava", [ "-ic", "20", "--optimum", "--prompt", "'What is unusual on this image?'"]), - ], - indirect=["convert_model"], - ) + ], 
indirect=["convert_model"]) @pytest.mark.parametrize("download_test_content", ["cat"], indirect=True) def test_python_tool_llm_benchmark_convert_model_media(self, convert_model, download_test_content, sample_args): # Run Python benchmark benchmark_script = SAMPLES_PY_DIR / 'llm_bench/benchmark.py' - benchmark_py_command = [sys.executable, benchmark_script, "-m" , convert_model, "--media", download_test_content] + sample_args - run_sample(benchmark_py_command) + benchmark_py_command = [sys.executable, benchmark_script, "-m" , convert_model, "--media", download_test_content] + benchmark_py_command += sample_args + run_sample(benchmark_py_command) @pytest.mark.samples @@ -102,7 +103,7 @@ def test_python_tool_llm_benchmark_speculative(self, convert_model, convert_draf @pytest.mark.samples - @pytest.mark.parametrize("sample_args", + @pytest.mark.parametrize("sample_args", [ ["-d", "cpu", "-n", "1", "--num_steps", "4", "--optimum"], ["-d", "cpu", "-n", "1", "--num_steps", "4"], @@ -117,14 +118,14 @@ def test_python_tool_llm_benchmark_jsonl(self, convert_model, generate_image_gen # Run Python benchmark benchmark_script = SAMPLES_PY_DIR / 'llm_bench/benchmark.py' benchmark_py_command = [ - sys.executable, - benchmark_script, - "-m", convert_model, - "-pf", generate_image_generation_jsonl, + sys.executable, + benchmark_script, + "-m", convert_model, + "-pf", generate_image_generation_jsonl, ] + sample_args run_sample(benchmark_py_command) - - + + @pytest.mark.samples @pytest.mark.parametrize("sample_args", [["-d", "cpu", "-n", "1", "--num_steps", "4"], ["-d", "cpu", "-n", "1", "--num_steps", "4", "--empty_lora"]]) @pytest.mark.parametrize("convert_model", ["tiny-random-latent-consistency"], indirect=True) @@ -132,19 +133,19 @@ def test_python_tool_llm_benchmark_jsonl(self, convert_model, generate_image_gen @pytest.mark.parametrize("generate_image_generation_jsonl", [("image_generation.jsonl", image_generation_json)], indirect=True) def test_python_tool_llm_benchmark_jsonl_lora(self, request, convert_model, download_model, generate_image_generation_jsonl, sample_args): model_name = request.node.callspec.params['download_model'] - + # Run Python benchmark benchmark_script = SAMPLES_PY_DIR / 'llm_bench/benchmark.py' benchmark_py_command = [ - sys.executable, - benchmark_script, - "-m", convert_model, + sys.executable, + benchmark_script, + "-m", convert_model, "-pf", generate_image_generation_jsonl, "--lora", f'{download_model}/{model_name}.safetensors', ] + sample_args run_sample(benchmark_py_command) - - + + @pytest.mark.samples @pytest.mark.parametrize("sample_args", [["-d", "cpu", "-n", "1", "--num_steps", "4", "--task", "inpainting"]]) @pytest.mark.parametrize("convert_model", ["tiny-random-latent-consistency"], indirect=True) @@ -152,16 +153,16 @@ def test_python_tool_llm_benchmark_jsonl_lora(self, request, convert_model, down @pytest.mark.parametrize("download_mask_image", ["overture-creations-mask.png"], indirect=True) @pytest.mark.parametrize("generate_image_generation_jsonl", [("image_generation_inpainting.jsonl", image_generation_inpainting_json)], indirect=True) def test_python_tool_llm_benchmark_inpainting(self, convert_model, download_test_content, download_mask_image, generate_image_generation_jsonl, sample_args): - + # to use the relative media and mask_image paths os.chdir(os.path.dirname(download_test_content)) # Run Python benchmark benchmark_script = SAMPLES_PY_DIR / 'llm_bench/benchmark.py' benchmark_py_command = [ - sys.executable, - benchmark_script, - "-m", convert_model, + 
sys.executable, + benchmark_script, + "-m", convert_model, "-pf", generate_image_generation_jsonl, ] + sample_args run_sample(benchmark_py_command) @@ -173,31 +174,33 @@ def test_python_tool_llm_benchmark_inpainting(self, convert_model, download_test @pytest.mark.parametrize("download_test_content", ["cat.png"], indirect=True) @pytest.mark.parametrize("generate_image_generation_jsonl", [("image_generation_i2i.jsonl", image_generation_i2i_json)], indirect=True) def test_python_tool_llm_benchmark_i2i(self, convert_model, download_test_content, generate_image_generation_jsonl, sample_args): - + # to use the relative media and mask_image paths os.chdir(os.path.dirname(download_test_content)) # Run Python benchmark benchmark_script = SAMPLES_PY_DIR / 'llm_bench/benchmark.py' benchmark_py_command = [ - sys.executable, - benchmark_script, - "-m", convert_model, + sys.executable, + benchmark_script, + "-m", convert_model, "-pf", generate_image_generation_jsonl, ] + sample_args run_sample(benchmark_py_command) @pytest.mark.samples - @pytest.mark.parametrize("sample_args", [["-d", "cpu", "-n", "1", "-p", "'Why is the Sun yellow?'"], ["-d", "cpu", "-n", "1", "-p", "'Why is the Sun yellow?'", "--optimum"]]) + @pytest.mark.parametrize("sample_args", [ + ["-d", "cpu", "-n", "1", "-p", "'Why is the Sun yellow?'"], + ["-d", "cpu", "-n", "1", "-p", "'Why is the Sun yellow?'", "--optimum"]]) @pytest.mark.parametrize("convert_model", ["tiny-random-SpeechT5ForTextToSpeech"], indirect=True) @pytest.mark.parametrize("download_test_content", ["cmu_us_awb_arctic-wav-arctic_a0001.bin"], indirect=True) def test_python_tool_llm_benchmark_tts(self, convert_model, download_test_content, sample_args): # Run Python benchmark benchmark_script = SAMPLES_PY_DIR / 'llm_bench/benchmark.py' benchmark_py_command = [ - sys.executable, - benchmark_script, + sys.executable, + benchmark_script, "-m", convert_model, "--speaker_embeddings", download_test_content ] + sample_args @@ -214,9 +217,9 @@ def test_python_tool_llm_benchmark_optimum(self, convert_model, download_test_co # Run Python benchmark benchmark_script = SAMPLES_PY_DIR / 'llm_bench/benchmark.py' benchmark_py_command = [ - sys.executable, - benchmark_script, - "-m", convert_model, + sys.executable, + benchmark_script, + "-m", convert_model, "--media", media_path, ] + sample_args run_sample(benchmark_py_command) @@ -232,9 +235,9 @@ def test_python_tool_llm_benchmark_optimum(self, convert_model, download_test_co def test_python_tool_llm_benchmark_text_embeddings(self, convert_model, sample_args): benchmark_script = SAMPLES_PY_DIR / 'llm_bench/benchmark.py' benchmark_py_command = [ - sys.executable, - benchmark_script, - "-m", convert_model, + sys.executable, + benchmark_script, + "-m", convert_model, ] + sample_args run_sample(benchmark_py_command) @@ -248,8 +251,8 @@ def test_python_tool_llm_benchmark_text_embeddings(self, convert_model, sample_a def test_python_tool_llm_benchmark_text_embeddings_qwen3(self, convert_model, sample_args): benchmark_script = SAMPLES_PY_DIR / 'llm_bench/benchmark.py' benchmark_py_command = [ - sys.executable, - benchmark_script, + sys.executable, + benchmark_script, "-m", convert_model, ] + sample_args run_sample(benchmark_py_command) @@ -283,10 +286,9 @@ def test_python_tool_llm_benchmark_text_reranking_qwen3(self, model_id, sample_a model_schema = download_and_convert_model(model_id) benchmark_script = SAMPLES_PY_DIR / 'llm_bench/benchmark.py' benchmark_py_command = [ - sys.executable, - benchmark_script, - "-m", - 
model_schema.models_path, + sys.executable, + benchmark_script, + "-m", model_schema.models_path ] + sample_args run_sample(benchmark_py_command) @@ -306,3 +308,24 @@ def test_python_tool_llm_benchmark_gguf_format(self, sample_args): "-m", gguf_full_path, ] + sample_args run_sample(benchmark_py_command) + + + + @pytest.mark.samples + @pytest.mark.parametrize("download_test_content", ["video0.mp4"], indirect=True) + @pytest.mark.parametrize("convert_model, sample_args", [ + pytest.param("tiny-random-llava-next-video", ["-d", "cpu", "-n", "1", "--genai", "-vf", "5"]), + pytest.param("tiny-random-llava-next-video", ["-d", "cpu", "-n", "1", "--genai", "-vf", "-3"]), + pytest.param("tiny-random-llava-next-video", ["-d", "cpu", "-n", "1", "--optimum", "-vf", "5"]), + ], indirect=["convert_model"]) + def test_python_tool_llm_benchmark_video_prompts(self, download_test_content, convert_model, sample_args): + benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py') + benchmark_py_command = [ + sys.executable, + benchmark_script, + "-m", convert_model, + "--video", download_test_content, + "--prompt", "What_is_presented_in_the_video?" + ] + benchmark_py_command.extend(sample_args) + run_sample(benchmark_py_command) diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index e9de552a70..8168ad86cf 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -313,12 +313,12 @@ def ov_pipe_model(request: pytest.FixtureRequest) -> VlmModelInfo: ids=lambda p: f"{p[0]}/{p[1]}", indirect=["ov_pipe_model"], ) - + @pytest.fixture(scope="module") def ov_continious_batching_pipe() -> ContinuousBatchingPipeline: models_path = _get_ov_model(MODEL_IDS[0]) return ContinuousBatchingPipeline(models_path, SchedulerConfig(), "CPU") - + @pytest.fixture(scope="module") def ov_continious_batching_pipe_gemma() -> ContinuousBatchingPipeline: models_path = _get_ov_model(MODEL_IDS[8]) @@ -433,7 +433,7 @@ def test_images(request: pytest.FixtureRequest): def test_vlm_pipeline(ov_pipe_model: VlmModelInfo, test_images: list[openvino.Tensor]): ov_pipe = ov_pipe_model.pipeline result_from_streamer = [] - + def streamer(word: str) -> bool: nonlocal result_from_streamer result_from_streamer.append(word) diff --git a/tools/llm_bench/benchmark.py b/tools/llm_bench/benchmark.py index e584d5fd82..f4f63a137c 100644 --- a/tools/llm_bench/benchmark.py +++ b/tools/llm_bench/benchmark.py @@ -78,6 +78,7 @@ def get_argprser(): 'if the value equals 0 (default), execute the warm-up iteration(0th iteration).', ) parser.add_argument('-i', '--images', default=None, help='test images for vision tasks. Can be directory or path to single image') + parser.add_argument('-v', '--video', default=None, help='test video for vision tasks. Can be directory or path to single video') parser.add_argument('-s', '--seed', type=int, default=42, required=False, help='specific random seed to generate fix result. 
Default 42.') parser.add_argument( '-lc', @@ -229,15 +230,17 @@ def get_argprser(): help="Path to .bin or .pt file with speaker embeddings for text to speech scenarios") parser.add_argument("--vocoder_path", type=str, default=None, help="Path to vocoder for text to speech scenarios") + parser.add_argument("-vf", "--video_frames", type=int, default=None, + help="controller of video frames to process (required frame number if positive or decimation factor if negative)") return parser.parse_args() CASE_TO_BENCH = { - 'text_gen': bench_text.run_text_generation_benchmark, - 'image_gen': bench_image.run_image_generation_benchmark, - 'code_gen': bench_text.run_text_generation_benchmark, - 'ldm_super_resolution': bench_ldm_sr.run_ldm_super_resolution_benchmark, - 'speech_to_text': bench_speech.run_speech_2_txt_benchmark, + "text_gen": bench_text.run_text_generation_benchmark, + "image_gen": bench_image.run_image_generation_benchmark, + "code_gen": bench_text.run_text_generation_benchmark, + "ldm_super_resolution": bench_ldm_sr.run_ldm_super_resolution_benchmark, + "speech_to_text": bench_speech.run_speech_2_txt_benchmark, "visual_text_gen": bench_vlm.run_visual_language_generation_benchmark, "text_embed": bench_text_embed.run_text_embddings_benchmark, "text_to_speech": bench_text_to_speech.run_text_2_speech_benchmark, @@ -316,6 +319,7 @@ def main(): else: iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case'].task]( model_path, framework, args.device, model_args, args.num_iters, memory_data_collector) + if args.report is not None or args.report_json is not None: model_precision = '' if framework == 'ov': diff --git a/tools/llm_bench/llm_bench_utils/model_utils.py b/tools/llm_bench/llm_bench_utils/model_utils.py index 7a83d525ec..1a9ccaf237 100644 --- a/tools/llm_bench/llm_bench_utils/model_utils.py +++ b/tools/llm_bench/llm_bench_utils/model_utils.py @@ -43,6 +43,7 @@ def get_param_from_file(args, input_key): elif args[input_key] is not None and args['prompt_file'] is not None: raise RuntimeError(f'== {input_key} and prompt file should not exist together ==') + else: if args[input_key] is not None: if args[input_key] != '': @@ -56,11 +57,15 @@ def get_param_from_file(args, input_key): if "media" in input_key: if args["media"] is None and args["images"] is None: if args["use_case"].task == "visual_text_gen": - log.warn("Input image is not provided. Only text generation part will be evaluated") + if args["video"] is None: + log.warn("Input image/video is not provided. Only text generation part will be evaluated") elif args["use_case"].task != "image_gen": raise RuntimeError("No input image. ImageToImage/Inpainting Models cannot start generation without one. Please, provide an image.") else: data_dict["media"] = args["media"] if args["media"] is not None else args["images"] + if "video" in input_key and args["video"] is not None: + data_dict["video"] = args["video"] + if args["prompt"] is None: if args["use_case"].task == "visual_text_gen": data_dict["prompt"] = "What is OpenVINO?" 
if data_dict.get("media") is None else "Describe image" @@ -112,6 +117,7 @@ def analyze_args(args): model_args["height"] = args.height model_args["width"] = args.width model_args['images'] = args.images + model_args['video'] = args.video model_args['seed'] = args.seed model_args['mem_consumption'] = args.memory_consumption model_args['batch_size'] = args.batch_size @@ -135,7 +141,7 @@ def analyze_args(args): model_args["rerank_texts"] = args.texts model_args["rerank_texts_file"] = args.texts_file model_args["apply_chat_template"] = args.apply_chat_template - + model_args["video_frames"] = args.video_frames optimum = args.optimum if optimum and args.genai: diff --git a/tools/llm_bench/llm_bench_utils/parse_json_data.py b/tools/llm_bench/llm_bench_utils/parse_json_data.py index 4e2a13f841..e93bfe0e45 100644 --- a/tools/llm_bench/llm_bench_utils/parse_json_data.py +++ b/tools/llm_bench/llm_bench_utils/parse_json_data.py @@ -2,60 +2,52 @@ # Copyright (C) 2023-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +def create_base_prompt(json_data, key='prompt'): + prompt_data = {} + if key not in json_data: + raise RuntimeError(f"== key word '{key}' does not exist ==") + if json_data[key] == "": + raise RuntimeError(f"== {key} should not be empty string ==") + prompt_data[key] = json_data[key] + return prompt_data + def parse_text_json_data(json_data_list): text_param_list = [] for json_data in json_data_list: - if 'prompt' in json_data: - if json_data['prompt'] != '': - text_param_list.append(json_data['prompt']) - else: - raise RuntimeError('== prompt should not be empty string ==') - else: - raise RuntimeError('== key word "prompt" does not exist ==') + prompt_data = create_base_prompt(json_data) + text_param_list.append(prompt_data["prompt"]) return text_param_list def parse_vlm_json_data(json_data_list): text_param_list = [] for json_data in json_data_list: - prompt_data = {} - if 'prompt' in json_data: - if json_data['prompt'] != '': - prompt_data["prompt"] = json_data['prompt'] - else: - raise RuntimeError('== prompt should not be empty string ==') - else: - raise RuntimeError('== key word "prompt" does not exist ==') + prompt_data = create_base_prompt(json_data) if "media" in json_data: prompt_data["media"] = json_data["media"] + if "video" in json_data: + prompt_data["video"] = json_data["video"] text_param_list.append(prompt_data) return text_param_list def parse_image_json_data(json_data_list): image_param_list = [] - for data in json_data_list: - image_param = {} - if 'prompt' in data: - if data['prompt'] != '': - image_param['prompt'] = data['prompt'] - else: - raise RuntimeError('== prompt should not be empty string ==') - else: - raise RuntimeError('== key word "prompt" does not exist in prompt file ==') - if 'width' in data: - image_param['width'] = int(data['width']) - if 'height' in data: - image_param['height'] = int(data['height']) - if 'steps' in data: - image_param['steps'] = int(data['steps']) - if 'guidance_scale' in data: - image_param['guidance_scale'] = float(data['guidance_scale']) - if 'media' in data: - image_param['media'] = data['media'] - if 'mask_image' in data: - image_param['mask_image'] = data['mask_image'] + for json_data in json_data_list: + image_param = create_base_prompt(json_data) + if 'width' in json_data: + image_param['width'] = int(json_data['width']) + if 'height' in json_data: + image_param['height'] = int(json_data['height']) + if 'steps' in json_data: + image_param['steps'] = int(json_data['steps']) + if 'guidance_scale' in json_data: + 
image_param['guidance_scale'] = float(json_data['guidance_scale']) + if 'media' in json_data: + image_param['media'] = json_data['media'] + if 'mask_image' in json_data: + image_param['mask_image'] = json_data['mask_image'] image_param_list.append(image_param) return image_param_list @@ -63,17 +55,10 @@ def parse_image_json_data(json_data_list): def parse_speech_json_data(json_data_list): speech_param_list = [] for json_data in json_data_list: - speech_param = {} - if 'media' in json_data: - if json_data['media'] != '': - speech_param['media'] = json_data['media'] - else: - raise RuntimeError('== media path should not be empty string ==') - else: - raise RuntimeError('== key word "media" does not exist ==') - if 'language' in json_data: - speech_param['language'] = json_data['language'] - if 'timestamp' in json_data: - speech_param['timestamp'] = json_data['timestamp'] + speech_param = create_base_prompt(json_data, "media") + if "language" in json_data: + speech_param["language"] = json_data["language"] + if "timestamp" in json_data: + speech_param["timestamp"] = json_data["timestamp"] speech_param_list.append(speech_param) return speech_param_list diff --git a/tools/llm_bench/llm_bench_utils/prompt_utils.py b/tools/llm_bench/llm_bench_utils/prompt_utils.py index 5f19cb6d7f..dbff0e87f4 100644 --- a/tools/llm_bench/llm_bench_utils/prompt_utils.py +++ b/tools/llm_bench/llm_bench_utils/prompt_utils.py @@ -2,8 +2,20 @@ # Copyright (C) 2023-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 + +import os +import numpy as np +from PIL import Image +import logging as log +from transformers.image_utils import load_image from .model_utils import get_param_from_file +from .model_utils import resolve_media_file_path from .parse_json_data import parse_text_json_data +from .parse_json_data import parse_vlm_json_data +from pathlib import Path +import openvino as ov +import math +import cv2 def get_text_prompt(args): @@ -17,3 +29,116 @@ def get_text_prompt(args): else: text_list.append(output_data_list[0]) return text_list + + +def print_video_frames_number_and_convert_to_tensor(func): + def inner(video_path, decim_frames, genai_flag): + log.info(f"Input video file: {video_path}") + if decim_frames is not None: + log.info(f"Requested to reduce into {decim_frames} frames") + out_frames = func(video_path, decim_frames) + log.info(f"Final frames number: {len(out_frames)}") + log.info(f"First frame shape: {out_frames[0].shape}") + log.info(f"First frame dtype: {out_frames[0].dtype}") + if genai_flag: + return ov.Tensor(out_frames) + return np.array(out_frames) + return inner + + +@print_video_frames_number_and_convert_to_tensor +def make_video_tensor(video_path, decim_frames=None): + assert os.path.exists(video_path), f"no input video file: {video_path}" + cap = cv2.VideoCapture(video_path) + + output_frames = [] + while True: + ret, frame = cap.read() + if not ret: + break + + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + pil_image = Image.fromarray(frame_rgb) + + np_img_array = np.array(pil_image) + log.debug(f"Video shape: {np_img_array.shape}") + log.debug(f"Video dtype: {np_img_array.dtype}") + output_frames.append(np_img_array) + + if not decim_frames: + log.info(f"Video decim: no-set: {decim_frames}: skip") + return output_frames + + # decimation procedure + # decim_frames is required max frame number if positive + # or decimation factor if negative + # e.g. 
if input frames number is 100 and decim_fames = 5: + # then number of processed frames are: 0, 20, 40, 60, 80 + # if input frames number is 100 and decim_fames = -5: + # then number of processed frames are: 0, 5, 10, 15, 20, ... + + decim_frames = int(decim_frames) + if decim_frames > 0: + if len(output_frames) <= decim_frames: + log.info(f"Video decim: too short to decim: crop: {decim_frames}") + return list(output_frames[:decim_frames]) + decim_factor_f = float(len(output_frames)) / decim_frames + decim_factor = int(math.ceil(decim_factor_f)) + else: + decim_factor = -decim_frames + log.info(f"Video decim factor: {decim_factor}") + if decim_factor >= 2: + return list(output_frames[::decim_factor]) + log.info("Video decim: too large decim factor: skip") + return output_frames + + +def load_image_genai(image_path): + pil_image = load_image(image_path) + image_data = np.array(pil_image)[None] + return ov.Tensor(image_data) + + +def extract_prompt_data(inputs, required_frames, genai_flag): + prompts, images, videos = [], [], [] + if not isinstance(inputs, (list, tuple, set)): + inputs = [inputs] + for input_data in inputs: + if input_data.get("video") is not None: + entry = Path(input_data["video"]) + if entry.is_dir(): + for filename in sorted(entry.iterdir()): + video_tensor = make_video_tensor(filename, required_frames, genai_flag) + videos.append(video_tensor) + else: + video_tensor = make_video_tensor(entry, required_frames, genai_flag) + videos.append(video_tensor) + if input_data.get("media") is not None: + func_load_image = load_image_genai if genai_flag else load_image + entry = Path(input_data["media"]) + if entry.is_dir(): + for file in sorted(entry.iterdir()): + img = func_load_image(str(file)) + images.append(img) + else: + img = func_load_image(input_data["media"]) + images.append(img) + prompts.append(input_data["prompt"]) + return prompts, images, videos + + +def get_image_text_prompt(args): + vlm_file_list = [] + output_data_list, is_json_data = get_param_from_file(args, ["video", "media", "prompt"]) + if is_json_data: + vlm_param_list = parse_vlm_json_data(output_data_list) + if len(vlm_param_list) > 0: + for vlm_file in vlm_param_list: + if args['prompt_file'] is not None and len(args['prompt_file']) > 0 and 'media' in vlm_file: + vlm_file['media'] = resolve_media_file_path(vlm_file.get('media'), args['prompt_file'][0]) + if args['prompt_file'] is not None and len(args['prompt_file']) > 0 and 'video' in vlm_file: + vlm_file['video'] = resolve_media_file_path(vlm_file.get('video'), args['prompt_file'][0]) + vlm_file_list.append(vlm_file) + else: + vlm_file_list.append(output_data_list) + return vlm_file_list diff --git a/tools/llm_bench/requirements.txt b/tools/llm_bench/requirements.txt index ca1f523560..2829489eda 100644 --- a/tools/llm_bench/requirements.txt +++ b/tools/llm_bench/requirements.txt @@ -8,7 +8,7 @@ pillow torch transformers[sentencepiece]>=4.40.0 diffusers>=0.22.0 -#optimum is in dependency list of optimum-intel +#optimum is in dependency list of optimum-intel optimum-intel[nncf]>=1.25.0 packaging psutil @@ -21,3 +21,4 @@ scipy gguf_parser gguf>=0.10 num2words +opencv-python diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 4b626bdfaa..5c513b5ad4 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -9,49 +9,47 @@ import llm_bench_utils.pt_utils import llm_bench_utils.model_utils as model_utils import numpy as np 
-import openvino as ov import hashlib -import llm_bench_utils.metrics_print as metrics_print from transformers import set_seed -from transformers.image_utils import load_image import llm_bench_utils.output_file +import llm_bench_utils.metrics_print as metrics_print import llm_bench_utils.gen_output_data as gen_output_data -import llm_bench_utils.parse_json_data as parse_json_data -from pathlib import Path - +from llm_bench_utils.prompt_utils import extract_prompt_data +from llm_bench_utils.prompt_utils import get_image_text_prompt -FW_UTILS = {'pt': llm_bench_utils.pt_utils, 'ov': llm_bench_utils.ov_utils} DEFAULT_OUTPUT_TOKEN_SIZE = 512 +FW_UTILS = { + 'pt': llm_bench_utils.pt_utils, + 'ov': llm_bench_utils.ov_utils +} def run_visual_language_generation_optimum( - inputs, num, model, processor, args, iter_data_list, md5_list, prompt_index, bench_hook, model_precision, proc_id, mem_consumption -): + inputs, num, model, processor, args, iter_data_list, md5_list, prompt_index, + bench_hook, model_precision, proc_id, mem_consumption): from optimum.intel.utils.import_utils import is_transformers_version set_seed(args['seed']) if args['batch_size'] != 1: log.warning("Only batch size 1 available for benchmarking") args["batch_size"] = 1 - images = [] - prompts = [] - inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs - for input_data in inputs: - if input_data.get("media", None): - entry = Path(input_data["media"]) - if entry.is_dir(): - for file in sorted(entry.iterdir()): - images.append(load_image(str(file))) - else: - images.append(load_image(input_data["media"])) - prompts.append(input_data["prompt"]) - prefix = '[warm-up]' if num == 0 else '[{}]'.format(num) - log.info(f'{prefix}[P{prompt_index}] Input image nums:{len(images)}') + + decim_frames = args["video_frames"] + prompts, images, videos = extract_prompt_data(inputs, decim_frames, False) if args["output_dir"] is not None and num == 0: for bs_index, in_text in enumerate(prompts): - llm_bench_utils.output_file.output_input_text(in_text, args, model_precision, prompt_index, bs_index, proc_id) + llm_bench_utils.output_file.output_input_text( + in_text, args, model_precision, + prompt_index, bs_index, proc_id) tok_encode_start = time.perf_counter() - input_data = model.preprocess_inputs(text=prompts[0], image=images[0] if images else None, **processor) + + prefix = '[warm-up]' if num == 0 else '[{}]'.format(num) + log.info(f'{prefix}[P{prompt_index}] Input image nums: {len(images)}') + log.info(f'{prefix}[P{prompt_index}] Input video nums: {len(videos)}') + input_data = model.preprocess_inputs(image=images[0] if images else None, + video=videos[0] if videos else None, + text=prompts[0], **processor) + tok_encode_end = time.perf_counter() tok_encode_time = (tok_encode_end - tok_encode_start) * 1000 # Remove `token_type_ids` from inputs @@ -182,33 +180,21 @@ def run_visual_language_generation_optimum( bench_hook.clear_mm_embeddins_time_list() -def load_image_genai(image_path): - pil_image = load_image(image_path) - image_data = np.array(pil_image)[None] - return ov.Tensor(image_data) - - def run_visual_language_generation_genai( - inputs, num, model, processor, args, iter_data_list, md5_list, prompt_index, streamer, model_precision, proc_id, mem_consumption -): + inputs, num, model, processor, args, iter_data_list, md5_list, + prompt_index, streamer, model_precision, proc_id, mem_consumption): if args['batch_size'] != 1: log.warning("Only batch size 1 available for benchmarking") args["batch_size"] = 1 - images = [] - 
prompts = [] - inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs - for input_data in inputs: - if input_data.get("media", None): - entry = Path(input_data["media"]) - if entry.is_dir(): - for file in sorted(entry.iterdir()): - images.append(load_image_genai(str(file))) - else: - images.append(load_image_genai(input_data["media"])) - prompts.append(input_data["prompt"]) + + decim_frames = args["video_frames"] + prompts, images, videos = extract_prompt_data(inputs, decim_frames, True) if args["output_dir"] is not None and num == 0: for bs_index, in_text in enumerate(prompts): - llm_bench_utils.output_file.output_input_text(in_text, args, model_precision, prompt_index, bs_index, proc_id) + llm_bench_utils.output_file.output_input_text( + in_text, args, model_precision, + prompt_index, bs_index, proc_id) + max_rss_mem_consumption = '' max_sys_mem_consumption = '' max_rss_mem_increase = '' @@ -221,11 +207,17 @@ def run_visual_language_generation_genai( gen_config.num_beams = args["num_beams"] gen_config.do_sample = False gen_config.ignore_eos = True + kwargs = {} - if len(images) >= 1: - kwargs["images"] = images prefix = '[warm-up]' if num == 0 else '[{}]'.format(num) - log.info(f'{prefix}[P{prompt_index}] Input image nums:{len(images)}') + log.info(f'{prefix}[P{prompt_index}] Input image nums: {len(images)}') + log.info(f'{prefix}[P{prompt_index}] Input video nums: {len(videos)}') + + if images: + kwargs["images"] = images + if videos: + kwargs["videos"] = videos + start = time.perf_counter() generation_result = model.generate(prompts[0], generation_config=gen_config, **kwargs) end = time.perf_counter() @@ -305,7 +297,8 @@ def run_visual_language_generation_genai( def run_visual_language_generation_benchmark(model_path, framework, device, args, num_iters, mem_consumption): - model, processor, pretrain_time, bench_hook, use_genai = FW_UTILS[framework].create_image_text_gen_model(model_path, device, mem_consumption, **args) + outs = FW_UTILS[framework].create_image_text_gen_model(model_path, device, mem_consumption, **args) + model, processor, pretrain_time, bench_hook, use_genai = outs model_precision = model_utils.get_model_precision(model_path.parts) iter_data_list = [] md5_list = {num : {} for num in range(num_iters + 1)} @@ -325,10 +318,10 @@ def run_visual_language_generation_benchmark(model_path, framework, device, args log.info(f"Numbeams: {args['num_beams']}, benchmarking iter nums(exclude warm-up): {num_iters}, " f'prompt nums: {len(image_text_list)}, prompt idx: {prompt_idx_list}') - if not use_genai: - gen_fn = run_visual_language_generation_optimum - else: + if use_genai: gen_fn = run_visual_language_generation_genai + else: + gen_fn = run_visual_language_generation_optimum proc_id = os.getpid() iter_timestamp = model_utils.init_timestamp(num_iters, image_text_list, prompt_idx_list) @@ -337,42 +330,29 @@ def run_visual_language_generation_benchmark(model_path, framework, device, args for idx, input_text in enumerate(image_text_list): p_idx = prompt_idx_list[idx] if num == 0: - metrics_print.print_unicode(f'[warm-up][P{p_idx}] Input text: {input_text}', max_output=metrics_print.MAX_INPUT_TXT_IN_LOG) + prefix = f'[warm-up][P{p_idx}] Input text: {input_text}' + metrics_print.print_unicode(prefix, max_output=metrics_print.MAX_INPUT_TXT_IN_LOG) iter_timestamp[num][p_idx]['start'] = datetime.datetime.now().isoformat() gen_fn( input_text, num, model, processor, args, iter_data_list, md5_list, p_idx, bench_hook, model_precision, proc_id, mem_consumption) 
iter_timestamp[num][p_idx]['end'] = datetime.datetime.now().isoformat() - prefix = '[warm-up]' if num == 0 else '[{}]'.format(num) - log.info(f"{prefix}[P{p_idx}] start: {iter_timestamp[num][p_idx]['start']}, end: {iter_timestamp[num][p_idx]['end']}") + prefix = f"[warm-up][P{p_idx}]" if num == 0 else f"[{num}][P{p_idx}]" + log.info(f"{prefix} start: {iter_timestamp[num][p_idx]['start']}, end: {iter_timestamp[num][p_idx]['end']}") else: for idx, input_text in enumerate(image_text_list): p_idx = prompt_idx_list[idx] for num in range(num_iters + 1): if num == 0: - metrics_print.print_unicode(f'[warm-up][P{p_idx}] Input text: {input_text}', max_output=metrics_print.MAX_INPUT_TXT_IN_LOG) + prefix = f'[warm-up][P{p_idx}] Input text: {input_text}' + metrics_print.print_unicode(prefix, max_output=metrics_print.MAX_INPUT_TXT_IN_LOG) iter_timestamp[num][p_idx]['start'] = datetime.datetime.now().isoformat() gen_fn( input_text, num, model, processor, args, iter_data_list, md5_list, prompt_idx_list[idx], bench_hook, model_precision, proc_id, mem_consumption) iter_timestamp[num][p_idx]['end'] = datetime.datetime.now().isoformat() - prefix = '[warm-up]' if num == 0 else '[{}]'.format(num) - log.info(f"{prefix}[P{p_idx}] start: {iter_timestamp[num][p_idx]['start']}, end: {iter_timestamp[num][p_idx]['end']}") + prefix = f"[warm-up][P{p_idx}]" if num == 0 else f"[{num}][P{p_idx}]" + log.info(f"{prefix} start: {iter_timestamp[num][p_idx]['start']}, end: {iter_timestamp[num][p_idx]['end']}") metrics_print.print_average(iter_data_list, prompt_idx_list, args['batch_size'], True) return iter_data_list, pretrain_time, iter_timestamp - - -def get_image_text_prompt(args): - vlm_file_list = [] - output_data_list, is_json_data = model_utils.get_param_from_file(args, ['media', "prompt"]) - if is_json_data: - vlm_param_list = parse_json_data.parse_vlm_json_data(output_data_list) - if len(vlm_param_list) > 0: - for vlm_file in vlm_param_list: - if args['prompt_file'] is not None and len(args['prompt_file']) > 0: - vlm_file['media'] = model_utils.resolve_media_file_path(vlm_file.get("media"), args['prompt_file'][0]) - vlm_file_list.append(vlm_file) - else: - vlm_file_list.append(output_data_list) - return vlm_file_list
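
The frame-selection rule behind the new -vf/--video_frames option, restated as a minimal standalone sketch of the logic in make_video_tensor (assumption: it operates on frame indices instead of decoded cv2 frames, and select_frames is a hypothetical helper used only for illustration):

    import math

    def select_frames(num_frames, video_frames=None):
        # Positive video_frames caps how many frames are kept;
        # negative video_frames is a decimation factor (keep every |value|-th frame);
        # None/0 keeps everything, mirroring the "no-set" branch above.
        indices = list(range(num_frames))
        if not video_frames:
            return indices
        if video_frames > 0:
            if num_frames <= video_frames:
                return indices[:video_frames]  # already short enough: just crop
            factor = math.ceil(num_frames / video_frames)
        else:
            factor = -video_frames
        return indices[::factor] if factor >= 2 else indices

    # select_frames(100, 5)  -> [0, 20, 40, 60, 80]
    # select_frames(100, -5) -> [0, 5, 10, ..., 95]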
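With these changes a VLM prompt-file entry can reference a clip through the new "video" key alongside "prompt" and "media", and get_image_text_prompt resolves the path against the prompt file location via resolve_media_file_path, e.g. a hypothetical JSONL line: {"prompt": "What is presented in the video?", "video": "video0.mp4"}. On the command line the equivalent inputs are --video (a single file or a directory of clips) together with -vf/--video_frames, as exercised by test_python_tool_llm_benchmark_video_prompts above.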