
Commit 0281d1b

Commit message: + tests
1 parent f9b317c commit 0281d1b

File tree: 7 files changed, +128 -82 lines changed

tests/python_tests/samples/conftest.py

Lines changed: 20 additions & 0 deletions
@@ -387,3 +387,23 @@ def run_gc_after_test():
     """
     yield
     gc.collect()
+
+PROMPT_REPO_URL = "https://github.com/intel-innersource/frameworks.ai.openvino.llm.prompts/tree/master"
+
+@pytest.fixture(scope="session")
+def download_test_video():
+    github_raw_url = f"{PROMPT_REPO_URL}/multimodal/video/spinning-earth-480.mp4"
+    response = requests.get(github_raw_url, stream=True)
+    response.raise_for_status()
+
+    temp_dir = tempfile.mkdtemp()
+    video_path = os.path.join(temp_dir, "spinning-earth-480.mp4")
+    with open(video_path, 'wb') as f:
+        for chunk in response.iter_content(chunk_size=8192):
+            f.write(chunk)
+    yield video_path
+
+    if os.path.exists(video_path):
+        os.remove(video_path)
+    os.rmdir(temp_dir)
+
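As context for this hunk: a session-scoped pytest fixture is injected by naming it as a test argument, and the code after yield runs once as teardown at the end of the session. A minimal illustrative consumer (the test below is a sketch, not part of this commit):

import os

def test_uses_downloaded_video(download_test_video):
    # pytest resolves the session-scoped fixture once and passes in the local file path
    assert os.path.isfile(download_test_video)
    assert download_test_video.endswith(".mp4")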

tests/python_tests/samples/test_tools_llm_benchmark.py

Lines changed: 86 additions & 62 deletions
@@ -5,44 +5,52 @@
 import pytest
 import sys
 
+import requests
+from pathlib import Path
+import tempfile
+
 from test_utils import run_sample
 from data.models import get_gguf_model_list
 from utils.hugging_face import download_gguf_model
-from conftest import SAMPLES_PY_DIR, convert_model, download_test_content
+from conftest import SAMPLES_PY_DIR, convert_model, download_test_content, download_test_video
 from utils.hugging_face import download_and_convert_embeddings_models, download_and_convert_model
 
 convert_draft_model = convert_model
 download_mask_image = download_test_content
 
-image_generation_prompt = "side profile centered painted portrait, Gandhi rolling a blunt, Gloomhaven, matte painting concept art, art nouveau, 8K HD Resolution, beautifully background"
+image_generation_prompt = \
+    "side profile centered painted portrait, Gandhi rolling a blunt, "\
+    "Gloomhaven, matte painting concept art, art nouveau, "\
+    "8K HD Resolution, beautifully background"
 image_generation_json = [
     {"steps": 30, "width": 64, "height": 128, "guidance_scale": 1.0, "prompt": image_generation_prompt},
     {"steps": 4, "width": 64, "height": 32, "guidance_scale": 7.0, "prompt": image_generation_prompt}
 ]
-image_generation_inpainting_json = [
-    {"steps": 30, "width": 64, "height": 128, "guidance_scale": 1.0, "strength": "0.8", "media": "overture-creations.png", "mask_image": "overture-creations-mask.png", "prompt": image_generation_prompt},
-]
-image_generation_i2i_prompt = "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k"
-image_generation_i2i_json = [
-    {"steps": 30, "width": 64, "height": 128, "guidance_scale": 1.0, "strength": "0.8", "media": "cat.png", "prompt": image_generation_i2i_prompt},
-]
+image_generation_inpainting_json = [{
+    "steps": 30, "width": 64, "height": 128, "guidance_scale": 1.0, "strength": "0.8",
+    "media": "overture-creations.png", "prompt": image_generation_prompt,
+    "mask_image": "overture-creations-mask.png"
+}]
+image_generation_i2i_json = [{
+    "steps": 30, "width": 64, "height": 128, "guidance_scale": 1.0, "strength": "0.8", "media": "cat.png",
+    "prompt": "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k"
+}]
+
+
 
 class TestBenchmarkLLM:
+
     @pytest.mark.samples
-    @pytest.mark.parametrize(
-        "download_model, sample_args",
-        [
-            pytest.param("tiny-dummy-qwen2", ["-d", "cpu", "-n", "1", "-f", "pt", "-ic", "20"]),
-        ],
-        indirect=["download_model"],
-    )
+    @pytest.mark.parametrize("download_model, sample_args", [
+        pytest.param("tiny-dummy-qwen2", ["-d", "cpu", "-n", "1", "-f", "pt", "-ic", "20"]),
+    ], indirect=["download_model"])
     def test_python_tool_llm_benchmark_download_model(self, download_model, sample_args):
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [sys.executable, benchmark_script, "-m" , download_model] + sample_args
         run_sample(benchmark_py_command)
-
-
+
+
     @pytest.mark.samples
     @pytest.mark.parametrize(
         "convert_model, sample_args",
@@ -54,31 +62,27 @@ def test_python_tool_llm_benchmark_download_model(self, download_model, sample_a
             pytest.param("tiny-random-llava", [ "-ic", "4", "--optimum", "-pf", os.path.join(SAMPLES_PY_DIR, "llm_bench/prompts/llava-1.5-7b.jsonl")]),
             pytest.param("tiny-random-latent-consistency", [ "-d", "cpu", "-n", "1", "--num_steps", "4", "--static_reshape", "-p", "'an astronaut riding a horse on mars'"]),
             pytest.param("tiny-random-latent-consistency", [ "-d", "cpu", "-n", "1", "--num_steps", "4", "--static_reshape", "-p", "'an astronaut riding a horse on mars'", "--optimum"]),
-        ],
-        indirect=["convert_model"],
-    )
+        ], indirect=["convert_model"])
     def test_python_tool_llm_benchmark_convert_model(self, convert_model, sample_args):
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [sys.executable, benchmark_script, "-m" , convert_model] + sample_args
-        run_sample(benchmark_py_command)
-
-
+        run_sample(benchmark_py_command)
+
+
     @pytest.mark.samples
     @pytest.mark.parametrize(
         "convert_model, sample_args",
         [
             pytest.param("tiny-random-llava", [ "-ic", "20", "--prompt", "'What is unusual on this image?'"]),
             pytest.param("tiny-random-llava", [ "-ic", "20", "--optimum", "--prompt", "'What is unusual on this image?'"]),
-        ],
-        indirect=["convert_model"],
-    )
+        ], indirect=["convert_model"])
     @pytest.mark.parametrize("download_test_content", ["cat"], indirect=True)
     def test_python_tool_llm_benchmark_convert_model_media(self, convert_model, download_test_content, sample_args):
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [sys.executable, benchmark_script, "-m" , convert_model, "--media", download_test_content] + sample_args
-        run_sample(benchmark_py_command)
+        run_sample(benchmark_py_command)
 
 
     @pytest.mark.samples
@@ -102,7 +106,7 @@ def test_python_tool_llm_benchmark_speculative(self, convert_model, convert_draf
 
 
     @pytest.mark.samples
-    @pytest.mark.parametrize("sample_args",
+    @pytest.mark.parametrize("sample_args",
         [
             ["-d", "cpu", "-n", "1", "--num_steps", "4", "--optimum"],
             ["-d", "cpu", "-n", "1", "--num_steps", "4"],
@@ -117,51 +121,51 @@ def test_python_tool_llm_benchmark_jsonl(self, convert_model, generate_image_gen
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
-            "-m", convert_model,
-            "-pf", generate_image_generation_jsonl,
+            sys.executable,
+            benchmark_script,
+            "-m", convert_model,
+            "-pf", generate_image_generation_jsonl,
         ] + sample_args
         run_sample(benchmark_py_command)
-
-
+
+
     @pytest.mark.samples
     @pytest.mark.parametrize("sample_args", [["-d", "cpu", "-n", "1", "--num_steps", "4"], ["-d", "cpu", "-n", "1", "--num_steps", "4", "--empty_lora"]])
     @pytest.mark.parametrize("convert_model", ["tiny-random-latent-consistency"], indirect=True)
     @pytest.mark.parametrize("download_model", ["tiny-random-latent-consistency-lora"], indirect=True)
     @pytest.mark.parametrize("generate_image_generation_jsonl", [("image_generation.jsonl", image_generation_json)], indirect=True)
     def test_python_tool_llm_benchmark_jsonl_lora(self, request, convert_model, download_model, generate_image_generation_jsonl, sample_args):
         model_name = request.node.callspec.params['download_model']
-
+
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
-            "-m", convert_model,
+            sys.executable,
+            benchmark_script,
+            "-m", convert_model,
             "-pf", generate_image_generation_jsonl,
             "--lora", f'{download_model}/{model_name}.safetensors',
         ] + sample_args
         run_sample(benchmark_py_command)
-
-
+
+
     @pytest.mark.samples
     @pytest.mark.parametrize("sample_args", [["-d", "cpu", "-n", "1", "--num_steps", "4", "--task", "inpainting"]])
     @pytest.mark.parametrize("convert_model", ["tiny-random-latent-consistency"], indirect=True)
     @pytest.mark.parametrize("download_test_content", ["overture-creations.png"], indirect=True)
     @pytest.mark.parametrize("download_mask_image", ["overture-creations-mask.png"], indirect=True)
     @pytest.mark.parametrize("generate_image_generation_jsonl", [("image_generation_inpainting.jsonl", image_generation_inpainting_json)], indirect=True)
     def test_python_tool_llm_benchmark_inpainting(self, convert_model, download_test_content, download_mask_image, generate_image_generation_jsonl, sample_args):
-
+
         # to use the relative media and mask_image paths
         os.chdir(os.path.dirname(download_test_content))
 
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
-            "-m", convert_model,
+            sys.executable,
+            benchmark_script,
+            "-m", convert_model,
             "-pf", generate_image_generation_jsonl,
         ] + sample_args
         run_sample(benchmark_py_command)
@@ -173,31 +177,33 @@ def test_python_tool_llm_benchmark_inpainting(self, convert_model, download_test
     @pytest.mark.parametrize("download_test_content", ["cat.png"], indirect=True)
     @pytest.mark.parametrize("generate_image_generation_jsonl", [("image_generation_i2i.jsonl", image_generation_i2i_json)], indirect=True)
     def test_python_tool_llm_benchmark_i2i(self, convert_model, download_test_content, generate_image_generation_jsonl, sample_args):
-
+
         # to use the relative media and mask_image paths
         os.chdir(os.path.dirname(download_test_content))
 
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
-            "-m", convert_model,
+            sys.executable,
+            benchmark_script,
+            "-m", convert_model,
             "-pf", generate_image_generation_jsonl,
         ] + sample_args
         run_sample(benchmark_py_command)
 
 
     @pytest.mark.samples
-    @pytest.mark.parametrize("sample_args", [["-d", "cpu", "-n", "1", "-p", "'Why is the Sun yellow?'"], ["-d", "cpu", "-n", "1", "-p", "'Why is the Sun yellow?'", "--optimum"]])
+    @pytest.mark.parametrize("sample_args", [
+        ["-d", "cpu", "-n", "1", "-p", "'Why is the Sun yellow?'"],
+        ["-d", "cpu", "-n", "1", "-p", "'Why is the Sun yellow?'", "--optimum"]])
     @pytest.mark.parametrize("convert_model", ["tiny-random-SpeechT5ForTextToSpeech"], indirect=True)
     @pytest.mark.parametrize("download_test_content", ["cmu_us_awb_arctic-wav-arctic_a0001.bin"], indirect=True)
     def test_python_tool_llm_benchmark_tts(self, convert_model, download_test_content, sample_args):
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
+            sys.executable,
+            benchmark_script,
             "-m", convert_model,
             "--speaker_embeddings", download_test_content
         ] + sample_args
@@ -214,9 +220,9 @@ def test_python_tool_llm_benchmark_optimum(self, convert_model, download_test_co
         # Run Python benchmark
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
-            "-m", convert_model,
+            sys.executable,
+            benchmark_script,
+            "-m", convert_model,
             "--media", media_path,
         ] + sample_args
         run_sample(benchmark_py_command)
@@ -232,9 +238,9 @@ def test_python_tool_llm_benchmark_optimum(self, convert_model, download_test_co
     def test_python_tool_llm_benchmark_text_embeddings(self, convert_model, sample_args):
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
-            "-m", convert_model,
+            sys.executable,
+            benchmark_script,
+            "-m", convert_model,
         ] + sample_args
         run_sample(benchmark_py_command)
 
@@ -249,8 +255,8 @@ def test_python_tool_llm_benchmark_text_embeddings_qwen3(self, download_and_conv
         convert_model, hf_tokenizer, models_path = download_and_convert_embeddings_models
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
+            sys.executable,
+            benchmark_script,
             "-m", models_path,
         ] + sample_args
         run_sample(benchmark_py_command)
@@ -283,8 +289,8 @@ def test_python_tool_llm_benchmark_text_reranking_qwen3(self, model_id, sample_a
         model, hf_tokenizer, models_path = download_and_convert_model(model_id)
         benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
         benchmark_py_command = [
-            sys.executable,
-            benchmark_script,
+            sys.executable,
+            benchmark_script,
             "-m", models_path,
         ] + sample_args
         run_sample(benchmark_py_command)
@@ -305,3 +311,21 @@ def test_python_tool_llm_benchmark_gguf_format(self, sample_args):
             "-m", gguf_full_path,
         ] + sample_args
         run_sample(benchmark_py_command)
+
+    @pytest.mark.samples
+    @pytest.mark.parametrize("convert_model, sample_args", [
+        pytest.param("katuni4ka/tiny-random-qwen2vl", ["-d", "cpu", "-n", "1", "--genai", "-vf", "5"]),
+        pytest.param("katuni4ka/tiny-random-qwen2vl", ["-d", "cpu", "-n", "1", "--genai", "-vf", "-150"]),
+        pytest.param("katuni4ka/tiny-random-qwen2vl", ["-d", "cpu", "-n", "1", "--optimum", "-vf", "5"]),
+    ], indirect=["convert_model"])
+    def test_python_tool_llm_benchmark_video_prompts(self, download_test_video, convert_model, sample_args):
+        benchmark_script = os.path.join(SAMPLES_PY_DIR, 'llm_bench/benchmark.py')
+        benchmark_py_command = [
+            sys.executable,
+            benchmark_script,
+            "-m", convert_model,
+            "--video", download_test_video,
+            "--prompt", "What_is_presented_in_the_video?"
+        ]
+        benchmark_py_command.extend(sample_args)
+        run_sample(benchmark_py_command)
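For reference, the new test drives the benchmark CLI roughly like the manual invocation below; the model directory and video path are placeholders for illustration, not values taken from this commit:

import subprocess
import sys

# Placeholder paths for illustration only.
cmd = [
    sys.executable, "tools/llm_bench/benchmark.py",
    "-m", "./models/tiny-random-qwen2vl",      # converted model directory (placeholder)
    "--video", "./spinning-earth-480.mp4",     # locally downloaded test video (placeholder)
    "--prompt", "What_is_presented_in_the_video?",
    "-d", "cpu", "-n", "1", "--genai", "-vf", "5",
]
subprocess.run(cmd, check=True)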

tests/python_tests/test_vlm_pipeline.py

Lines changed: 3 additions & 3 deletions
@@ -304,12 +304,12 @@ def ov_pipe_model(request: pytest.FixtureRequest) -> VlmModelInfo:
     ids=lambda p: f"{p[0]}/{p[1]}",
     indirect=["ov_pipe_model"],
 )
-
+
 @pytest.fixture(scope="module")
 def ov_continious_batching_pipe() -> ContinuousBatchingPipeline:
     models_path = _get_ov_model(MODEL_IDS[0])
     return ContinuousBatchingPipeline(models_path, SchedulerConfig(), "CPU")
-
+
 @pytest.fixture(scope="module")
 def ov_continious_batching_pipe_gemma() -> ContinuousBatchingPipeline:
     models_path = _get_ov_model(MODEL_IDS[8])
@@ -424,7 +424,7 @@ def test_images(request: pytest.FixtureRequest):
 def test_vlm_pipeline(ov_pipe_model: VlmModelInfo, test_images: list[openvino.Tensor]):
     ov_pipe = ov_pipe_model.pipeline
     result_from_streamer = []
-
+
     def streamer(word: str) -> bool:
         nonlocal result_from_streamer
         result_from_streamer.append(word)

tools/llm_bench/benchmark.py

Lines changed: 1 addition & 5 deletions
@@ -231,7 +231,7 @@ def get_argprser():
     parser.add_argument("--vocoder_path", type=str, default=None,
                         help="Path to vocoder for text to speech scenarios")
     parser.add_argument("-vf", "--video_frames", type=int, default=None,
-                        help="controller of video frames to process (required frame number or decymation factor if negative)")
+                        help="controller of video frames to process (required frame number if positive or decymation factor if negative)")
     return parser.parse_args()
 
 
@@ -316,10 +316,6 @@ def main():
         iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case'].task](
             model_path, framework, args.device, args.tokens_len, args.streaming, model_args,
             args.num_iters, memory_data_collector)
-    elif model_args['use_case'].task == "visual_text_gen":
-        iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case'].task](
-            model_path, framework, args.device, model_args, args.num_iters,
-            memory_data_collector, decym_frames=args.video_frames)
     else:
         iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case'].task](
             model_path, framework, args.device, model_args, args.num_iters, memory_data_collector)
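A note on the -vf/--video_frames semantics described by the updated help string: a positive value requests that many frames, while a negative value acts as a decimation factor. The sketch below only illustrates that contract; the helper name and the even-spacing strategy are assumptions, not the llm_bench implementation:

def select_video_frames(total_frames, video_frames):
    # Illustration only: positive = target frame count (roughly evenly spaced),
    # negative = keep every |N|-th frame, None = keep all frames.
    if video_frames is None:
        return list(range(total_frames))
    if video_frames > 0:
        step = max(total_frames // video_frames, 1)
        return list(range(0, total_frames, step))[:video_frames]
    return list(range(0, total_frames, abs(video_frames)))

# select_video_frames(100, 5)    -> [0, 20, 40, 60, 80]
# select_video_frames(100, -150) -> [0]  (a factor larger than the clip keeps only the first frame)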

tools/llm_bench/llm_bench_utils/model_utils.py

Lines changed: 1 addition & 1 deletion
@@ -142,7 +142,7 @@ def analyze_args(args):
     model_args["rerank_texts"] = args.texts
     model_args["rerank_texts_file"] = args.texts_file
     model_args["apply_chat_template"] = args.apply_chat_template
-
+    model_args["video_frames"] = args.video_frames
     optimum = args.optimum
 
     if optimum and args.genai:
