
Commit c155de2

Commit message: "After review"
1 parent 0233d1f commit c155de2

File tree

2 files changed: +72 additions, -60 deletions


tools/llm_bench/llm_bench_utils/prompt_utils.py

Lines changed: 20 additions & 14 deletions
@@ -10,7 +10,7 @@
 import logging as log
 from .model_utils import get_param_from_file
 from .parse_json_data import parse_text_json_data
-
+import openvino as ov

 def get_text_prompt(args):
     text_list = []
@@ -32,13 +32,16 @@ def inner(video_path, decym_frames):
         log.info(f"Requested to reduce into {decym_frames} frames")
         out_frames = func(video_path, decym_frames)
         log.info(f"Final frames number: {len(out_frames)}")
-        return np.array(out_frames)
+        log.info(f"First frame shape: {out_frames[0].shape}")
+        log.info(f"First frame dtype: {out_frames[0].dtype}")
+        return [ov.Tensor(frame) for frame in out_frames]
+        # return out_frames
     return inner


 @print_video_frames_number_and_convert_to_tensor
 def make_video_tensor(video_path, decym_frames=None):
-    supported_files = set([".mp4"])
+    supported_files = {".mp4"}

     assert os.path.exists(video_path), f"no input video file: {video_path}"
     assert video_path.suffix.lower() in supported_files, "no supported video file"
@@ -49,21 +52,24 @@ def make_video_tensor(video_path, decym_frames=None):
         ret, frame = cap.read()
         if not ret:
             break
+
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         pil_image = Image.fromarray(frame_rgb)
+        np_img_array = np.array(pil_image)
+        output_frames.append(np_img_array)

-        shape = np.array(pil_image).shape
-        dtype = np.array(pil_image).dtype
-        log.info(f"Video shape: {shape}")
-        log.info(f"Video dtype: {dtype}")
-        new_frame = np.zeros(shape, dtype)
+        shape = np_img_array.shape
+        dtype = np_img_array.dtype
+        log.debug(f"Video shape: {shape}")
+        log.debug(f"Video dtype: {dtype}")

-        width, height = pil_image.size
-        log.info(f"Video size: {width}x{height}")
-        for x in range(0, width):
-            for y in range(0, height):
-                new_frame[y, x] = frame_rgb[y, x]
-        output_frames.append(np.array(pil_image))
+        # new_frame = np.zeros(shape, dtype=int)
+        # width, height = pil_image.size
+        # log.debug(f"Video size: {width}x{height}")
+        # for x in range(0, width):
+        #     for y in range(0, height):
+        #         new_frame[y, x] = frame_rgb[y, x]
+        # output_frames.append(new_frame)

     if decym_frames is None:
         return output_frames
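
Taken together, these hunks change the video path from returning one stacked np.array to returning a list of per-frame ov.Tensor objects. Below is a minimal, self-contained sketch of the resulting read-and-wrap pipeline; frames_to_tensors is an illustrative name (not the commit's), and it assumes opencv-python, Pillow, numpy, and openvino are installed:

import cv2
import numpy as np
import openvino as ov
from PIL import Image

def frames_to_tensors(video_path):
    # Read every frame, convert OpenCV's BGR to RGB, and collect
    # HWC uint8 numpy arrays, as make_video_tensor now does.
    cap = cv2.VideoCapture(str(video_path))
    output_frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        output_frames.append(np.array(Image.fromarray(frame_rgb)))
    cap.release()
    # The decorator now wraps each frame in its own ov.Tensor
    # instead of stacking all frames into a single np.array.
    return [ov.Tensor(frame) for frame in output_frames]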

tools/llm_bench/task/visual_language_generation.py

Lines changed: 52 additions & 46 deletions
@@ -17,7 +17,7 @@
 import llm_bench_utils.output_file
 import llm_bench_utils.gen_output_data as gen_output_data
 import llm_bench_utils.parse_json_data as parse_json_data
-import llm_bench_utils.prompt_utils as pu
+import llm_bench_utils.prompt_utils as prompt_utils
 from pathlib import Path

 FW_UTILS = {'pt': llm_bench_utils.pt_utils, 'ov': llm_bench_utils.ov_utils}
@@ -33,32 +33,22 @@ def run_visual_language_generation_optimum(
     if args['batch_size'] != 1:
         log.warning("Only batch size 1 available for benchmarking")
         args["batch_size"] = 1
-    images = []
-    prompts = []
-    videos = []
-    inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs
-    for input_data in inputs:
-        if input_data.get("video", None):
-            entry = Path(input_data["video"])
-            video_tensor = pu.make_video_tensor(entry, required_frames)
-            videos.append(video_tensor)
-        elif input_data.get("media", None):
-            entry = Path(input_data["media"])
-            if entry.is_dir():
-                for file in sorted(entry.iterdir()):
-                    images.append(load_image(str(file)))
-            else:
-                images.append(load_image(input_data["media"]))
-        prompts.append(input_data["prompt"])
-    prefix = '[warm-up]' if num == 0 else '[{}]'.format(num)
-    log.info(f'{prefix}[P{prompt_index}] Input image nums:{len(images)}')
+
     if args["output_dir"] is not None and num == 0:
         for bs_index, in_text in enumerate(prompts):
-            llm_bench_utils.output_file.output_input_text(in_text, args, model_precision, prompt_index, bs_index, proc_id)
+            llm_bench_utils.output_file.output_input_text(
+                in_text, args, model_precision,
+                prompt_index, bs_index, proc_id)
     tok_encode_start = time.perf_counter()
+
+    prompts, images, videos = extract_prompt_issues(inputs, required_frames)
+    prefix = '[warm-up]' if num == 0 else '[{}]'.format(num)
+    log.info(f'{prefix}[P{prompt_index}] Input image nums: {len(images)}')
+    log.info(f'{prefix}[P{prompt_index}] Input video nums: {len(videos)}')
     input_data = model.preprocess_inputs(text=prompts[0], image=images[0] if images else None, **processor)
     if videos:
         input_data["videos"] = videos
+
     tok_encode_end = time.perf_counter()
     tok_encode_time = (tok_encode_end - tok_encode_start) * 1000
     # Remove `token_type_ids` from inputs
@@ -201,26 +191,12 @@ def run_visual_language_generation_genai(
     if args['batch_size'] != 1:
         log.warning("Only batch size 1 available for benchmarking")
         args["batch_size"] = 1
-    images = []
-    prompts = []
-    videos = []
-    inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs
-    for input_data in inputs:
-        if input_data.get("video", None):
-            entry = Path(input_data["video"])
-            video_tensor = pu.make_video_tensor(entry, required_frames)
-            videos.append(video_tensor)
-        elif input_data.get("media", None):
-            entry = Path(input_data["media"])
-            if entry.is_dir():
-                for file in sorted(entry.iterdir()):
-                    images.append(load_image_genai(str(file)))
-            else:
-                images.append(load_image_genai(input_data["media"]))
-        prompts.append(input_data["prompt"])
+
     if args["output_dir"] is not None and num == 0:
         for bs_index, in_text in enumerate(prompts):
-            llm_bench_utils.output_file.output_input_text(in_text, args, model_precision, prompt_index, bs_index, proc_id)
+            llm_bench_utils.output_file.output_input_text(
+                in_text, args, model_precision,
+                prompt_index, bs_index, proc_id)
     max_rss_mem_consumption = ''
     max_sys_mem_consumption = ''
     max_rss_mem_increase = ''
@@ -233,13 +209,18 @@ def run_visual_language_generation_genai(
     gen_config.num_beams = args["num_beams"]
     gen_config.do_sample = False
     gen_config.ignore_eos = True
+
     kwargs = {}
+    prompts, images, videos = extract_prompt_issues(inputs, required_frames)
+    prefix = '[warm-up]' if num == 0 else '[{}]'.format(num)
+    log.info(f'{prefix}[P{prompt_index}] Input image nums: {len(images)}')
+    log.info(f'{prefix}[P{prompt_index}] Input video nums: {len(videos)}')
+
     if images:
         kwargs["images"] = images
     if videos:
         kwargs["videos"] = videos
-    prefix = '[warm-up]' if num == 0 else '[{}]'.format(num)
-    log.info(f'{prefix}[P{prompt_index}] Input image nums:{len(images)}')
+
     start = time.perf_counter()
     generation_result = model.generate(prompts[0], generation_config=gen_config, **kwargs)
     end = time.perf_counter()
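
In the genai path, the extracted media now feed model.generate through optional keyword arguments. A condensed sketch of that call pattern, mirroring the diff (pipeline construction and timing elided; all variables as in the surrounding function):

prompts, images, videos = extract_prompt_issues(inputs, required_frames)

kwargs = {}
if images:
    kwargs["images"] = images    # image tensors
if videos:
    kwargs["videos"] = videos    # per-clip lists of ov.Tensor frames

# `model` is the already-built GenAI pipeline; only present media are passed.
generation_result = model.generate(prompts[0], generation_config=gen_config, **kwargs)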
@@ -354,8 +335,8 @@ def run_visual_language_generation_benchmark(
         for idx, input_text in enumerate(image_text_list):
             p_idx = prompt_idx_list[idx]
             if num == 0:
-                metrics_print.print_unicode(f'[warm-up][P{p_idx}] Input text: {input_text}',
-                                            max_output=metrics_print.MAX_INPUT_TXT_IN_LOG)
+                prefix = f'[warm-up][P{p_idx}] Input text: {input_text}'
+                metrics_print.print_unicode(prefix, max_output=metrics_print.MAX_INPUT_TXT_IN_LOG)
             iter_timestamp[num][p_idx]['start'] = datetime.datetime.now().isoformat()
             gen_fn(
                 input_text, num, model, processor, args, iter_data_list, md5_list,
@@ -368,8 +349,8 @@ def run_visual_language_generation_benchmark(
             p_idx = prompt_idx_list[idx]
             for num in range(num_iters + 1):
                 if num == 0:
-                    metrics_print.print_unicode(f'[warm-up][P{p_idx}] Input text: {input_text}',
-                                                max_output=metrics_print.MAX_INPUT_TXT_IN_LOG)
+                    prefix = f'[warm-up][P{p_idx}] Input text: {input_text}'
+                    metrics_print.print_unicode(prefix, max_output=metrics_print.MAX_INPUT_TXT_IN_LOG)
                 iter_timestamp[num][p_idx]['start'] = datetime.datetime.now().isoformat()
                 gen_fn(
                     input_text, num, model, processor, args, iter_data_list, md5_list, prompt_idx_list[idx],
@@ -382,6 +363,31 @@ def run_visual_language_generation_benchmark(
     return iter_data_list, pretrain_time, iter_timestamp


+def extract_prompt_issues(inputs, required_frames):
+    prompts, images, videos = [], [], []
+    if not isinstance(inputs, (list, tuple, set)):
+        inputs = [inputs]
+    for input_data in inputs:
+        if input_data.get("video") is not None:
+            entry = Path(input_data["video"])
+            if entry.is_dir():
+                for filename in sorted(entry.iterdir()):
+                    video_tensor = prompt_utils.make_video_tensor(filename, required_frames)
+                    videos.append(video_tensor)
+            else:
+                video_tensor = prompt_utils.make_video_tensor(entry, required_frames)
+                videos.append(video_tensor)
+        if input_data.get("media") is not None:
+            entry = Path(input_data["media"])
+            if entry.is_dir():
+                for file in sorted(entry.iterdir()):
+                    images.append(load_image(str(file)))
+            else:
+                images.append(load_image(str(entry)))
+        prompts.append(input_data["prompt"])
+    return prompts, images, videos
+
+
 def get_image_text_prompt(args):
     vlm_file_list = []
     output_data_list, is_json_data = model_utils.get_param_from_file(args, ["media", "prompt"])
@@ -393,7 +399,7 @@ def get_image_text_prompt(args):
                 if 'video' in vlm_file:
                     raise ValueError('media and video cannot be specify in a single prompt file')
                 vlm_file['media'] = model_utils.resolve_media_file_path(vlm_file.get('media'), args['prompt_file'][0])
-            elif args['prompt_file'] is not None and len(args['prompt_file']) > 0 and 'video' in vlm_file:
+            if args['prompt_file'] is not None and len(args['prompt_file']) > 0 and 'video' in vlm_file:
                 vlm_file['video'] = model_utils.resolve_media_file_path(vlm_file.get('video'), args['prompt_file'][0])
             vlm_file_list.append(vlm_file)
         else:
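
For context, a hedged usage sketch of the new extract_prompt_issues helper (the input dicts and file names are hypothetical; load_image and prompt_utils come from this module's imports):

# Hypothetical prompt entries shaped like parsed prompt-file data;
# the file paths are placeholders.
inputs = [
    {"prompt": "Describe the image.", "media": "cat.png"},
    {"prompt": "Describe the clip.", "video": "clip.mp4"},
]

# A single dict (not a list) is also accepted; the helper wraps it.
prompts, images, videos = extract_prompt_issues(inputs, required_frames=16)
# One prompt per entry; images loaded via load_image(); each video entry
# is the list of ov.Tensor frames from prompt_utils.make_video_tensor().
assert len(prompts) == 2 and len(images) == 1 and len(videos) == 1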
