Skip to content

Commit 23248ee

Browse files
committed
After review
1 parent 3f0109c commit 23248ee

File tree

5 files changed

+161
-114
lines changed

5 files changed

+161
-114
lines changed

tools/llm_bench/benchmark.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ def get_argprser():
7878
'if the value equals 0 (default), execute the warm-up iteration(0th iteration).',
7979
)
8080
parser.add_argument('-i', '--images', default=None, help='test images for vision tasks. Can be directory or path to single image')
81+
parser.add_argument('-vp', '--videos', default=None, help='test videos for vision tasks. Can be directory or path to single video')
8182
parser.add_argument('-s', '--seed', type=int, default=42, required=False, help='specific random seed to generate fix result. Default 42.')
8283
parser.add_argument(
8384
'-lc',
@@ -230,16 +231,16 @@ def get_argprser():
230231
parser.add_argument("--vocoder_path", type=str, default=None,
231232
help="Path to vocoder for text to speech scenarios")
232233
parser.add_argument("-vf", "--video_frames", type=int, default=None,
233-
help="controler of video frames to process")
234+
help="controller of video frames to process (required frame number or decymation factor if negative)")
234235
return parser.parse_args()
235236

236237

237238
CASE_TO_BENCH = {
238-
'text_gen': bench_text.run_text_generation_benchmark,
239-
'image_gen': bench_image.run_image_generation_benchmark,
240-
'code_gen': bench_text.run_text_generation_benchmark,
241-
'ldm_super_resolution': bench_ldm_sr.run_ldm_super_resolution_benchmark,
242-
'speech_to_text': bench_speech.run_speech_2_txt_benchmark,
239+
"text_gen": bench_text.run_text_generation_benchmark,
240+
"image_gen": bench_image.run_image_generation_benchmark,
241+
"code_gen": bench_text.run_text_generation_benchmark,
242+
"ldm_super_resolution": bench_ldm_sr.run_ldm_super_resolution_benchmark,
243+
"speech_to_text": bench_speech.run_speech_2_txt_benchmark,
243244
"visual_text_gen": bench_vlm.run_visual_language_generation_benchmark,
244245
"text_embed": bench_text_embed.run_text_embddings_benchmark,
245246
"text_to_speech": bench_text_to_speech.run_text_2_speech_benchmark,
@@ -315,10 +316,14 @@ def main():
315316
iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case'].task](
316317
model_path, framework, args.device, args.tokens_len, args.streaming, model_args,
317318
args.num_iters, memory_data_collector)
318-
else:
319+
elif model_args['use_case'].task == "visual_text_gen":
319320
iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case'].task](
320321
model_path, framework, args.device, model_args, args.num_iters,
321-
memory_data_collector, args.video_frames)
322+
memory_data_collector, decym_frames=args.video_frames)
323+
else:
324+
iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case'].task](
325+
model_path, framework, args.device, model_args, args.num_iters, memory_data_collector)
326+
322327
if args.report is not None or args.report_json is not None:
323328
model_precision = ''
324329
if framework == 'ov':

tools/llm_bench/llm_bench_utils/model_utils.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def get_param_from_file(args, input_key):
4343

4444
elif args[input_key] is not None and args['prompt_file'] is not None:
4545
raise RuntimeError(f'== {input_key} and prompt file should not exist together ==')
46+
4647
else:
4748
if args[input_key] is not None:
4849
if args[input_key] != '':
@@ -56,11 +57,16 @@ def get_param_from_file(args, input_key):
5657
if "media" in input_key:
5758
if args["media"] is None and args["images"] is None:
5859
if args["use_case"].task == "visual_text_gen":
59-
log.warn("Input image is not provided. Only text generation part will be evaluated")
60+
if args["videos"] is None:
61+
log.warn("Input image/video is not provided. Only text generation part will be evaluated")
6062
elif args["use_case"].task != "image_gen":
6163
raise RuntimeError("No input image. ImageToImage/Inpainting Models cannot start generation without one. Please, provide an image.")
6264
else:
6365
data_dict["media"] = args["media"] if args["media"] is not None else args["images"]
66+
if "video" in input_key:
67+
if args["videos"] is not None:
68+
data_dict["video"] = args["videos"]
69+
6470
if args["prompt"] is None:
6571
if args["use_case"].task == "visual_text_gen":
6672
data_dict["prompt"] = "What is OpenVINO?" if data_dict.get("media") is None else "Describe image"
@@ -112,6 +118,7 @@ def analyze_args(args):
112118
model_args["height"] = args.height
113119
model_args["width"] = args.width
114120
model_args['images'] = args.images
121+
model_args['videos'] = args.videos
115122
model_args['seed'] = args.seed
116123
model_args['mem_consumption'] = args.memory_consumption
117124
model_args['batch_size'] = args.batch_size

tools/llm_bench/llm_bench_utils/parse_json_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def parse_vlm_json_data(json_data_list):
2727
for json_data in json_data_list:
2828
prompt_data = create_base_prompt(json_data)
2929
if ("media" in json_data) and ("video" in json_data):
30-
raise ValueError("only one key is avaialble from media & video")
30+
raise ValueError("only one key is available from media & video")
3131
if "media" in json_data:
3232
prompt_data["media"] = json_data["media"]
3333
if "video" in json_data:

tools/llm_bench/llm_bench_utils/prompt_utils.py

Lines changed: 98 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,13 @@
88
import numpy as np
99
from PIL import Image
1010
import logging as log
11+
from transformers.image_utils import load_image
1112
from .model_utils import get_param_from_file
13+
from .model_utils import resolve_media_file_path
1214
from .parse_json_data import parse_text_json_data
15+
from .parse_json_data import parse_vlm_json_data
16+
from pathlib import Path
17+
import openvino as ov
1318

1419

1520
def get_text_prompt(args):
@@ -26,19 +31,35 @@ def get_text_prompt(args):
2631

2732

2833
def print_video_frames_number_and_convert_to_tensor(func):
29-
def inner(video_path, decym_frames):
34+
def inner(video_path, decym_frames, genai_flag):
3035
log.info(f"Input video file: {video_path}")
3136
if decym_frames is not None:
3237
log.info(f"Requested to reduce into {decym_frames} frames")
3338
out_frames = func(video_path, decym_frames)
3439
log.info(f"Final frames number: {len(out_frames)}")
40+
log.info(f"First frame shape: {out_frames[0].shape}")
41+
log.info(f"First frame dtype: {out_frames[0].dtype}")
42+
if genai_flag:
43+
return [ov.Tensor(frame) for frame in out_frames]
3544
return np.array(out_frames)
3645
return inner
3746

3847

3948
@print_video_frames_number_and_convert_to_tensor
4049
def make_video_tensor(video_path, decym_frames=None):
41-
supported_files = set([".mp4"])
50+
supported_files = {
51+
'.mp4', # MPEG-4 (most common)
52+
'.avi', # Audio Video Interleave
53+
'.mov', # QuickTime Movie
54+
'.mkv', # Matroska Video
55+
'.wmv', # Windows Media Video
56+
'.flv', # Flash Video
57+
'.webm', # WebM
58+
'.m4v', # iTunes Video
59+
'.3gp', # 3GPP
60+
'.mpeg', # MPEG
61+
'.mpg' # MPEG
62+
}
4263

4364
assert os.path.exists(video_path), f"no input video file: {video_path}"
4465
assert video_path.suffix.lower() in supported_files, "no supported video file"
@@ -49,38 +70,95 @@ def make_video_tensor(video_path, decym_frames=None):
4970
ret, frame = cap.read()
5071
if not ret:
5172
break
73+
5274
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
5375
pil_image = Image.fromarray(frame_rgb)
5476

55-
shape = np.array(pil_image).shape
56-
dtype = np.array(pil_image).dtype
57-
log.info(f"Video shape: {shape}")
58-
log.info(f"Video dtype: {dtype}")
59-
new_frame = np.zeros(shape, dtype)
60-
61-
width, height = pil_image.size
62-
log.info(f"Video size: {width}x{height}")
63-
for x in range(0, width):
64-
for y in range(0, height):
65-
new_frame[y, x] = frame_rgb[y, x]
66-
output_frames.append(np.array(pil_image))
77+
np_img_array = np.array(pil_image)
78+
log.debug(f"Video shape: {np_img_array.shape}")
79+
log.debug(f"Video dtype: {np_img_array.dtype}")
80+
output_frames.append(np_img_array)
6781

6882
if decym_frames is None:
83+
log.info("Video decym: none: skip")
6984
return output_frames
7085
if int(decym_frames) == 0:
86+
log.info("Video decym: zero: skip")
7187
return output_frames
7288

73-
# decimation procedure
74-
# decim_fames is required frame number if positive
75-
# or decimation factor if negative
89+
# decymation procedure
90+
# decym_frames is required max frame number if positive
91+
# or decymation factor if negative
7692

7793
decym_frames = int(decym_frames)
7894
if decym_frames > 0:
7995
if len(output_frames) <= decym_frames:
80-
return output_frames
81-
decym_factor = int(len(output_frames) / decym_frames)
96+
log.info(f"Video decym: too short to decym: crop: {decym_frames}")
97+
return list(output_frames[:decym_frames])
98+
decym_factor = 1 + int(len(output_frames) / decym_frames)
8299
else:
83100
decym_factor = -decym_frames
101+
log.info(f"Video decym factor: {decym_factor}")
84102
if decym_factor >= 2:
85-
return output_frames[::decym_factor]
103+
return list(output_frames[::decym_factor])
104+
log.info("Video decym: too large decym factor: skip")
86105
return output_frames
106+
107+
108+
def load_image_genai(image_path):
109+
pil_image = load_image(image_path)
110+
image_data = np.array(pil_image)[None]
111+
return ov.Tensor(image_data)
112+
113+
114+
def extract_prompt_issues(inputs, required_frames, genai_flag):
115+
prompts, images, videos = [], [], []
116+
if not isinstance(inputs, (list, tuple, set)):
117+
inputs = [inputs]
118+
for input_data in inputs:
119+
if input_data.get("video") is not None:
120+
entry = Path(input_data["video"])
121+
if entry.is_dir():
122+
for filename in sorted(entry.iterdir()):
123+
video_tensor = make_video_tensor(filename, required_frames, genai_flag)
124+
if genai_flag:
125+
videos.extend(video_tensor)
126+
else:
127+
videos.append(video_tensor)
128+
else:
129+
video_tensor = make_video_tensor(entry, required_frames, genai_flag)
130+
if genai_flag:
131+
videos.extend(video_tensor)
132+
else:
133+
videos.append(video_tensor)
134+
if input_data.get("media") is not None:
135+
func_load_image = load_image_genai if genai_flag else load_image
136+
entry = Path(input_data["media"])
137+
if entry.is_dir():
138+
for file in sorted(entry.iterdir()):
139+
img = func_load_image(str(file))
140+
images.append(img)
141+
else:
142+
img = func_load_image(input_data["media"])
143+
images.append(img)
144+
prompts.append(input_data["prompt"])
145+
return prompts, images, videos
146+
147+
148+
def get_image_text_prompt(args):
149+
vlm_file_list = []
150+
output_data_list, is_json_data = get_param_from_file(args, ["video", "media", "prompt"])
151+
if is_json_data:
152+
vlm_param_list = parse_vlm_json_data(output_data_list)
153+
if len(vlm_param_list) > 0:
154+
for vlm_file in vlm_param_list:
155+
if args['prompt_file'] is not None and len(args['prompt_file']) > 0 and 'media' in vlm_file:
156+
if 'video' in vlm_file:
157+
raise ValueError('media and video cannot be specify in a single prompt file')
158+
vlm_file['media'] = resolve_media_file_path(vlm_file.get('media'), args['prompt_file'][0])
159+
if args['prompt_file'] is not None and len(args['prompt_file']) > 0 and 'video' in vlm_file:
160+
vlm_file['video'] = resolve_media_file_path(vlm_file.get('video'), args['prompt_file'][0])
161+
vlm_file_list.append(vlm_file)
162+
else:
163+
vlm_file_list.append(output_data_list)
164+
return vlm_file_list

0 commit comments

Comments
 (0)