88import numpy as np
99from PIL import Image
1010import logging as log
11+ from transformers .image_utils import load_image
1112from .model_utils import get_param_from_file
13+ from .model_utils import resolve_media_file_path
1214from .parse_json_data import parse_text_json_data
15+ from .parse_json_data import parse_vlm_json_data
16+ from pathlib import Path
17+ import openvino as ov
1318
1419
1520def get_text_prompt (args ):
@@ -26,19 +31,24 @@ def get_text_prompt(args):
2631
2732
def print_video_frames_number_and_convert_to_tensor(func):
    """Decorate a frame extractor with logging and output conversion.

    The wrapped ``func`` is called as ``func(video_path, genai_flag,
    decym_frames)`` and must return a sequence of per-frame numpy arrays.

    The returned wrapper logs the input path, the requested frame reduction
    (when given), the resulting frame count and, if at least one frame was
    produced, the shape and dtype of the first frame.

    Returns (from the wrapper):
        * ``genai_flag`` truthy: a list with one ``ov.Tensor`` per frame, each
          built from the frame with a leading axis added (``frame[None]``).
        * otherwise: ``np.array(out_frames)``.
    """
    # Imported locally so the block does not depend on the file-level import header.
    import functools

    @functools.wraps(func)
    def inner(video_path, genai_flag, decym_frames=None):
        log.info(f"Input video file: {video_path}")
        if decym_frames is not None:
            log.info(f"Requested to reduce into {decym_frames} frames")
        out_frames = func(video_path, genai_flag, decym_frames)
        log.info(f"Final frames number: {len(out_frames)}")
        # A video that yields no frames must not crash the logging code.
        if len(out_frames) > 0:
            log.info(f"First frame shape: {out_frames[0].shape}")
            log.info(f"First frame dtype: {out_frames[0].dtype}")
        if genai_flag:
            return [ov.Tensor(frame[None]) for frame in out_frames]
        return np.array(out_frames)

    return inner
3747
3848
3949@print_video_frames_number_and_convert_to_tensor
40- def make_video_tensor (video_path , decym_frames = None ):
41- supported_files = set ([ ".mp4" ])
50+ def make_video_tensor (video_path , genai_flag , decym_frames = None ):
51+ supported_files = { ".mp4" }
4252
4353 assert os .path .exists (video_path ), f"no input video file: { video_path } "
4454 assert video_path .suffix .lower () in supported_files , "no supported video file"
@@ -49,38 +59,89 @@ def make_video_tensor(video_path, decym_frames=None):
4959 ret , frame = cap .read ()
5060 if not ret :
5161 break
62+
5263 frame_rgb = cv2 .cvtColor (frame , cv2 .COLOR_BGR2RGB )
5364 pil_image = Image .fromarray (frame_rgb )
5465
55- shape = np .array (pil_image ).shape
56- dtype = np .array (pil_image ).dtype
57- log .info (f"Video shape: { shape } " )
58- log .info (f"Video dtype: { dtype } " )
59- new_frame = np .zeros (shape , dtype )
60-
61- width , height = pil_image .size
62- log .info (f"Video size: { width } x{ height } " )
63- for x in range (0 , width ):
64- for y in range (0 , height ):
65- new_frame [y , x ] = frame_rgb [y , x ]
66- output_frames .append (np .array (pil_image ))
66+ np_img_array = np .array (pil_image )
67+ log .debug (f"Video shape: { np_img_array .shape } " )
68+ log .debug (f"Video dtype: { np_img_array .dtype } " )
69+ output_frames .append (np_img_array )
6770
6871 if decym_frames is None :
72+ log .info ("Video decym: none: skip" )
6973 return output_frames
7074 if int (decym_frames ) == 0 :
75+ log .info ("Video decym: zero: skip" )
7176 return output_frames
7277
73- # decimation procedure
74- # decim_fames is required frame number if positive
75- # or decimation factor if negative
78+ # decymation procedure
79+ # decym_frames is the required max frame number if positive
80+ # or decymation factor if negative
7681
7782 decym_frames = int (decym_frames )
7883 if decym_frames > 0 :
7984 if len (output_frames ) <= decym_frames :
80- return output_frames
81- decym_factor = int (len (output_frames ) / decym_frames )
85+ log .info (f"Video decym: too short to decym: crop: { decym_frames } " )
86+ return list (output_frames [:decym_frames ])
87+ decym_factor = 1 + int (len (output_frames ) / decym_frames )
8288 else :
8389 decym_factor = - decym_frames
90+ log .info (f"Video decym factor: { decym_factor } " )
8491 if decym_factor >= 2 :
85- return output_frames [::decym_factor ]
92+ return list (output_frames [::decym_factor ])
93+ log .info ("Video decym: too large decym factor: skip" )
8694 return output_frames
95+
96+
def load_image_genai(image_path):
    """Load an image and wrap it in an ``ov.Tensor`` with a leading axis.

    The image is read with ``load_image``, converted to a numpy array, given
    an extra leading dimension (``[None]``) and handed to ``ov.Tensor``.
    """
    pixels = np.array(load_image(image_path))
    batched = pixels[None]
    return ov.Tensor(batched)
101+
102+
def extract_prompt_issues(inputs, genai_flag, required_frames):
    """Collect prompts, images and video frames from one or more input records.

    Args:
        inputs: a single mapping or a list/tuple/set of mappings. Each mapping
            must have a ``"prompt"`` key and may have ``"video"`` and/or
            ``"media"`` keys pointing at a file or a directory.
        genai_flag: forwarded to ``make_video_tensor``; also selects
            ``load_image_genai`` (truthy) or ``load_image`` for images.
        required_frames: forwarded to ``make_video_tensor`` as the frame
            reduction setting.

    Returns:
        A ``(prompts, images, videos)`` tuple of lists.
    """
    if not isinstance(inputs, (list, tuple, set)):
        inputs = [inputs]

    prompts = []
    images = []
    videos = []

    for item in inputs:
        video_entry = item.get("video")
        if video_entry is not None:
            video_path = Path(video_entry)
            # A directory contributes every entry it contains, in sorted order.
            video_files = sorted(video_path.iterdir()) if video_path.is_dir() else [video_path]
            for video_file in video_files:
                videos.extend(make_video_tensor(video_file, genai_flag, required_frames))

        media_entry = item.get("media")
        if media_entry is not None:
            image_loader = load_image_genai if genai_flag else load_image
            media_path = Path(media_entry)
            if media_path.is_dir():
                images.extend(image_loader(str(image_file)) for image_file in sorted(media_path.iterdir()))
            else:
                # Pass the raw value through untouched rather than str(Path(...)).
                images.append(image_loader(media_entry))

        prompts.append(item["prompt"])

    return prompts, images, videos
129+
130+
def get_image_text_prompt(args):
    """Build the list of VLM prompt records described by ``args``.

    ``get_param_from_file`` is asked for the ``video``, ``media`` and
    ``prompt`` parameters. If it reports non-JSON data, the result is a
    one-element list holding that data. Otherwise each record returned by
    ``parse_vlm_json_data`` is collected; when ``args['prompt_file']`` is a
    non-empty sequence, the record's ``media`` / ``video`` values are passed
    through ``resolve_media_file_path`` together with the first prompt file.

    Raises:
        ValueError: a record has both ``media`` and ``video`` while a prompt
            file is in use.
    """
    output_data_list, is_json_data = get_param_from_file(args, ["video", "media", "prompt"])
    if not is_json_data:
        return [output_data_list]

    collected = []
    vlm_param_list = parse_vlm_json_data(output_data_list)
    if len(vlm_param_list) > 0:
        for vlm_file in vlm_param_list:
            prompt_files = args['prompt_file']
            has_prompt_file = prompt_files is not None and len(prompt_files) > 0
            if has_prompt_file:
                if 'media' in vlm_file:
                    if 'video' in vlm_file:
                        raise ValueError('media and video cannot be specify in a single prompt file')
                    vlm_file['media'] = resolve_media_file_path(vlm_file.get('media'), prompt_files[0])
                if 'video' in vlm_file:
                    vlm_file['video'] = resolve_media_file_path(vlm_file.get('video'), prompt_files[0])
            collected.append(vlm_file)
    return collected
0 commit comments