@@ -90,45 +90,58 @@ static void list_files(const std::string & dir, std::vector<std::string> & out,
9090
9191namespace mtmd_video {
9292
93- bool load_frames_from_dir (mtmd_context * ctx,
/**
 * Report whether `path` names a video container, judged by file extension.
 *
 * The comparison is case-insensitive and only the END of the path is
 * examined, so "clip.MP4" matches while "clip.mp4.txt" or
 * "shots.mov/frame_001.png" do not.
 *
 * @param path file path or bare file name; may be empty
 * @return true when the path ends in .mp4, .mov, .mkv, .avi or .webm
 */
bool is_video_file(const std::string & path) {
    static const char * const k_video_exts[] = { ".mp4", ".mov", ".mkv", ".avi", ".webm" };

    // Lower-case a copy so the suffix comparison is case-insensitive.
    // tolower is fed an unsigned char to avoid UB on negative char values.
    std::string lower = path;
    std::transform(lower.begin(), lower.end(), lower.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

    for (const char * ext : k_video_exts) {
        const std::string suffix(ext);
        // Suffix match only: an extension appearing mid-path must not count.
        if (lower.size() >= suffix.size() &&
            lower.compare(lower.size() - suffix.size(), suffix.size(), suffix) == 0) {
            return true;
        }
    }
    return false;
}
102+
103+ // untested
104+ static mtmd_bitmap* load_frames_from_dir (mtmd_context * ctx,
94105 const std::string & dir_path,
95- std::vector<mtmd::bitmap> & out_frames,
96106 const LoadVideoOptions & opts) {
97- if (!ctx || dir_path.empty () || !is_dir (dir_path)) {
98- return false ;
107+ if (!ctx || dir_path.empty () || !is_dir (dir_path) || opts. max_frames < 1 ) {
108+ return nullptr ;
99109 }
100110 // note: hparam-based control is applied inside clip.cpp; nothing to set globally here
101111
102112 std::vector<std::string> files;
103113 list_files (dir_path, files, opts.recursive );
104114 std::sort (files.begin (), files.end ());
105115
106- int stride = std::max (1 , opts.stride );
107- int loaded = 0 ;
116+ auto stride = std::max (1u , opts.stride );
117+ size_t loaded = 0 ;
118+ unsigned char * dest = nullptr ;
119+ mtmd_bitmap* out_frames = nullptr ;
120+
121+ uint32_t w=0 , h=0 ;
108122 for (size_t i = 0 ; i < files.size (); i++) {
109- if (( int ) i % stride != 0 ) continue ;
123+ if (i % stride != 0 ) continue ;
110124 const std::string & f = files[i];
111125 if (!has_image_ext (f)) continue ;
112126 mtmd::bitmap bmp (mtmd_helper_bitmap_init_from_file (ctx, f.c_str ()));
113127 if (!bmp.ptr ) continue ;
114- out_frames.push_back (std::move (bmp));
128+ if (loaded==0 ){
129+ w = bmp.nx ();
130+ h = bmp.ny ();
131+ out_frames = mtmd_bitmap_init_from_video (w, h, loaded, nullptr );
132+ dest = mtmd_bitmap_get_data_mutable (out_frames);
133+ }else if (bmp.nx () != w || bmp.ny () != h){
134+ return nullptr ; // all frames must have the same size
135+ }
136+ std::memcpy (dest,
137+ bmp.data (),
138+ bmp.n_bytes ());
139+ dest += bmp.n_bytes ();
115140 loaded++;
116- if (opts.max_frames > 0 && loaded >= opts.max_frames ) break ;
117- }
118- return loaded > 0 ;
119- }
120-
121- size_t append_frames_from_dir (mtmd_context * ctx,
122- const std::string & dir_path,
123- mtmd::bitmaps & dst,
124- const LoadVideoOptions & opts) {
125- std::vector<mtmd::bitmap> frames;
126- if (!load_frames_from_dir (ctx, dir_path, frames, opts)) {
127- return 0 ;
141+ if (loaded >= opts.max_frames ) break ;
128142 }
129- size_t before = dst.entries .size ();
130- for (auto & f : frames) dst.entries .push_back (std::move (f));
131- return dst.entries .size () - before;
143+
144+ return out_frames;
132145}
133146
134147// --- FFmpeg-based file decoding (optional) ---
@@ -244,49 +257,51 @@ static bool decode_video_ffmpeg_to_rgba(const std::string & file,
244257 return taken > 0 ;
245258}
246259
247- bool load_frames_from_file (mtmd_context * ctx,
260+ static mtmd_bitmap* load_frames_from_file (mtmd_context * ctx,
248261 const std::string & file_path,
249- std::vector<mtmd::bitmap> & out_frames,
250262 const LoadVideoOptions & opts) {
251- if (!ctx) return false ;
263+ if (!ctx) return nullptr ;
252264 std::vector<DecodedFrameRGBA> decoded;
253- if (!decode_video_ffmpeg_to_rgba (file_path, decoded, opts.max_frames , std::max (1 , opts.stride ))) {
254- return false ;
265+ if (!decode_video_ffmpeg_to_rgba (file_path, decoded, opts.max_frames , std::max (1u , opts.stride ))) {
266+ return nullptr ;
255267 }
268+ const size_t nframes = decoded.size ();
269+ if (nframes < 1 ){
270+ return nullptr ;
271+ }
272+ const int w = decoded[0 ].width ;
273+ const int h = decoded[0 ].height ;
274+ mtmd_bitmap* out_frames = mtmd_bitmap_init_from_video (uint32_t (w), uint32_t (h), uint32_t (nframes), nullptr );
275+ unsigned char * dst = mtmd_bitmap_get_data_mutable (out_frames);
276+
256277 for (auto & fr : decoded) {
257- const int w = fr.width ;
258- const int h = fr.height ;
259- std::vector<unsigned char > rgb;
260- rgb.resize ((size_t )w * h * 3 );
278+ GGML_ASSERT (w == fr.width && h == fr.height );
261279 const unsigned char * src = fr.rgba .data ();
262- unsigned char * dst = rgb.data ();
263280 for (int i = 0 ; i < w * h; ++i) {
264281 dst[0 ] = src[0 ]; // R
265282 dst[1 ] = src[1 ]; // G
266283 dst[2 ] = src[2 ]; // B
267284 dst += 3 ;
268285 src += 4 ; // skip A
269286 }
270- mtmd::bitmap bmp (mtmd_bitmap_init ((uint32_t )w, (uint32_t )h, rgb.data ()));
271- if (bmp.ptr ) out_frames.push_back (std::move (bmp));
272287 }
273- return !out_frames.empty ();
288+
289+ return out_frames;
274290}
275291#else
276- bool load_frames_from_file (mtmd_context * /* ctx*/ ,
292+ static mtmd_bitmap* load_frames_from_file (mtmd_context * /* ctx*/ ,
277293 const std::string & /* file_path*/ ,
278- std::vector<mtmd::bitmap> & /* out_frames*/ ,
279294 const LoadVideoOptions & /* opts*/ ) {
280- return false ;
295+ return nullptr ;
281296}
282297bool get_video_info_ffmpeg (const std::string &file, VideoInfo &info) {
298+ LOG_ERR (" FFmpeg support is not enabled in this build\n " );
283299 return false ;
284300}
285301#endif
286302
287- size_t append_frames_from_path (mtmd_context * ctx,
288- const std::string & path,
289- mtmd::bitmaps & dst) {
303+ mtmd_bitmap* init_video_bitmap_from_path (mtmd_context * ctx,
304+ const std::string & path) {
290305 mtmd_video::LoadVideoOptions opts;
291306 opts.max_frames = 32 ;
292307 opts.stride = 1 ;
@@ -300,7 +315,7 @@ size_t append_frames_from_path(mtmd_context * ctx,
300315 info.total_frames = files.size ();
301316 } else {
302317 if (!mtmd_video::get_video_info_ffmpeg (path, info)) {
303- return 0 ;
318+ return nullptr ;
304319 }
305320 }
306321
@@ -316,14 +331,10 @@ size_t append_frames_from_path(mtmd_context * ctx,
316331 }
317332
318333 if (is_dir (path)) {
319- return append_frames_from_dir (ctx, path, dst, opts);
320- } else {
321- std::vector<mtmd::bitmap> frames;
322- if (!load_frames_from_file (ctx, path, frames, opts)) return 0 ;
323- size_t before = dst.entries .size ();
324- for (auto & f : frames) dst.entries .push_back (std::move (f));
325- return dst.entries .size () - before;
334+ return load_frames_from_dir (ctx, path, opts);
326335 }
336+
337+ return load_frames_from_file (ctx, path, opts);
327338}
328339
329340} // namespace mtmd_video
0 commit comments