Skip to content

Commit 2c1d02a

Browse files
committed
refactor: add bitmap_type, instead of is_audio, is_video...
1 parent e377686 commit 2c1d02a

File tree

2 files changed

+24
-16
lines changed

2 files changed

+24
-16
lines changed

tools/mtmd/mtmd-video.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -319,7 +319,7 @@ mtmd_bitmap* init_video_bitmap_from_path(mtmd_context * ctx,
319319
}
320320
}
321321

322-
// minicpm normal speed
322+
// minicpm frames sample method
323323
const int32_t minicpmv_max_video_frames = 64;
324324
opts.max_frames = minicpmv_max_video_frames;
325325
if(info.total_frames > minicpmv_max_video_frames) {

tools/mtmd/mtmd.cpp

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,28 @@
1414
#include <limits>
1515
#include <vector>
1616

17-
// represents raw image data, layout is RGBRGBRGB...
18-
// length of data must be nx * ny * 3
17+
enum class mtmd_bitmap_type {
18+
IMAGE,
19+
AUDIO,
20+
VIDEO,
21+
};
22+
23+
// if bitmap is image:
24+
// length of data must be nx * ny * 3
25+
// the data is in RGBRGBRGB... format
26+
// if bitmap is audio:
27+
// length of data must be n_samples * sizeof(float)
28+
// the data is in float format (PCM F32)
29+
// if bitmap is video:
30+
// length of data must be nx * ny * nframes * 3
31+
// the data is in RGBRGBRGB... format for each frame, frames are stored sequentially
1932
struct mtmd_bitmap {
33+
mtmd_bitmap_type type;
2034
uint32_t nx;
21-
uint32_t ny;
35+
uint32_t ny; // for audio, ny=1
36+
uint32_t nz; // for video: number of frames
2237
std::vector<unsigned char> data;
2338
std::string id; // optional user-defined id, for ex: can be set to image hash, useful for KV cache tracking
24-
bool is_audio = false; // true if the bitmap is audio
25-
26-
bool is_video = false; // true if the bitmap is video
27-
uint32_t nz; // for video: number of frames
2839
};
2940

3041
struct mtmd_image_tokens {
@@ -901,8 +912,7 @@ mtmd_bitmap * mtmd_bitmap_init(uint32_t nx,
901912
bitmap->nx = nx;
902913
bitmap->ny = ny;
903914
bitmap->nz = 1;
904-
bitmap->is_audio = false;
905-
bitmap->is_video = false;
915+
bitmap->type = mtmd_bitmap_type::IMAGE;
906916
size_t data_size = (size_t)nx * ny * 3;
907917
bitmap->data.resize(data_size);
908918
std::memcpy(bitmap->data.data(), data, data_size);
@@ -915,8 +925,7 @@ mtmd_bitmap * mtmd_bitmap_init_from_audio(size_t n_samples,
915925
bitmap->nx = n_samples;
916926
bitmap->ny = 1;
917927
bitmap->nz = 1;
918-
bitmap->is_audio = true;
919-
bitmap->is_video = false;
928+
bitmap->type = mtmd_bitmap_type::AUDIO;
920929
size_t data_size = n_samples * sizeof(float);
921930
bitmap->data.resize(data_size);
922931
std::memcpy(bitmap->data.data(), data, data_size);
@@ -928,8 +937,7 @@ mtmd_bitmap * mtmd_bitmap_init_from_video(uint32_t nx, uint32_t ny, uint32_t nfr
928937
bitmap->nx = nx;
929938
bitmap->ny = ny;
930939
bitmap->nz = nframes;
931-
bitmap->is_audio = false;
932-
bitmap->is_video = true;
940+
bitmap->type = mtmd_bitmap_type::VIDEO;
933941
size_t data_size = (size_t)nx * ny * nframes * 3;
934942
bitmap->data.resize(data_size);
935943
if(data != nullptr){
@@ -960,11 +968,11 @@ size_t mtmd_bitmap_get_n_bytes(const mtmd_bitmap * bitmap) {
960968
}
961969

962970
bool mtmd_bitmap_is_audio(const mtmd_bitmap * bitmap) {
963-
return bitmap->is_audio;
971+
return bitmap->type == mtmd_bitmap_type::AUDIO;
964972
}
965973

966974
bool mtmd_bitmap_is_video(const mtmd_bitmap * bitmap) {
967-
return bitmap->is_video;
975+
return bitmap->type == mtmd_bitmap_type::VIDEO;
968976
}
969977

970978
const char * mtmd_bitmap_get_id(const mtmd_bitmap * bitmap) {

0 commit comments

Comments
 (0)