1 parent 7e8838f commit 25236bb
QEfficient/transformers/models/modeling_auto.py
@@ -1413,6 +1413,8 @@ def kv_offload_generate(
                 if x.startswith("past_") or x.endswith("_RetainedState")
             ]
         )
+        if not_mllama:
+            lang_session.skip_buffers(vision_outputs.keys())

         # Get first token
         lang_inputs["input_ids"] = outputs["logits"].argmax(2)
QEfficient/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py
@@ -953,6 +953,7 @@ def smart_resize(
         grid_height = grid_h * grid_w
         grid_width = patch_size * patch_size * temporal_patch_size * channel
         vision_size = grid_height // 4
+        vision_size = vision_size * num_frames
         grid_height = grid_height * batch_size

         vision = [
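
The added line scales vision_size by num_frames. A rough, self-contained check of the arithmetic in this hunk follows; the variable names come from the diff, but the concrete values are assumptions chosen only for illustration.

    # Illustrative values (assumptions, not defaults from the model config).
    grid_h, grid_w = 32, 32          # spatial patch grid per frame
    patch_size = 14
    temporal_patch_size = 2
    channel = 3
    num_frames = 4                   # the new factor introduced by this commit
    batch_size = 1

    grid_height = grid_h * grid_w                                          # 1024 patches per frame
    grid_width = patch_size * patch_size * temporal_patch_size * channel   # 1176 values per patch
    vision_size = grid_height // 4                                         # 256 after merging
    vision_size = vision_size * num_frames                                 # new: scale by frame count -> 1024
    grid_height = grid_height * batch_size
    print(vision_size)  # 1024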