Skip to content

Commit a220e57

Browse files
authored
[gaudi] HuggingFaceM4/idefics2-8b issue fix (#3264)
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
1 parent e07056a commit a220e57

File tree

3 files changed

+15
-4
lines changed

3 files changed

+15
-4
lines changed

backends/gaudi/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def __init__(self, prefix: str, config, weights, layer_id):
111111
)
112112
self.num_heads = config.num_attention_heads
113113
self.hidden_size = config.hidden_size
114-
if hasattr(config, "head_dim"):
114+
if getattr(config, "head_dim", None) is not None:
115115
self.head_size = config.head_dim
116116
else:
117117
self.head_size = self.hidden_size // self.num_heads

backends/gaudi/server/text_generation_server/models/flash_vlm_causal_lm.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,8 +1050,6 @@ def forward(
10501050
attention_mask=attention_mask_forward,
10511051
**kwargs,
10521052
)
1053-
if batch.prefill_cache_indices is not None:
1054-
batch.prefill_cache_indices = None
10551053
batch.image_grid_thw = None
10561054
batch.free_encoder_cache()
10571055
return logits, speculative_logits

backends/gaudi/server/text_generation_server/utils/debug.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
import glob
55
import time
66

7-
from optimum.habana.utils import to_gb_rounded
87
import habana_frameworks.torch as htorch
8+
import numpy as np
99

1010
START_TS = None
1111
DBG_TRACE_FILENAME = os.environ.get("DBG_TRACE_FILENAME")
@@ -14,6 +14,19 @@
1414
os.remove(f)
1515

1616

17+
def to_gb_rounded(mem: float) -> float:
    """Convert a byte count into gigabytes, rounded to two decimal places.

    Args:
        mem (float): memory in bytes

    Returns:
        float: memory in GB rounded to the second decimal
    """
    gigabytes = mem / 1024**3
    return np.round(gigabytes, 2)
28+
29+
1730
def count_hpu_graphs():
1831
return len(glob.glob(".graph_dumps/*PreGraph*"))
1932

0 commit comments

Comments
 (0)