Skip to content
This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Commit 5df1556

Browse files
authored
improve lm_eval get chatglm2 tokenizer from local (#1598)
Signed-off-by: changwangss <chang1.wang@intel.com>
1 parent b1d3d3c commit 5df1556

File tree

1 file changed

+14
-6
lines changed
  • intel_extension_for_transformers/transformers/llm/evaluation/lm_eval/models

1 file changed

+14
-6
lines changed

intel_extension_for_transformers/transformers/llm/evaluation/lm_eval/models/huggingface.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -838,12 +838,20 @@ def _create_tokenizer(
838838
else:
839839
# get the HF hub name via accessor on model
840840
model_name = self.model.name_or_path
841-
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
842-
model_name,
843-
revision=revision,
844-
trust_remote_code=trust_remote_code,
845-
use_fast=use_fast_tokenizer,
846-
)
841+
842+
# The chatglm2 tokenizer doesn't support loading from a local path.
843+
if hasattr(self.model, "config") and hasattr(self.model.config, "auto_map") and \
844+
"chatglm2" in self.model.config.auto_map["AutoConfig"]:
845+
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
846+
"THUDM/chatglm2-6b", trust_remote_code=True
847+
)
848+
else:
849+
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
850+
model_name,
851+
revision=revision,
852+
trust_remote_code=trust_remote_code,
853+
use_fast=use_fast_tokenizer,
854+
)
847855
return None
848856

849857
def _detect_batch_size(self, requests=None, pos: int = 0):

0 commit comments

Comments
 (0)