[LLM Runtime] Remove the identical branch (#894)

a32543254 · web-flow · commit 33defb7e9817 · 2023-12-09T13:08:59.000+08:00
diff --git a/examples/huggingface/pytorch/text-classification/deployment/emotion/distilbert_base_uncased/executor_utils.py b/examples/huggingface/pytorch/text-classification/deployment/emotion/distilbert_base_uncased/executor_utils.py
@@ -37,7 +37,7 @@ def accuracy(self, batch_size, seq_len, dataset_name, task_name, data_dir, token
         # load metric
         log.info("Load metric ......")
         if dataset_name and task_name is not None:
-            metric = load_metric("accuracy")
+            metric = load_metric(dataset_name, task_name)
         else:
             metric = load_metric("accuracy")
         # execute
diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/qkv_merge.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/qkv_merge.py
@@ -102,7 +102,6 @@ def get_zero_ratio(matrix, block):
                 if ((v_matmul.attr.__contains__("output_dtype")
                         and q_matmul.attr.__contains__("output_dtype")
                         and k_matmul.attr.__contains__("output_dtype")
-                        and q_matmul.attr["output_dtype"] == v_matmul.attr["output_dtype"] 
                         and q_matmul.attr["output_dtype"] == v_matmul.attr["output_dtype"])
                         or (v_matmul.attr.__contains__("output_dtype") == False
                         and q_matmul.attr.__contains__("output_dtype") == False
diff --git a/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_gptq_bloom.py b/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_gptq_bloom.py
@@ -195,11 +195,7 @@ def bytes_to_unicode():
 
     ftype_cur = 0
     if ".weight" in name and list_vars[name].dim() == 2:
-        if name.replace(".weight",
-                        "") in weight_config and weight_config[name.replace(".weight", "")]["dtype"] != "fp32":
-            ftype_cur = 2  # 13
-        else:
-            ftype_cur = 2  # 2
+        ftype_cur = 2  # TODO(Zhenwei) support jblas
 
     data = list_vars[src].squeeze().numpy()
     data = data.astype(np.float32)