From 8d7d93a690afa9c63662439818e1ed10638e765a Mon Sep 17 00:00:00 2001
From: zzzzwwjj <1183291235@qq.com>
Date: Mon, 1 Dec 2025 20:31:47 +0800
Subject: [PATCH] Optimize profiler default parameters

Signed-off-by: zzzzwwjj <1183291235@qq.com>
---
 tests/ut/worker/test_worker_v1.py |  4 ++--
 vllm_ascend/envs.py               | 14 ++++++++++++++
 vllm_ascend/worker/worker_v1.py   | 15 ++++++++++++++-
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/tests/ut/worker/test_worker_v1.py b/tests/ut/worker/test_worker_v1.py
index 5a12981a370..2324046bb03 100644
--- a/tests/ut/worker/test_worker_v1.py
+++ b/tests/ut/worker/test_worker_v1.py
@@ -520,7 +520,7 @@ def test_init_profiler_enabled(
         # Set enum mocks
         mock_export_type.Text = "Text"
         mock_profiler_level.Level1 = "Level1"
-        mock_aic_metrics.AiCoreNone = "AiCoreNone"
+        mock_aic_metrics.PipeUtilization = "PipeUtilization"
         mock_profiler_activity.CPU = "CPU"
         mock_profiler_activity.NPU = "NPU"
 
@@ -554,7 +554,7 @@ def test_init_profiler_enabled(
                 "export_type": "Text",
                 "profiler_level": "Level1",
                 "msprof_tx": False,
-                "aic_metrics": "AiCoreNone",
+                "aic_metrics": "PipeUtilization",
                 "l2_cache": False,
                 "op_attr": False,
                 "data_simplification": False,
diff --git a/vllm_ascend/envs.py b/vllm_ascend/envs.py
index cd148da3f32..4728ed5469e 100644
--- a/vllm_ascend/envs.py
+++ b/vllm_ascend/envs.py
@@ -176,6 +176,20 @@
     # Whether to anbale dynamic EPLB
     "DYNAMIC_EPLB":
     lambda: os.getenv("DYNAMIC_EPLB", "false").lower(),
+    # Select which AI Core metrics the torch_npu profiler collects. The value is an integer index; supported options:
+    # 0: torch_npu.profiler.AiCMetrics.AiCoreNone;
+    # 1: torch_npu.profiler.AiCMetrics.PipeUtilization;
+    # 2: torch_npu.profiler.AiCMetrics.ArithmeticUtilization;
+    # 3: torch_npu.profiler.AiCMetrics.Memory;
+    # 4: torch_npu.profiler.AiCMetrics.MemoryL0;
+    # 5: torch_npu.profiler.AiCMetrics.ResourceConflictRatio;
+    # 6: torch_npu.profiler.AiCMetrics.MemoryUB;
+    # 7: torch_npu.profiler.AiCMetrics.L2Cache;
+    # 8: torch_npu.profiler.AiCMetrics.MemoryAccess;
+    # Defaults to 1 (torch_npu.profiler.AiCMetrics.PipeUtilization) when the variable is not set.
+    # For the meaning of each option, see: https://www.hiascend.com/document/detail/zh/Pytorch/720/apiref/torchnpuCustomsapi/context/torch_npu-profiler-AiCMetrics.md
+    "VLLM_ASCEND_PROFILER_AIC_METRICS":
+    lambda: int(os.getenv("VLLM_ASCEND_PROFILER_AIC_METRICS", 1)),
 }
 
 # end-env-vars-definition
diff --git a/vllm_ascend/worker/worker_v1.py b/vllm_ascend/worker/worker_v1.py
index ef3f2e49cb3..33442dc07f3 100644
--- a/vllm_ascend/worker/worker_v1.py
+++ b/vllm_ascend/worker/worker_v1.py
@@ -442,11 +442,24 @@ def _init_profiler(self):
             logger.info("Profiling enabled. Traces will be saved to: %s",
                         torch_profiler_trace_dir)
 
+            aic_metrics_list = [
+                torch_npu.profiler.AiCMetrics.AiCoreNone,
+                torch_npu.profiler.AiCMetrics.PipeUtilization,
+                torch_npu.profiler.AiCMetrics.ArithmeticUtilization,
+                torch_npu.profiler.AiCMetrics.Memory,
+                torch_npu.profiler.AiCMetrics.MemoryL0,
+                torch_npu.profiler.AiCMetrics.ResourceConflictRatio,
+                torch_npu.profiler.AiCMetrics.MemoryUB,
+                torch_npu.profiler.AiCMetrics.L2Cache,
+                torch_npu.profiler.AiCMetrics.MemoryAccess
+            ]
+
             experimental_config = torch_npu.profiler._ExperimentalConfig(
                 export_type=torch_npu.profiler.ExportType.Text,
                 profiler_level=torch_npu.profiler.ProfilerLevel.Level1,
                 msprof_tx=False,
-                aic_metrics=torch_npu.profiler.AiCMetrics.AiCoreNone,
+                aic_metrics=aic_metrics_list[
+                    envs_ascend.VLLM_ASCEND_PROFILER_AIC_METRICS],
                 l2_cache=False,
                 op_attr=False,
                 data_simplification=False,