diff --git a/vllm_ascend/envs.py b/vllm_ascend/envs.py
index cd148da3f32..4728ed5469e 100644
--- a/vllm_ascend/envs.py
+++ b/vllm_ascend/envs.py
@@ -176,6 +176,20 @@
     # Whether to anbale dynamic EPLB
     "DYNAMIC_EPLB":
     lambda: os.getenv("DYNAMIC_EPLB", "false").lower(),
+    # Select which AI Core metrics the torch_npu profiler collects. The following options can be configured:
+    # 0: torch_npu.profiler.AiCMetrics.AiCoreNone;
+    # 1: torch_npu.profiler.AiCMetrics.PipeUtilization;
+    # 2: torch_npu.profiler.AiCMetrics.ArithmeticUtilization;
+    # 3: torch_npu.profiler.AiCMetrics.Memory;
+    # 4: torch_npu.profiler.AiCMetrics.MemoryL0;
+    # 5: torch_npu.profiler.AiCMetrics.ResourceConflictRatio;
+    # 6: torch_npu.profiler.AiCMetrics.MemoryUB;
+    # 7: torch_npu.profiler.AiCMetrics.L2Cache;
+    # 8: torch_npu.profiler.AiCMetrics.MemoryAccess;
+    # If not set, it defaults to torch_npu.profiler.AiCMetrics.PipeUtilization (1).
+    # The meaning of each option is documented at: https://www.hiascend.com/document/detail/zh/Pytorch/720/apiref/torchnpuCustomsapi/context/torch_npu-profiler-AiCMetrics.md
+    "VLLM_ASCEND_PROFILER_AIC_METRICS":
+    lambda: int(os.getenv("VLLM_ASCEND_PROFILER_AIC_METRICS", 1)),
 }
 
 # end-env-vars-definition
diff --git a/vllm_ascend/worker/worker_v1.py b/vllm_ascend/worker/worker_v1.py
index ef3f2e49cb3..33442dc07f3 100644
--- a/vllm_ascend/worker/worker_v1.py
+++ b/vllm_ascend/worker/worker_v1.py
@@ -442,11 +442,24 @@ def _init_profiler(self):
             logger.info("Profiling enabled. Traces will be saved to: %s",
                         torch_profiler_trace_dir)
 
+            aic_metrics_list = [
+                torch_npu.profiler.AiCMetrics.AiCoreNone,
+                torch_npu.profiler.AiCMetrics.PipeUtilization,
+                torch_npu.profiler.AiCMetrics.ArithmeticUtilization,
+                torch_npu.profiler.AiCMetrics.Memory,
+                torch_npu.profiler.AiCMetrics.MemoryL0,
+                torch_npu.profiler.AiCMetrics.ResourceConflictRatio,
+                torch_npu.profiler.AiCMetrics.MemoryUB,
+                torch_npu.profiler.AiCMetrics.L2Cache,
+                torch_npu.profiler.AiCMetrics.MemoryAccess
+            ]
+
             experimental_config = torch_npu.profiler._ExperimentalConfig(
                 export_type=torch_npu.profiler.ExportType.Text,
                 profiler_level=torch_npu.profiler.ProfilerLevel.Level1,
                 msprof_tx=False,
-                aic_metrics=torch_npu.profiler.AiCMetrics.AiCoreNone,
+                aic_metrics=aic_metrics_list[
+                    envs_ascend.VLLM_ASCEND_PROFILER_AIC_METRICS],
                 l2_cache=False,
                 op_attr=False,
                 data_simplification=False,
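
For reference, here is a minimal standalone sketch of how the new variable resolves to an AiCMetrics value. It is illustrative only, not part of the patch: it mirrors the lookup added in worker_v1.py, assumes an Ascend host with torch_npu installed, and the AIC_METRICS name is hypothetical.

```python
import os

import torch_npu

# Index-to-enum table, same order as aic_metrics_list in the patch.
AIC_METRICS = [
    torch_npu.profiler.AiCMetrics.AiCoreNone,             # 0
    torch_npu.profiler.AiCMetrics.PipeUtilization,        # 1 (default)
    torch_npu.profiler.AiCMetrics.ArithmeticUtilization,  # 2
    torch_npu.profiler.AiCMetrics.Memory,                 # 3
    torch_npu.profiler.AiCMetrics.MemoryL0,               # 4
    torch_npu.profiler.AiCMetrics.ResourceConflictRatio,  # 5
    torch_npu.profiler.AiCMetrics.MemoryUB,               # 6
    torch_npu.profiler.AiCMetrics.L2Cache,                # 7
    torch_npu.profiler.AiCMetrics.MemoryAccess,           # 8
]

# e.g. `export VLLM_ASCEND_PROFILER_AIC_METRICS=7` before launching vLLM
# would select L2Cache; unset, the index falls back to 1 (PipeUtilization).
idx = int(os.getenv("VLLM_ASCEND_PROFILER_AIC_METRICS", 1))
print(f"aic_metrics index {idx} -> {AIC_METRICS[idx]}")
```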