diff --git a/docs/en/getting_started/compile.md b/docs/en/getting_started/compile.md index cf7aea653..af63bd5ef 100644 --- a/docs/en/getting_started/compile.md +++ b/docs/en/getting_started/compile.md @@ -48,7 +48,7 @@ pip install --upgrade setuptools wheel ``` ## Compilation -Execute the compilation to generate the executable file `build/xllm/core/server/xllm` under `build/`. The default device is A2, for A3, add `--device a3`, for mlu, add `--device mlu`: +Execute the compilation to generate the executable file `build/xllm/core/server/xllm` under `build/`. For A3, add `--device a3`; other devices are auto-detected and do not need the `--device` flag: ```bash python setup.py build ``` diff --git a/docs/zh/getting_started/compile.md b/docs/zh/getting_started/compile.md index c9ba222b1..fe34dac78 100644 --- a/docs/zh/getting_started/compile.md +++ b/docs/zh/getting_started/compile.md @@ -49,7 +49,7 @@ pip install -r cibuild/requirements-dev.txt -i https://mirrors.tuna.tsinghua.edu pip install --upgrade setuptools wheel ``` ## 编译 -执行编译,在`build/`下生成可执行文件`build/xllm/core/server/xllm`。默认为A2,A3请加 `--device a3`,MLU请加 `--device mlu`: +执行编译,在`build/`下生成可执行文件`build/xllm/core/server/xllm`。如果是A3请加`--device a3`,其他设备无需加`--device`: ```bash python setup.py build ``` diff --git a/setup.py b/setup.py index 7cf2614fc..478655cdb 100644 --- a/setup.py +++ b/setup.py @@ -11,6 +11,7 @@ from pathlib import Path from typing import List from jinja2 import Template +import argparse from distutils.core import Command from setuptools import Extension, setup, find_packages @@ -30,6 +31,31 @@ def get_cpu_arch(): else: raise ValueError(f"Unsupported architecture: {arch}") +# get device type +def get_device_type(): + import torch + + if torch.cuda.is_available(): + return "cuda" + + try: + import torch_mlu + if torch.mlu.is_available(): + return "mlu" + except ImportError: + pass + + try: + import torch_npu + if torch.npu.is_available(): + return "a2" + except ImportError: + pass + + print("Unsupported device type, 
please install torch, torch_mlu or torch_npu") + exit(1) + + def get_cxx_abi(): try: import torch @@ -224,8 +250,6 @@ def set_cuda_envs(): os.environ["LIBTORCH_ROOT"] = get_torch_root_path() os.environ["PYTORCH_INSTALL_PATH"] = get_torch_root_path() os.environ["CUDA_TOOLKIT_ROOT_DIR"] = "/usr/local/cuda" - os.environ["NCCL_ROOT"] = get_nccl_root_path() - os.environ["NCCL_VERSION"] = "2" class CMakeExtension(Extension): def __init__(self, name: str, path: str, sourcedir: str = "") -> None: @@ -551,39 +575,53 @@ def pre_build(): if not run_shell_command("sh third_party/dependencies.sh", cwd=script_path): print("❌ Failed to reset changes!") exit(0) + +def parse_arguments(): + parser = argparse.ArgumentParser(add_help=False) + + parser.add_argument( + '--device', + type=str.lower, + choices=['auto', 'a2', 'a3', 'mlu', 'cuda'], + default='auto', + help='Device type: a2, a3, mlu, or cuda (case-insensitive)' + ) + + parser.add_argument( + '--dry-run', + action='store_true', + help='Dry run mode (do not execute pre_build)' + ) + + parser.add_argument( + '--install-xllm-kernels', + type=str.lower, + choices=['true', 'false', '1', '0', 'yes', 'no', 'y', 'n', 'on', 'off'], + default='true', + help='Whether to install xllm kernels' + ) + + parser.add_argument( + 'setup_command', + nargs='*', + help='setup.py command (build, install, etc.)' + ) + + return parser.parse_args() if __name__ == "__main__": - device = 'a2' # default + args = parse_arguments() + arch = get_cpu_arch() - install_kernels = True - if '--device' in sys.argv: - idx = sys.argv.index('--device') - if idx + 1 < len(sys.argv): - device = sys.argv[idx+1].lower() - if device not in ('a2', 'a3', 'mlu', 'cuda'): - print("Error: --device must be a2 or a3 or mlu (case-insensitive)") - sys.exit(1) - # Remove the arguments so setup() doesn't see them - del sys.argv[idx] - del sys.argv[idx] - if '--dry_run' not in sys.argv: - pre_build() - else: - sys.argv.remove("--dry_run") + device = args.device + if device == 
'auto': + device = get_device_type() + print(f"🚀 Build xllm with CPU arch: {arch} and target device: {device}") - if '--install-xllm-kernels' in sys.argv: - idx = sys.argv.index('--install-xllm-kernels') - if idx + 1 < len(sys.argv): - install_kernels = sys.argv[idx+1].lower() - if install_kernels in ('true', '1', 'yes', 'y', 'on'): - install_kernels = True - elif install_kernels in ('false', '0', 'no', 'n', 'off'): - install_kernels = False - else: - print("Error: --install-xllm-kernels must be true or false") - sys.exit(1) - sys.argv.pop(idx) - sys.argv.pop(idx) + if not args.dry_run: + pre_build() + + install_kernels = args.install_xllm_kernels.lower() in ('true', '1', 'yes', 'y', 'on') if "SKIP_TEST" in os.environ: BUILD_TEST_FILE = False @@ -605,7 +643,7 @@ def pre_build(): long_description=read_readme(), long_description_content_type="text/markdown", url="https://github.com/jd-opensource/xllm", - project_url={ + project_urls={ "Homepage": "https://xllm.readthedocs.io/zh-cn/latest/", "Documentation": "https://xllm.readthedocs.io/zh-cn/latest/", },