From afc22b8892acf8b0d3724413e97a22af95270f90 Mon Sep 17 00:00:00 2001
From: "puzhichen.996" <puzhichen.996@bytedance.com>
Date: Mon, 29 Sep 2025 14:37:28 +0800
Subject: [PATCH 01/13] test libxc in gpu bug for 3rd derivatives.

---
 gpu4pyscf/dft/tests/test_libxc.py | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/gpu4pyscf/dft/tests/test_libxc.py b/gpu4pyscf/dft/tests/test_libxc.py
index c13dba133..d242c965b 100644
--- a/gpu4pyscf/dft/tests/test_libxc.py
+++ b/gpu4pyscf/dft/tests/test_libxc.py
@@ -51,7 +51,7 @@ def _diff(dat, ref):
     return np.min((abs(d/(ref+1e-300)), abs(d)), axis=0)
 
 class KnownValues(unittest.TestCase):
-    def _check_xc(self, xc, spin=0, fxc_tol=1e-10, kxc_tol=1e-10):
+    def _check_xc(self, xc, spin=0, deriv=2, fxc_tol=1e-10, kxc_tol=1e-10):
         ni_cpu = numint_cpu()
         ni_gpu = numint_gpu()
         xctype = ni_cpu._xc_type(xc)
@@ -66,8 +66,15 @@ def _check_xc(self, xc, spin=0, fxc_tol=1e-10, kxc_tol=1e-10):
         if spin != 0:
             rho = (rho, rho)
 
-        exc_cpu, vxc_cpu, fxc_cpu, kxc_cpu = ni_cpu.eval_xc_eff(xc, rho, deriv=2, xctype=xctype)
-        exc_gpu, vxc_gpu, fxc_gpu, kxc_gpu = ni_gpu.eval_xc_eff(xc, cupy.array(rho), deriv=2, xctype=xctype)
+        exc_cpu, vxc_cpu, fxc_cpu, kxc_cpu = ni_cpu.eval_xc_eff(xc, rho, deriv=deriv, xctype=xctype)
+        exc_gpu, vxc_gpu, fxc_gpu, kxc_gpu = ni_gpu.eval_xc_eff(xc, cupy.array(rho), deriv=deriv, xctype=xctype)
+
+        print(f"{xc} {spin} exc", _diff(exc_gpu[:,0].get(), exc_cpu).max())
+        print(f"{xc} {spin} vxc", _diff(vxc_gpu.get(), vxc_cpu).max())
+        if fxc_gpu is not None:
+            print(f"{xc} {spin} fxc", _diff(fxc_gpu.get(), fxc_cpu).max())
+        if kxc_gpu is not None:
+            print(f"{xc} {spin} kxc", _diff(kxc_gpu.get(), kxc_cpu).max())
 
         assert _diff(exc_gpu[:,0].get(), exc_cpu).max() < 1e-10
         assert _diff(vxc_gpu.get(), vxc_cpu).max() < 1e-10
@@ -77,15 +84,15 @@ def _check_xc(self, xc, spin=0, fxc_tol=1e-10, kxc_tol=1e-10):
             assert _diff(kxc_gpu.get(), kxc_cpu).max() < kxc_tol
 
     def test_LDA(self):
-        self._check_xc('LDA_C_VWN')
+        self._check_xc('LDA_C_VWN', deriv=3)
 
     def test_GGA(self):
-        self._check_xc('HYB_GGA_XC_B3LYP')
-        self._check_xc('GGA_X_B88', fxc_tol=1e-10)
-        self._check_xc('GGA_C_PBE', fxc_tol=1e-4)
+        self._check_xc('HYB_GGA_XC_B3LYP', deriv=3, kxc_tol=1e-9)
+        self._check_xc('GGA_X_B88', fxc_tol=1e-10, deriv=3, kxc_tol=1e-9)
+        self._check_xc('GGA_C_PBE', fxc_tol=1e-4, deriv=3, kxc_tol=1e2)
 
     def test_mGGA(self):
-        self._check_xc('MGGA_C_M06', fxc_tol=1e-4)
+        self._check_xc('MGGA_C_M06', fxc_tol=1e-4, deriv=3, kxc_tol=1e-2)
 
     def test_u_LDA(self):
         self._check_xc('LDA_C_VWN', spin=1)

From 3abf7bfadb984355176015f80e314f83b7e6980c Mon Sep 17 00:00:00 2001
From: "puzhichen.996" <puzhichen.996@bytedance.com>
Date: Mon, 29 Sep 2025 16:29:50 +0800
Subject: [PATCH 02/13] use libxc-0.7.0

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 70660747d..a6fc7223e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 cutensor-cu12==2.2.0
-gpu4pyscf-libxc-cuda12x==0.5.0
+gpu4pyscf-libxc-cuda12x==0.7.0
 cupy-cuda12x==13.4.1
 pyscf==2.8.0
 basis-set-exchange==0.11

From 87c468454625ff12050b59d710a8cd19c9504e62 Mon Sep 17 00:00:00 2001
From: "puzhichen.996" <puzhichen.996@bytedance.com>
Date: Thu, 9 Oct 2025 10:23:01 +0800
Subject: [PATCH 03/13] using libxc 0.7.0

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 111e4f3c2..ca06bade9 100755
--- a/setup.py
+++ b/setup.py
@@ -138,6 +138,6 @@ def initialize_with_default_plat_name(self):
         'pyscf-dispersion',
         f'cupy-cuda{CUDA_VERSION}>=13.0,!=13.4.0', # Due to expm in cupyx.scipy.linalg and cutensor 2.0
         'geometric',
-        f'gpu4pyscf-libxc-cuda{CUDA_VERSION}==0.5',
+        f'gpu4pyscf-libxc-cuda{CUDA_VERSION}==0.7.0',
     ]
 )

From dd1272b6523ac8d7bb8ffbd3aeb53198d09d7357 Mon Sep 17 00:00:00 2001
From: "puzhichen.996" <puzhichen.996@bytedance.com>
Date: Thu, 9 Oct 2025 11:32:36 +0800
Subject: [PATCH 04/13] change the unittest.yml

---
 .github/workflows/unittest.yml | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
index 6abeb5728..de3d179dd 100644
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@@ -32,8 +32,17 @@ jobs:
           -e HTTP_PROXY=$HTTP_PROXY \
           -e HTTPS_PROXY=$HTTPS_PROXY \
           -v $GITHUB_WORKSPACE:/workspace pyscf/gpu4pyscf-devel:latest \
-          /bin/bash -c "cd /workspace && pip3 install --no-cache-dir --target=/tmp/deps -r requirements.txt && export PYTHONPATH=/tmp/deps:$PYTHONPATH && source build.sh && pytest -m 'not slow and not benchmark' --cov=/workspace --durations=50 && rm -rf .pytest_cache"
-
+          /bin/bash -c "cd /workspace && \
+          echo '--- Force uninstalling pre-installed package ---' && \
+          pip uninstall -y gpu4pyscf-cuda12x || true && \
+          echo '--- Cleaning old artifacts ---' && \
+          rm -rf /tmp/deps .cupy_cache .pytest_cache build dist *.egg-info && \
+          echo '--- Checking setup.py content ---' && \
+          cat setup.py | grep libxc && \
+          echo '--- Checking build.sh content ---' && \
+          cat build.sh && \
+          echo '--- Running original script ---' && \
+          pip3 install --no-cache-dir --target=/tmp/deps -r requirements.txt && export PYTHONPATH=/tmp/deps:$PYTHONPATH && source build.sh && pytest -m 'not slow and not benchmark' --cov=/workspace --durations=50 && rm -rf .pytest_cache"
   multi-gpu:
     runs-on: [self-hosted, Linux, X64, 2T4]
     timeout-minutes: 360

From 31f5dd8d83b29112e16d07d499af4d3cad41ff43 Mon Sep 17 00:00:00 2001
From: "puzhichen.996" <puzhichen.996@bytedance.com>
Date: Thu, 9 Oct 2025 11:36:42 +0800
Subject: [PATCH 05/13] remove some codes

---
 .github/workflows/unittest.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
index de3d179dd..b8b1a8d9c 100644
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@@ -35,8 +35,6 @@ jobs:
           /bin/bash -c "cd /workspace && \
           echo '--- Force uninstalling pre-installed package ---' && \
           pip uninstall -y gpu4pyscf-cuda12x || true && \
-          echo '--- Cleaning old artifacts ---' && \
-          rm -rf /tmp/deps .cupy_cache .pytest_cache build dist *.egg-info && \
           echo '--- Checking setup.py content ---' && \
           cat setup.py | grep libxc && \
           echo '--- Checking build.sh content ---' && \

From 9612a0369d24e809437463ebc45870354c8cf8ac Mon Sep 17 00:00:00 2001
From: "puzhichen.996" <puzhichen.996@bytedance.com>
Date: Thu, 9 Oct 2025 11:40:27 +0800
Subject: [PATCH 06/13] change the unit test settings for multi-gpu

---
 .github/workflows/unittest.yml | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
index b8b1a8d9c..130aa5fbd 100644
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@@ -60,4 +60,12 @@ jobs:
           -e HTTP_PROXY=$HTTP_PROXY \
           -e HTTPS_PROXY=$HTTPS_PROXY \
           -v $GITHUB_WORKSPACE:/workspace pyscf/gpu4pyscf-devel:latest \
-          /bin/bash -c "cd /workspace && pip3 install --no-cache-dir --target=/tmp/deps -r requirements.txt && export PYTHONPATH=/tmp/deps:$PYTHONPATH && source build.sh && pytest -m 'not slow and not benchmark' --cov=/workspace --durations=50 && rm -rf .pytest_cache"
+          /bin/bash -c "cd /workspace  && \
+          echo '--- Force uninstalling pre-installed package ---' && \
+          pip uninstall -y gpu4pyscf-cuda12x || true && \
+          echo '--- Checking setup.py content ---' && \
+          cat setup.py | grep libxc && \
+          echo '--- Checking build.sh content ---' && \
+          cat build.sh && \
+          echo '--- Running original script ---' && \
+          pip3 install --no-cache-dir --target=/tmp/deps -r requirements.txt && export PYTHONPATH=/tmp/deps:$PYTHONPATH && source build.sh && pytest -m 'not slow and not benchmark' --cov=/workspace --durations=50 && rm -rf .pytest_cache"

From 9f20c34f018bca8f4323171bf6b2743ec3f9ab41 Mon Sep 17 00:00:00 2001
From: "puzhichen.996" <puzhichen.996@bytedance.com>
Date: Thu, 9 Oct 2025 16:00:12 +0800
Subject: [PATCH 07/13] add sudo uninstall gpu4pyscf

---
 .github/workflows/unittest.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
index 130aa5fbd..301de008e 100644
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@@ -34,7 +34,7 @@ jobs:
           -v $GITHUB_WORKSPACE:/workspace pyscf/gpu4pyscf-devel:latest \
           /bin/bash -c "cd /workspace && \
           echo '--- Force uninstalling pre-installed package ---' && \
-          pip uninstall -y gpu4pyscf-cuda12x || true && \
+          sudo pip3 uninstall -y gpu4pyscf-cuda12x || true && \
           echo '--- Checking setup.py content ---' && \
           cat setup.py | grep libxc && \
           echo '--- Checking build.sh content ---' && \
@@ -62,7 +62,7 @@ jobs:
           -v $GITHUB_WORKSPACE:/workspace pyscf/gpu4pyscf-devel:latest \
           /bin/bash -c "cd /workspace  && \
           echo '--- Force uninstalling pre-installed package ---' && \
-          pip uninstall -y gpu4pyscf-cuda12x || true && \
+          sudo pip3 uninstall -y gpu4pyscf-cuda12x || true && \
           echo '--- Checking setup.py content ---' && \
           cat setup.py | grep libxc && \
           echo '--- Checking build.sh content ---' && \

From 070eef45a319d1df0bbb8cf00c4b466e2cc6eb4f Mon Sep 17 00:00:00 2001
From: "puzhichen.996" <puzhichen.996@bytedance.com>
Date: Thu, 9 Oct 2025 16:05:56 +0800
Subject: [PATCH 08/13] fix some typo

---
 .github/workflows/unittest.yml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
index 301de008e..b15c42d17 100644
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@@ -27,14 +27,13 @@ jobs:
       run: |
         docker run --gpus all \
           --rm \
-          -u "$(id -u):$(id -g)" \
           -e CUPY_CACHE_DIR=/workspace/.cupy_cache \
           -e HTTP_PROXY=$HTTP_PROXY \
           -e HTTPS_PROXY=$HTTPS_PROXY \
           -v $GITHUB_WORKSPACE:/workspace pyscf/gpu4pyscf-devel:latest \
           /bin/bash -c "cd /workspace && \
           echo '--- Force uninstalling pre-installed package ---' && \
-          sudo pip3 uninstall -y gpu4pyscf-cuda12x || true && \
+          pip3 uninstall -y gpu4pyscf-cuda12x || true && \
           echo '--- Checking setup.py content ---' && \
           cat setup.py | grep libxc && \
           echo '--- Checking build.sh content ---' && \
@@ -55,14 +54,13 @@ jobs:
       run: |
         docker run --gpus all \
           --rm \
-          -u "$(id -u):$(id -g)" \
           -e CUPY_CACHE_DIR=/workspace/.cupy_cache \
           -e HTTP_PROXY=$HTTP_PROXY \
           -e HTTPS_PROXY=$HTTPS_PROXY \
           -v $GITHUB_WORKSPACE:/workspace pyscf/gpu4pyscf-devel:latest \
           /bin/bash -c "cd /workspace  && \
           echo '--- Force uninstalling pre-installed package ---' && \
-          sudo pip3 uninstall -y gpu4pyscf-cuda12x || true && \
+          pip3 uninstall -y gpu4pyscf-cuda12x || true && \
           echo '--- Checking setup.py content ---' && \
           cat setup.py | grep libxc && \
           echo '--- Checking build.sh content ---' && \

From 372815cea61df8356873874a3bb31b4362827231 Mon Sep 17 00:00:00 2001
From: "puzhichen.996" <puzhichen.996@bytedance.com>
Date: Fri, 10 Oct 2025 09:49:42 +0800
Subject: [PATCH 09/13] print more info

---
 gpu4pyscf/dft/tests/test_libxc.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/gpu4pyscf/dft/tests/test_libxc.py b/gpu4pyscf/dft/tests/test_libxc.py
index d242c965b..63f25a10d 100644
--- a/gpu4pyscf/dft/tests/test_libxc.py
+++ b/gpu4pyscf/dft/tests/test_libxc.py
@@ -71,10 +71,20 @@ def _check_xc(self, xc, spin=0, deriv=2, fxc_tol=1e-10, kxc_tol=1e-10):
 
         print(f"{xc} {spin} exc", _diff(exc_gpu[:,0].get(), exc_cpu).max())
         print(f"{xc} {spin} vxc", _diff(vxc_gpu.get(), vxc_cpu).max())
+        print(f"{xc} {spin} exc gpu", exc_gpu[:,0].get())
+        print(f"{xc} {spin} exc cpu", exc_cpu)
+        print(f"{xc} {spin} vxc gpu", vxc_gpu.get())
+        print(f"{xc} {spin} vxc cpu", vxc_cpu)
+    
+
         if fxc_gpu is not None:
             print(f"{xc} {spin} fxc", _diff(fxc_gpu.get(), fxc_cpu).max())
+            print(f"{xc} {spin} fxc gpu", fxc_gpu.get())
+            print(f"{xc} {spin} fxc cpu", fxc_cpu)
         if kxc_gpu is not None:
             print(f"{xc} {spin} kxc", _diff(kxc_gpu.get(), kxc_cpu).max())
+            print(f"{xc} {spin} kxc gpu", kxc_gpu.get())
+            print(f"{xc} {spin} kxc cpu", kxc_cpu)
 
         assert _diff(exc_gpu[:,0].get(), exc_cpu).max() < 1e-10
         assert _diff(vxc_gpu.get(), vxc_cpu).max() < 1e-10

From 9d2678198a73eaf962dacdc7982c84bb5142c277 Mon Sep 17 00:00:00 2001
From: "puzhichen.996" <puzhichen.996@bytedance.com>
Date: Mon, 20 Oct 2025 10:22:31 +0800
Subject: [PATCH 10/13] change some thresholds

---
 gpu4pyscf/dft/tests/test_libxc.py | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/gpu4pyscf/dft/tests/test_libxc.py b/gpu4pyscf/dft/tests/test_libxc.py
index 63f25a10d..91a244d47 100644
--- a/gpu4pyscf/dft/tests/test_libxc.py
+++ b/gpu4pyscf/dft/tests/test_libxc.py
@@ -71,20 +71,10 @@ def _check_xc(self, xc, spin=0, deriv=2, fxc_tol=1e-10, kxc_tol=1e-10):
 
         print(f"{xc} {spin} exc", _diff(exc_gpu[:,0].get(), exc_cpu).max())
         print(f"{xc} {spin} vxc", _diff(vxc_gpu.get(), vxc_cpu).max())
-        print(f"{xc} {spin} exc gpu", exc_gpu[:,0].get())
-        print(f"{xc} {spin} exc cpu", exc_cpu)
-        print(f"{xc} {spin} vxc gpu", vxc_gpu.get())
-        print(f"{xc} {spin} vxc cpu", vxc_cpu)
-    
-
         if fxc_gpu is not None:
             print(f"{xc} {spin} fxc", _diff(fxc_gpu.get(), fxc_cpu).max())
-            print(f"{xc} {spin} fxc gpu", fxc_gpu.get())
-            print(f"{xc} {spin} fxc cpu", fxc_cpu)
         if kxc_gpu is not None:
             print(f"{xc} {spin} kxc", _diff(kxc_gpu.get(), kxc_cpu).max())
-            print(f"{xc} {spin} kxc gpu", kxc_gpu.get())
-            print(f"{xc} {spin} kxc cpu", kxc_cpu)
 
         assert _diff(exc_gpu[:,0].get(), exc_cpu).max() < 1e-10
         assert _diff(vxc_gpu.get(), vxc_cpu).max() < 1e-10
@@ -98,8 +88,8 @@ def test_LDA(self):
 
     def test_GGA(self):
         self._check_xc('HYB_GGA_XC_B3LYP', deriv=3, kxc_tol=1e-9)
-        self._check_xc('GGA_X_B88', fxc_tol=1e-10, deriv=3, kxc_tol=1e-9)
-        self._check_xc('GGA_C_PBE', fxc_tol=1e-4, deriv=3, kxc_tol=1e2)
+        self._check_xc('GGA_X_B88', fxc_tol=1e-10, deriv=3, kxc_tol=1e-8)
+        self._check_xc('GGA_C_PBE', fxc_tol=1e-4, deriv=3, kxc_tol=3e2)
 
     def test_mGGA(self):
         self._check_xc('MGGA_C_M06', fxc_tol=1e-4, deriv=3, kxc_tol=1e-2)
@@ -118,4 +108,4 @@ def test_u_mGGA(self):
 
 if __name__ == "__main__":
     print("Full Tests for xc fun")
-    unittest.main()
+    unittest.main()
\ No newline at end of file

From 0bf5e33616cbbc13cecbe6235775c1f0c080279e Mon Sep 17 00:00:00 2001
From: "puzhichen.996" <puzhichen.996@bytedance.com>
Date: Tue, 21 Oct 2025 10:00:16 +0800
Subject: [PATCH 11/13] use GPU to calculate 3rd-order functional derivatives

---
 gpu4pyscf/grad/tdrks.py | 36 +++++++++++++++++++-----------------
 gpu4pyscf/grad/tduks.py | 21 +++++++++++----------
 2 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/gpu4pyscf/grad/tdrks.py b/gpu4pyscf/grad/tdrks.py
index 2d5265c5c..7971b376c 100644
--- a/gpu4pyscf/grad/tdrks.py
+++ b/gpu4pyscf/grad/tdrks.py
@@ -361,16 +361,17 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None,
             mo_coeff_mask = mo_coeff[mask, :]
             rho = ni.eval_rho2(_sorted_mol, ao0, mo_coeff_mask, mo_occ, mask, xctype, with_lapl=False)
             # quick fix
-            if deriv > 2:
-                ni_cpu = numint_cpu()
-                # TODO: If the libxc is stablized, this should be gpulized
+            # if deriv > 2:
+            #     ni_cpu = numint_cpu()
+            #     # TODO: If the libxc is stablized, this should be gpulized
+            #     # vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
+            #     vxc, fxc, kxc = ni_cpu.eval_xc_eff(xc_code, rho.get(), deriv, xctype=xctype)[1:]
+            #     if isinstance(vxc,np.ndarray): vxc = cp.asarray(vxc)
+            #     if isinstance(fxc,np.ndarray): fxc = cp.asarray(fxc)
+            #     if isinstance(kxc,np.ndarray): kxc = cp.asarray(kxc)
+            # else:
                 # vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
-                vxc, fxc, kxc = ni_cpu.eval_xc_eff(xc_code, rho.get(), deriv, xctype=xctype)[1:]
-                if isinstance(vxc,np.ndarray): vxc = cp.asarray(vxc)
-                if isinstance(fxc,np.ndarray): fxc = cp.asarray(fxc)
-                if isinstance(kxc,np.ndarray): kxc = cp.asarray(kxc)
-            else:
-                vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
+            vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
             dmvo_mask = dmvo[mask[:, None], mask]
             rho1 = (
                 ni.eval_rho(_sorted_mol, ao0, dmvo_mask, mask, xctype, hermi=1, with_lapl=False) * 2
@@ -423,14 +424,15 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None,
             rho *= 0.5
             rho = cp.repeat(rho[cp.newaxis], 2, axis=0)
             # quick fix
-            if deriv > 2:
-                ni_cpu = numint_cpu()
-                vxc, fxc, kxc = ni_cpu.eval_xc_eff(xc_code, rho.get(), deriv, xctype=xctype)[1:]
-                if isinstance(vxc,np.ndarray): vxc = cp.asarray(vxc)
-                if isinstance(fxc,np.ndarray): fxc = cp.asarray(fxc)
-                if isinstance(kxc,np.ndarray): kxc = cp.asarray(kxc)
-            else:
-                vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
+            # if deriv > 2:
+            #     ni_cpu = numint_cpu()
+            #     vxc, fxc, kxc = ni_cpu.eval_xc_eff(xc_code, rho.get(), deriv, xctype=xctype)[1:]
+            #     if isinstance(vxc,np.ndarray): vxc = cp.asarray(vxc)
+            #     if isinstance(fxc,np.ndarray): fxc = cp.asarray(fxc)
+            #     if isinstance(kxc,np.ndarray): kxc = cp.asarray(kxc)
+            # else:
+            #     vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
+            vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
             # fxc_t couples triplet excitation amplitudes
             # 1/2 int (tia - tIA) fxc (tjb - tJB) = tia fxc_t tjb
             fxc_t = fxc[:, :, 0] - fxc[:, :, 1]
diff --git a/gpu4pyscf/grad/tduks.py b/gpu4pyscf/grad/tduks.py
index 1e74cd7d7..009c3e2b9 100644
--- a/gpu4pyscf/grad/tduks.py
+++ b/gpu4pyscf/grad/tduks.py
@@ -441,16 +441,17 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, with_k
         rho = cp.asarray((
             ni.eval_rho2(_sorted_mol, ao0, mo_coeff_mask_a, mo_occ[0], mask, xctype,with_lapl=False),
             ni.eval_rho2(_sorted_mol, ao0, mo_coeff_mask_b, mo_occ[1], mask, xctype, with_lapl=False)))
-        if deriv > 2:
-            ni_cpu = numint_cpu()
-            # TODO: If the libxc is stablized, this should be gpulized
-            # vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
-            vxc, fxc, kxc = ni_cpu.eval_xc_eff(xc_code, rho.get(), deriv, xctype=xctype)[1:]
-            if isinstance(vxc, np.ndarray): vxc = cp.asarray(vxc)
-            if isinstance(fxc, np.ndarray): fxc = cp.asarray(fxc)
-            if isinstance(kxc, np.ndarray): kxc = cp.asarray(kxc)
-        else:
-            vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
+        # if deriv > 2:
+        #     ni_cpu = numint_cpu()
+        #     # TODO: If the libxc is stablized, this should be gpulized
+        #     # vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
+        #     vxc, fxc, kxc = ni_cpu.eval_xc_eff(xc_code, rho.get(), deriv, xctype=xctype)[1:]
+        #     if isinstance(vxc, np.ndarray): vxc = cp.asarray(vxc)
+        #     if isinstance(fxc, np.ndarray): fxc = cp.asarray(fxc)
+        #     if isinstance(kxc, np.ndarray): kxc = cp.asarray(kxc)
+        # else:
+        #     vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
+        vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
         dmvo_mask_a = dmvo[0, mask[:, None], mask]
         dmvo_mask_b = dmvo[1, mask[:, None], mask]
         rho1 = cp.asarray((

From 3ea33550d8fed71651af8937f928481652f9e898 Mon Sep 17 00:00:00 2001
From: "puzhichen.996" <puzhichen.996@bytedance.com>
Date: Tue, 21 Oct 2025 10:11:40 +0800
Subject: [PATCH 12/13]  remove some comments

---
 gpu4pyscf/grad/tdrks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gpu4pyscf/grad/tdrks.py b/gpu4pyscf/grad/tdrks.py
index 7971b376c..7d23319f8 100644
--- a/gpu4pyscf/grad/tdrks.py
+++ b/gpu4pyscf/grad/tdrks.py
@@ -370,7 +370,7 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None,
             #     if isinstance(fxc,np.ndarray): fxc = cp.asarray(fxc)
             #     if isinstance(kxc,np.ndarray): kxc = cp.asarray(kxc)
             # else:
-                # vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
+            #   # vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
             vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
             dmvo_mask = dmvo[mask[:, None], mask]
             rho1 = (

From 4267ac3b4c2a92ae5647c93a84054c9a816b2802 Mon Sep 17 00:00:00 2001
From: "puzhichen.996" <puzhichen.996@bytedance.com>
Date: Fri, 7 Nov 2025 10:21:04 +0800
Subject: [PATCH 13/13] add environment variable to switch functional
 calculations on CPU or GPU

---
 gpu4pyscf/dft/tests/test_libxc.py | 28 +++++++++++++++++++++++-----
 gpu4pyscf/grad/tdrks.py           | 25 ++++++++++++++-----------
 gpu4pyscf/grad/tduks.py           | 26 +++++++++++++++-----------
 requirements.txt                  |  2 +-
 setup.py                          |  2 +-
 5 files changed, 54 insertions(+), 29 deletions(-)

diff --git a/gpu4pyscf/dft/tests/test_libxc.py b/gpu4pyscf/dft/tests/test_libxc.py
index 91a244d47..9edc9f1cc 100644
--- a/gpu4pyscf/dft/tests/test_libxc.py
+++ b/gpu4pyscf/dft/tests/test_libxc.py
@@ -20,6 +20,7 @@
 from gpu4pyscf.dft.numint import NumInt as numint_gpu
 from pyscf.dft.numint import NumInt as numint_cpu
 import cupy
+import os
 
 def setUpModule():
     global mol, dm1, dm0
@@ -84,15 +85,32 @@ def _check_xc(self, xc, spin=0, deriv=2, fxc_tol=1e-10, kxc_tol=1e-10):
             assert _diff(kxc_gpu.get(), kxc_cpu).max() < kxc_tol
 
     def test_LDA(self):
-        self._check_xc('LDA_C_VWN', deriv=3)
+        whether_use_gpu = os.environ.get('LIBXC_ON_GPU', '0') == '1'
+        if whether_use_gpu:
+            deriv = 3
+            print("test LDA with deriv 3")
+        else:
+            deriv = 2
+            print("test LDA with deriv 2")
+        self._check_xc('LDA_C_VWN', deriv=deriv)
 
     def test_GGA(self):
-        self._check_xc('HYB_GGA_XC_B3LYP', deriv=3, kxc_tol=1e-9)
-        self._check_xc('GGA_X_B88', fxc_tol=1e-10, deriv=3, kxc_tol=1e-8)
-        self._check_xc('GGA_C_PBE', fxc_tol=1e-4, deriv=3, kxc_tol=3e2)
+        whether_use_gpu = os.environ.get('LIBXC_ON_GPU', '0') == '1'
+        if whether_use_gpu:
+            deriv = 3
+        else:
+            deriv = 2
+        self._check_xc('HYB_GGA_XC_B3LYP', deriv=deriv, kxc_tol=1e-9)
+        self._check_xc('GGA_X_B88', fxc_tol=1e-10, deriv=deriv, kxc_tol=1e-8)
+        self._check_xc('GGA_C_PBE', fxc_tol=1e-4, deriv=deriv, kxc_tol=3e2)
 
     def test_mGGA(self):
-        self._check_xc('MGGA_C_M06', fxc_tol=1e-4, deriv=3, kxc_tol=1e-2)
+        whether_use_gpu = os.environ.get('LIBXC_ON_GPU', '0') == '1'
+        if whether_use_gpu:
+            deriv = 3
+        else:
+            deriv = 2
+        self._check_xc('MGGA_C_M06', fxc_tol=1e-4, deriv=deriv, kxc_tol=1e-2)
 
     def test_u_LDA(self):
         self._check_xc('LDA_C_VWN', spin=1)
diff --git a/gpu4pyscf/grad/tdrks.py b/gpu4pyscf/grad/tdrks.py
index 7d23319f8..5ea9ba73f 100644
--- a/gpu4pyscf/grad/tdrks.py
+++ b/gpu4pyscf/grad/tdrks.py
@@ -27,6 +27,7 @@
 from gpu4pyscf.grad import rks as rks_grad
 from gpu4pyscf.grad import tdrhf
 from gpu4pyscf import tdscf
+import os
 
 
 #
@@ -361,17 +362,19 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None,
             mo_coeff_mask = mo_coeff[mask, :]
             rho = ni.eval_rho2(_sorted_mol, ao0, mo_coeff_mask, mo_occ, mask, xctype, with_lapl=False)
             # quick fix
-            # if deriv > 2:
-            #     ni_cpu = numint_cpu()
-            #     # TODO: If the libxc is stablized, this should be gpulized
-            #     # vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
-            #     vxc, fxc, kxc = ni_cpu.eval_xc_eff(xc_code, rho.get(), deriv, xctype=xctype)[1:]
-            #     if isinstance(vxc,np.ndarray): vxc = cp.asarray(vxc)
-            #     if isinstance(fxc,np.ndarray): fxc = cp.asarray(fxc)
-            #     if isinstance(kxc,np.ndarray): kxc = cp.asarray(kxc)
-            # else:
-            #   # vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
-            vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
+            if deriv > 2:
+                whether_use_gpu = os.environ.get('LIBXC_ON_GPU', '0') == '1'
+                if not whether_use_gpu:
+                    ni_cpu = numint_cpu()
+                    # TODO: If the libxc is stablized, this should be gpulized
+                    vxc, fxc, kxc = ni_cpu.eval_xc_eff(xc_code, rho.get(), deriv, xctype=xctype)[1:]
+                    if isinstance(vxc,np.ndarray): vxc = cp.asarray(vxc)
+                    if isinstance(fxc,np.ndarray): fxc = cp.asarray(fxc)
+                    if isinstance(kxc,np.ndarray): kxc = cp.asarray(kxc)
+                else:
+                    vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
+            else:
+                vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
             dmvo_mask = dmvo[mask[:, None], mask]
             rho1 = (
                 ni.eval_rho(_sorted_mol, ao0, dmvo_mask, mask, xctype, hermi=1, with_lapl=False) * 2
diff --git a/gpu4pyscf/grad/tduks.py b/gpu4pyscf/grad/tduks.py
index 009c3e2b9..00ce46da6 100644
--- a/gpu4pyscf/grad/tduks.py
+++ b/gpu4pyscf/grad/tduks.py
@@ -26,6 +26,7 @@
 from gpu4pyscf.grad import tdrks
 from gpu4pyscf.scf import ucphf
 from gpu4pyscf import tdscf
+import os
 
 
 #
@@ -441,17 +442,20 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, with_k
         rho = cp.asarray((
             ni.eval_rho2(_sorted_mol, ao0, mo_coeff_mask_a, mo_occ[0], mask, xctype,with_lapl=False),
             ni.eval_rho2(_sorted_mol, ao0, mo_coeff_mask_b, mo_occ[1], mask, xctype, with_lapl=False)))
-        # if deriv > 2:
-        #     ni_cpu = numint_cpu()
-        #     # TODO: If the libxc is stablized, this should be gpulized
-        #     # vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
-        #     vxc, fxc, kxc = ni_cpu.eval_xc_eff(xc_code, rho.get(), deriv, xctype=xctype)[1:]
-        #     if isinstance(vxc, np.ndarray): vxc = cp.asarray(vxc)
-        #     if isinstance(fxc, np.ndarray): fxc = cp.asarray(fxc)
-        #     if isinstance(kxc, np.ndarray): kxc = cp.asarray(kxc)
-        # else:
-        #     vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
-        vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
+        if deriv > 2:
+            whether_use_gpu = os.environ.get('LIBXC_ON_GPU', '0') == '1'
+            if not whether_use_gpu:
+                ni_cpu = numint_cpu()
+                # TODO: If the libxc is stablized, this should be gpulized
+                # vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
+                vxc, fxc, kxc = ni_cpu.eval_xc_eff(xc_code, rho.get(), deriv, xctype=xctype)[1:]
+                if isinstance(vxc, np.ndarray): vxc = cp.asarray(vxc)
+                if isinstance(fxc, np.ndarray): fxc = cp.asarray(fxc)
+                if isinstance(kxc, np.ndarray): kxc = cp.asarray(kxc)
+            else:
+                vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
+        else:
+            vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv, xctype=xctype)[1:]
         dmvo_mask_a = dmvo[0, mask[:, None], mask]
         dmvo_mask_b = dmvo[1, mask[:, None], mask]
         rho1 = cp.asarray((
diff --git a/requirements.txt b/requirements.txt
index a6fc7223e..70660747d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 cutensor-cu12==2.2.0
-gpu4pyscf-libxc-cuda12x==0.7.0
+gpu4pyscf-libxc-cuda12x==0.5.0
 cupy-cuda12x==13.4.1
 pyscf==2.8.0
 basis-set-exchange==0.11
diff --git a/setup.py b/setup.py
index ca06bade9..111e4f3c2 100755
--- a/setup.py
+++ b/setup.py
@@ -138,6 +138,6 @@ def initialize_with_default_plat_name(self):
         'pyscf-dispersion',
         f'cupy-cuda{CUDA_VERSION}>=13.0,!=13.4.0', # Due to expm in cupyx.scipy.linalg and cutensor 2.0
         'geometric',
-        f'gpu4pyscf-libxc-cuda{CUDA_VERSION}==0.7.0',
+        f'gpu4pyscf-libxc-cuda{CUDA_VERSION}==0.5',
     ]
 )