From 186f681f2f841255816577aa6a2b481c6e6be6d1 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 11 Sep 2025 14:12:21 +0800 Subject: [PATCH 01/32] 1. test the uhf 2. add the get_ab for sf-uhf --- gpu4pyscf/tdscf/tests/test_sftddft.py | 24 +++ gpu4pyscf/tdscf/uhf.py | 204 +++++++++++++++++++++++++- 2 files changed, 227 insertions(+), 1 deletion(-) diff --git a/gpu4pyscf/tdscf/tests/test_sftddft.py b/gpu4pyscf/tdscf/tests/test_sftddft.py index be53d41c3..439ca5057 100644 --- a/gpu4pyscf/tdscf/tests/test_sftddft.py +++ b/gpu4pyscf/tdscf/tests/test_sftddft.py @@ -22,6 +22,20 @@ except ImportError: mcfun = None + +def diagonalize_tda(a, nroots=5): + nocc, nvir = a.shape[:2] + nov = nocc * nvir + a = a.reshape(nov, nov) + e, xy = np.linalg.eig(np.asarray(a)) + sorted_indices = np.argsort(e) + + e_sorted = e[sorted_indices] + xy_sorted = xy[:, sorted_indices] + + return e_sorted[:nroots], xy_sorted[:, :nroots] + + class KnownValues(unittest.TestCase): @classmethod def setUpClass(cls): @@ -48,10 +62,20 @@ def test_tda(self): ref = [ 0.46644071, 0.55755649, 1.05310518] td = mf.SFTDA().run(extype=0, conv_tol=1e-7) self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) + a, b = td.get_ab() + e = diagonalize_tda(a[0], nroots=3)[0] + self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) + print(td.e) + print(e) ref = [-0.21574567, 0.00270390, 0.03143914] td = mf.SFTDA().run(extype=1, conv_tol=1e-7) self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) + a, b = td.get_ab() + e = diagonalize_tda(a[1], nroots=3)[0] + print(td.e) + print(e) + self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) @unittest.skipIf(mcfun is None, 'MCfun not available') def test_mcol_b3lyp_tda(self): diff --git a/gpu4pyscf/tdscf/uhf.py b/gpu4pyscf/tdscf/uhf.py index 074abe08c..fe3e64bb1 100644 --- a/gpu4pyscf/tdscf/uhf.py +++ b/gpu4pyscf/tdscf/uhf.py @@ -22,7 +22,7 @@ from gpu4pyscf import scf from gpu4pyscf.lib import logger from gpu4pyscf.lib.cupy_helper import contract, tag_array -from 
gpu4pyscf.tdscf._uhf_resp_sf import gen_uhf_response_sf +from gpu4pyscf.tdscf._uhf_resp_sf import gen_uhf_response_sf, cache_xc_kernel_sf from gpu4pyscf.gto.int3c1e import int1e_grids from gpu4pyscf.tdscf import rhf as tdhf_gpu from gpu4pyscf.dft import KohnShamDFT @@ -402,6 +402,200 @@ def add_hf_(a, b, hyb=1): return (a_aa.get(), a_ab.get(), a_bb.get()), (b_aa.get(), b_ab.get(), b_bb.get()) + +def get_ab_sf(mf, mo_energy=None, mo_coeff=None, mo_occ=None, collinear_samples=200): + r''' + From pyscf-forge + A and B matrices for TDDFT response function. + + A[i,a,j,b] = \delta_{ab}\delta_{ij}(E_a - E_i) + (ia||bj) + B[i,a,j,b] = (ia||jb) + + Spin symmetry is not considered in the returned A, B lists. + List A has two items: (A_baba, A_abab). + List B has two items: (B_baab, B_abba). + ''' + if mo_energy is None: mo_energy = mf.mo_energy + if mo_coeff is None: mo_coeff = mf.mo_coeff + if mo_occ is None: mo_occ = mf.mo_occ + if not isinstance(mo_coeff, cp.ndarray): + mo_coeff = cp.asarray(mo_coeff) + if not isinstance(mo_energy, cp.ndarray): + mo_energy = cp.asarray(mo_energy) + if not isinstance(mo_occ, cp.ndarray): + mo_occ = cp.asarray(mo_occ) + + mol = mf.mol + nao = mol.nao_nr() + occidx_a = cp.where(mo_occ[0]==1)[0] + viridx_a = cp.where(mo_occ[0]==0)[0] + occidx_b = cp.where(mo_occ[1]==1)[0] + viridx_b = cp.where(mo_occ[1]==0)[0] + orbo_a = mo_coeff[0][:,occidx_a] + orbv_a = mo_coeff[0][:,viridx_a] + orbo_b = mo_coeff[1][:,occidx_b] + orbv_b = mo_coeff[1][:,viridx_b] + nocc_a = orbo_a.shape[1] + nvir_a = orbv_a.shape[1] + nocc_b = orbo_b.shape[1] + nvir_b = orbv_b.shape[1] + + e_ia_b2a = (mo_energy[0][viridx_a,None] - mo_energy[1][occidx_b]).T + e_ia_a2b = (mo_energy[1][viridx_b,None] - mo_energy[0][occidx_a]).T + + a_b2a = cp.diag(e_ia_b2a.ravel()).reshape(nocc_b,nvir_a,nocc_b,nvir_a) + a_a2b = cp.diag(e_ia_a2b.ravel()).reshape(nocc_a,nvir_b,nocc_a,nvir_b) + b_b2a = cp.zeros((nocc_b,nvir_a,nocc_a,nvir_b)) + b_a2b = cp.zeros((nocc_a,nvir_b,nocc_b,nvir_a)) + a = 
(a_b2a, a_a2b) + b = (b_b2a, b_a2b) + + def add_hf_(a, b, hyb=1): + # In spin flip TDA/ TDDFT, hartree potential is zero. + # A : iabj ---> ijba; B : iajb ---> ibja + eri_a_b2a = ao2mo.general(mol, [orbo_b.get() ,orbo_b.get() ,orbv_a.get() ,orbv_a.get()], compact=False) + eri_a_a2b = ao2mo.general(mol, [orbo_a.get() ,orbo_a.get() ,orbv_b.get() ,orbv_b.get()], compact=False) + eri_b_b2a = ao2mo.general(mol, [orbo_b.get() ,orbv_b.get() ,orbo_a.get() ,orbv_a.get()], compact=False) + eri_b_a2b = ao2mo.general(mol, [orbo_a.get() ,orbv_a.get() ,orbo_b.get() ,orbv_b.get()], compact=False) + + eri_a_b2a = eri_a_b2a.reshape(nocc_b,nocc_b,nvir_a,nvir_a) + eri_a_a2b = eri_a_a2b.reshape(nocc_a,nocc_a,nvir_b,nvir_b) + eri_b_b2a = eri_b_b2a.reshape(nocc_b,nvir_b,nocc_a,nvir_a) + eri_b_a2b = eri_b_a2b.reshape(nocc_a,nvir_a,nocc_b,nvir_b) + + a_b2a, a_a2b = a + b_b2a, b_a2b = b + + a_b2a-= cp.einsum('ijba->iajb', eri_a_b2a) * hyb + a_a2b-= cp.einsum('ijba->iajb', eri_a_a2b) * hyb + b_b2a-= cp.einsum('ibja->iajb', eri_b_b2a) * hyb + b_a2b-= cp.einsum('ibja->iajb', eri_b_a2b) * hyb + + if isinstance(mf, scf.hf.KohnShamDFT): + from pyscf.dft import xc_deriv + from pyscf.dft import numint2c + ni0 = mf._numint + ni = numint2c.NumInt2C() + ni.collinear = 'mcol' + ni.collinear_samples = collinear_samples + ni.libxc.test_deriv_order(mf.xc, 2, raise_error=True) + if mf.nlc or ni.libxc.is_nlc(mf.xc): + logger.warn(mf, 'NLC functional found in DFT object. Its second ' + 'deriviative is not available. Its contribution is ' + 'not included in the response function.') + omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, mol.spin) + + add_hf_(a, b, hyb) + + xctype = ni._xc_type(mf.xc) + mem_now = lib.current_memory()[0] + max_memory = max(2000, mf.max_memory*.8-mem_now) + + # it should be optimized, which is the disadvantage of mc approach. 
+ fxc = cache_xc_kernel_sf(ni, mol, mf.grids, mf.xc, mo_coeff, mo_occ,deriv=2,spin=1)[2] + p0,p1=0,0 # the two parameters are used for counts the batch of grids. + + if xctype == 'LDA': + ao_deriv = 0 + for ao, mask, weight, coords \ + in ni0.block_loop(mol, mf.grids, nao, ao_deriv, max_memory): + p0 = p1 + p1+= weight.shape[0] + wfxc= fxc[0,0][...,p0:p1] * weight + + rho_o_a = lib.einsum('rp,pi->ri', ao, orbo_a) + rho_v_a = lib.einsum('rp,pi->ri', ao, orbv_a) + rho_o_b = lib.einsum('rp,pi->ri', ao, orbo_b) + rho_v_b = lib.einsum('rp,pi->ri', ao, orbv_b) + rho_ov_b2a = cp.einsum('ri,ra->ria', rho_o_b, rho_v_a) + rho_ov_a2b = cp.einsum('ri,ra->ria', rho_o_a, rho_v_b) + + w_ov = cp.einsum('ria,r->ria', rho_ov_b2a, wfxc*2.0) + iajb = lib.einsum('ria,rjb->iajb', rho_ov_b2a, w_ov) + a_b2a += iajb + iajb = lib.einsum('ria,rjb->iajb', rho_ov_a2b, w_ov) + b_a2b += iajb + + w_ov = cp.einsum('ria,r->ria', rho_ov_a2b, wfxc*2.0) + iajb = lib.einsum('ria,rjb->iajb', rho_ov_a2b, w_ov) + a_a2b += iajb + iajb = lib.einsum('ria,rjb->iajb', rho_ov_b2a, w_ov) + b_b2a += iajb + + elif xctype == 'GGA': + ao_deriv = 1 + for ao, mask, weight, coords \ + in ni.block_loop(mol, mf.grids, nao, ao_deriv, max_memory): + p0 = p1 + p1+= weight.shape[0] + wfxc= fxc[...,p0:p1] * weight + + rho_o_a = lib.einsum('xrp,pi->xri', ao, orbo_a) + rho_v_a = lib.einsum('xrp,pi->xri', ao, orbv_a) + rho_o_b = lib.einsum('xrp,pi->xri', ao, orbo_b) + rho_v_b = lib.einsum('xrp,pi->xri', ao, orbv_b) + rho_ov_b2a = cp.einsum('xri,ra->xria', rho_o_b, rho_v_a[0]) + rho_ov_a2b = cp.einsum('xri,ra->xria', rho_o_a, rho_v_b[0]) + rho_ov_b2a[1:4] += cp.einsum('ri,xra->xria', rho_o_b[0], rho_v_a[1:4]) + rho_ov_a2b[1:4] += cp.einsum('ri,xra->xria', rho_o_a[0], rho_v_b[1:4]) + + w_ov = cp.einsum('xyr,xria->yria', wfxc*2.0, rho_ov_b2a) + iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_b2a) + a_b2a += iajb + iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_a2b) + b_b2a += iajb + + w_ov = cp.einsum('xyr,xria->yria', 
wfxc*2.0, rho_ov_a2b) + iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_a2b) + a_a2b += iajb + iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_b2a) + b_a2b += iajb + + elif xctype == 'HF': + pass + + elif xctype == 'NLC': + raise NotImplementedError('NLC') + + elif xctype == 'MGGA': + ao_deriv = 1 + for ao, mask, weight, coords \ + in ni.block_loop(mol, mf.grids, nao, ao_deriv, max_memory): + p0 = p1 + p1+= weight.shape[0] + wfxc = fxc[...,p0:p1] * weight + + rho_oa = lib.einsum('xrp,pi->xri', ao, orbo_a) + rho_ob = lib.einsum('xrp,pi->xri', ao, orbo_b) + rho_va = lib.einsum('xrp,pi->xri', ao, orbv_a) + rho_vb = lib.einsum('xrp,pi->xri', ao, orbv_b) + rho_ov_b2a = cp.einsum('xri,ra->xria', rho_ob, rho_va[0]) + rho_ov_a2b = cp.einsum('xri,ra->xria', rho_oa, rho_vb[0]) + rho_ov_b2a[1:4] += cp.einsum('ri,xra->xria', rho_ob[0], rho_va[1:4]) + rho_ov_a2b[1:4] += cp.einsum('ri,xra->xria', rho_oa[0], rho_vb[1:4]) + tau_ov_b2a = cp.einsum('xri,xra->ria', rho_ob[1:4], rho_va[1:4]) * .5 + tau_ov_a2b = cp.einsum('xri,xra->ria', rho_oa[1:4], rho_vb[1:4]) * .5 + rho_ov_b2a = cp.vstack([rho_ov_b2a, tau_ov_b2a[cp.newaxis]]) + rho_ov_a2b = cp.vstack([rho_ov_a2b, tau_ov_a2b[cp.newaxis]]) + + w_ov = cp.einsum('xyr,xria->yria', wfxc*2.0, rho_ov_b2a) + iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_b2a) + a_b2a += iajb + iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_a2b) + b_b2a += iajb + + w_ov = cp.einsum('xyr,xria->yria', wfxc*2.0, rho_ov_a2b) + iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_a2b) + a_a2b += iajb + iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_b2a) + b_a2b += iajb + else: + add_hf_(a, b) + a = (a[0].get(), a[1].get()) # flip-up flip-down + b = (b[0].get(), b[1].get()) + return a, b + + REAL_EIG_THRESHOLD = tdhf_cpu.REAL_EIG_THRESHOLD def gen_tda_operation(td, mf, fock_ao=None, wfnsym=None): @@ -781,6 +975,14 @@ def all_eigs(w, v, nroots, envs): self._finalize() return self.e, self.xy + def get_ab(self, mf=None, mo_energy=None, mo_coeff=None, 
mo_occ=None, collinear_samples=None): + if mf is None: mf = self._scf + if mo_energy is None: mo_energy = mf.mo_energy + if mo_coeff is None: mo_coeff = mf.mo_coeff + if mo_occ is None: mo_occ = mf.mo_occ + if collinear_samples is None: collinear_samples = self.collinear_samples + return get_ab_sf(mf, mo_energy=mo_energy, mo_coeff=mo_coeff, mo_occ=mo_occ, collinear_samples=collinear_samples) + def gen_tdhf_operation(td, mf, fock_ao=None, singlet=True, wfnsym=None): '''Generate function to compute From 14ff50fbd2cb6117fe185ed74402b48f41fa2047 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 12 Sep 2025 11:06:05 +0800 Subject: [PATCH 02/32] Finish the get_ab and test the collinear and multi-collinear sf-tddft --- gpu4pyscf/tdscf/tests/test_sftddft.py | 95 ++++++++- gpu4pyscf/tdscf/tests/test_sftddft_col.py | 138 ++++++++++++ gpu4pyscf/tdscf/uhf.py | 244 ++++++++++++---------- 3 files changed, 358 insertions(+), 119 deletions(-) create mode 100644 gpu4pyscf/tdscf/tests/test_sftddft_col.py diff --git a/gpu4pyscf/tdscf/tests/test_sftddft.py b/gpu4pyscf/tdscf/tests/test_sftddft.py index 439ca5057..546551129 100644 --- a/gpu4pyscf/tdscf/tests/test_sftddft.py +++ b/gpu4pyscf/tdscf/tests/test_sftddft.py @@ -50,12 +50,15 @@ def setUpClass(cls): mol.basis = '631g' cls.mol = mol.build() cls.mf = mol.UHF().to_gpu().run() + cls.mflda = mol.UKS(xc='svwn').to_gpu().run() + cls.mfb3lyp = mol.UKS(xc='b3lyp').to_gpu().run() + cls.mftpss = mol.UKS(xc='tpss').to_gpu().run() @classmethod def tearDownClass(cls): cls.mol.stdout.close() - def test_tda(self): + def test_hf_tda(self): mf = self.mf # sftddft not available in pyscf main branch. 
References are created # using the sftda module from pyscf-forge @@ -65,29 +68,101 @@ def test_tda(self): a, b = td.get_ab() e = diagonalize_tda(a[0], nroots=3)[0] self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) - print(td.e) - print(e) ref = [-0.21574567, 0.00270390, 0.03143914] td = mf.SFTDA().run(extype=1, conv_tol=1e-7) self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) + e = diagonalize_tda(a[1], nroots=3)[0] + self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) + + @unittest.skipIf(mcfun is None, 'MCfun not available') + def test_mcol_svwn_tda(self): + mf = self.mflda + # sftddft not available in pyscf main branch. References are created + # using the sftda module from pyscf-forge + ref = [0.45022394, 0.57917576, 1.04475443] + td = mf.SFTDA() + td.collinear = 'mcol' + td.extype = 0 + td.collinear_samples=200 + td.conv_tol = 1e-5 + td.kernel() a, b = td.get_ab() + e = diagonalize_tda(a[0], nroots=3)[0] + + self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) + self.assertAlmostEqual(abs(td.e - ref).max(), 0, 5) + + ref = [-0.32642984, 0.0003752 , 0.02156706] + td = mf.SFTDA() + td.collinear = 'mcol' + td.extype = 1 + td.collinear_samples=200 + td.conv_tol = 1e-7 + td.kernel() e = diagonalize_tda(a[1], nroots=3)[0] - print(td.e) - print(e) + self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) + self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) @unittest.skipIf(mcfun is None, 'MCfun not available') def test_mcol_b3lyp_tda(self): - mf = self.mf + mf = self.mfb3lyp + # sftddft not available in pyscf main branch. 
References are created + # using the sftda module from pyscf-forge + ref = [0.45941163, 0.57799537, 1.06629197] + td = mf.SFTDA() + td.collinear = 'mcol' + td.extype = 0 + td.collinear_samples=200 + td.conv_tol = 1e-5 + td.kernel() + a, b = td.get_ab() + e = diagonalize_tda(a[0], nroots=3)[0] + + self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) + self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) + + ref = [-0.29629126, 0.00067001, 0.0195629 ] + td = mf.SFTDA() + td.collinear = 'mcol' + td.extype = 1 + td.collinear_samples=200 + td.conv_tol = 1e-7 + td.kernel() + e = diagonalize_tda(a[1], nroots=3)[0] + + self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) + self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) + + @unittest.skipIf(mcfun is None, 'MCfun not available') + def test_mcol_tpss_tda(self): + mf = self.mftpss # sftddft not available in pyscf main branch. References are created # using the sftda module from pyscf-forge - ref = [ 0.45941171, 0.57799552, 1.06629265] - td = mf.SFTDA().run(collinear='mcol', extype=0, conv_tol=1e-7) + ref = [0.4498647 , 0.57071842, 1.0544106 ] + td = mf.SFTDA() + td.collinear = 'mcol' + td.extype = 0 + td.collinear_samples=200 + td.conv_tol = 1e-5 + td.kernel() + a, b = td.get_ab() + e = diagonalize_tda(a[0], nroots=3)[0] + + self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) - ref = [-0.29629139, 0.00067017, 0.01956306] - td = mf.SFTDA().run(collinear='mcol', extype=1, conv_tol=1e-7) + ref = [-0.28699899, 0.00063662, 0.0232923 ] + td = mf.SFTDA() + td.collinear = 'mcol' + td.extype = 1 + td.collinear_samples=200 + td.conv_tol = 1e-7 + td.kernel() + e = diagonalize_tda(a[1], nroots=3)[0] + + self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) @unittest.skip('Numerical issues encountered in non-hermitian diagonalization') diff --git a/gpu4pyscf/tdscf/tests/test_sftddft_col.py b/gpu4pyscf/tdscf/tests/test_sftddft_col.py new file 
mode 100644 index 000000000..6dec9bc2e --- /dev/null +++ b/gpu4pyscf/tdscf/tests/test_sftddft_col.py @@ -0,0 +1,138 @@ +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +import cupy as cp +from pyscf import lib, gto, scf +from gpu4pyscf import tdscf +try: + import mcfun +except ImportError: + mcfun = None + + +def diagonalize_tda(a, nroots=5): + nocc, nvir = a.shape[:2] + nov = nocc * nvir + a = a.reshape(nov, nov) + e, xy = np.linalg.eig(np.asarray(a)) + sorted_indices = np.argsort(e) + + e_sorted = e[sorted_indices] + xy_sorted = xy[:, sorted_indices] + + return e_sorted[:nroots], xy_sorted[:, :nroots] + + +class KnownValues(unittest.TestCase): + @classmethod + def setUpClass(cls): + mol = gto.Mole() + mol.verbose = 5 + mol.output = '/dev/null' + mol.atom = ''' + O 0. 0. 0. + H 0. -0.757 0.587 + H 0. 
0.757 0.587''' + mol.spin = 2 + mol.basis = '631g' + cls.mol = mol.build() + cls.mflda = mol.UKS(xc='svwn').to_gpu().run() + cls.mfb3lyp = mol.UKS(xc='b3lyp').to_gpu().run() + cls.mftpss = mol.UKS(xc='tpss').to_gpu().run() + + @classmethod + def tearDownClass(cls): + cls.mol.stdout.close() + + def test_lda_tda(self): + mf = self.mflda + na, nb = mf.mol.nelec + + td = mf.SFTDA() + td.extype = 0 + td.conv_tol = 1e-7 + td.nroots = 3 + td.collinear = 'col' + td.run() + a, b = td.get_ab() + e = diagonalize_tda(a[0], nroots=3)[0] + self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) + assert td.e[0] - (mf.mo_energy[0][na] - mf.mo_energy[1][nb-1]) < 1e-6 + + td = mf.SFTDA() + td.extype = 1 + td.conv_tol = 1e-7 + td.nroots = 3 + td.collinear = 'col' + td.run() + e = diagonalize_tda(a[1], nroots=3)[0] + self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) + assert td.e[0] - (mf.mo_energy[1][nb] - mf.mo_energy[0][na-1]) < 1e-6 + + def test_b3lyp_tda(self): + mf = self.mfb3lyp + na, nb = mf.mol.nelec + + td = mf.SFTDA() + td.extype = 0 + td.conv_tol = 1e-7 + td.nroots = 3 + td.collinear = 'col' + td.run() + a, b = td.get_ab() + e = diagonalize_tda(a[0], nroots=3)[0] + self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) + assert td.e[0] - (mf.mo_energy[0][na] - mf.mo_energy[1][nb-1]) < 1e-6 + + td = mf.SFTDA() + td.extype = 1 + td.conv_tol = 1e-7 + td.nroots = 3 + td.collinear = 'col' + td.run() + e = diagonalize_tda(a[1], nroots=3)[0] + self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) + assert td.e[0] - (mf.mo_energy[1][nb] - mf.mo_energy[0][na-1]) < 1e-6 + + def test_tpss_tda(self): + mf = self.mftpss + na, nb = mf.mol.nelec + + td = mf.SFTDA() + td.extype = 0 + td.conv_tol = 1e-7 + td.nroots = 3 + td.collinear = 'col' + td.run() + a, b = td.get_ab() + e = diagonalize_tda(a[0], nroots=3)[0] + self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) + assert td.e[0] - (mf.mo_energy[0][na] - mf.mo_energy[1][nb-1]) < 1e-6 + + td = mf.SFTDA() + td.extype = 1 + td.conv_tol = 1e-7 + td.nroots 
= 3 + td.collinear = 'col' + td.run() + e = diagonalize_tda(a[1], nroots=3)[0] + self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) + assert td.e[0] - (mf.mo_energy[1][nb] - mf.mo_energy[0][na-1]) < 1e-6 + + +if __name__ == "__main__": + print("Full Tests for spin-flip-TDA using collinear functional.") + unittest.main() diff --git a/gpu4pyscf/tdscf/uhf.py b/gpu4pyscf/tdscf/uhf.py index fe3e64bb1..e44b50426 100644 --- a/gpu4pyscf/tdscf/uhf.py +++ b/gpu4pyscf/tdscf/uhf.py @@ -18,6 +18,7 @@ from pyscf import lib from pyscf.tdscf import uhf as tdhf_cpu from pyscf import ao2mo +from pyscf.data import nist from gpu4pyscf.tdscf._lr_eig import eigh as lr_eigh, eig as lr_eig, real_eig from gpu4pyscf import scf from gpu4pyscf.lib import logger @@ -403,7 +404,7 @@ def add_hf_(a, b, hyb=1): return (a_aa.get(), a_ab.get(), a_bb.get()), (b_aa.get(), b_ab.get(), b_bb.get()) -def get_ab_sf(mf, mo_energy=None, mo_coeff=None, mo_occ=None, collinear_samples=200): +def get_ab_sf(mf, mo_energy=None, mo_coeff=None, mo_occ=None, collinear='col', collinear_samples=200): r''' From pyscf-forge A and B matrices for TDDFT response function. @@ -485,110 +486,121 @@ def add_hf_(a, b, hyb=1): 'not included in the response function.') omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, mol.spin) - add_hf_(a, b, hyb) - - xctype = ni._xc_type(mf.xc) - mem_now = lib.current_memory()[0] - max_memory = max(2000, mf.max_memory*.8-mem_now) - - # it should be optimized, which is the disadvantage of mc approach. - fxc = cache_xc_kernel_sf(ni, mol, mf.grids, mf.xc, mo_coeff, mo_occ,deriv=2,spin=1)[2] - p0,p1=0,0 # the two parameters are used for counts the batch of grids. 
- - if xctype == 'LDA': - ao_deriv = 0 - for ao, mask, weight, coords \ - in ni0.block_loop(mol, mf.grids, nao, ao_deriv, max_memory): - p0 = p1 - p1+= weight.shape[0] - wfxc= fxc[0,0][...,p0:p1] * weight - - rho_o_a = lib.einsum('rp,pi->ri', ao, orbo_a) - rho_v_a = lib.einsum('rp,pi->ri', ao, orbv_a) - rho_o_b = lib.einsum('rp,pi->ri', ao, orbo_b) - rho_v_b = lib.einsum('rp,pi->ri', ao, orbv_b) - rho_ov_b2a = cp.einsum('ri,ra->ria', rho_o_b, rho_v_a) - rho_ov_a2b = cp.einsum('ri,ra->ria', rho_o_a, rho_v_b) - - w_ov = cp.einsum('ria,r->ria', rho_ov_b2a, wfxc*2.0) - iajb = lib.einsum('ria,rjb->iajb', rho_ov_b2a, w_ov) - a_b2a += iajb - iajb = lib.einsum('ria,rjb->iajb', rho_ov_a2b, w_ov) - b_a2b += iajb - - w_ov = cp.einsum('ria,r->ria', rho_ov_a2b, wfxc*2.0) - iajb = lib.einsum('ria,rjb->iajb', rho_ov_a2b, w_ov) - a_a2b += iajb - iajb = lib.einsum('ria,rjb->iajb', rho_ov_b2a, w_ov) - b_b2a += iajb - - elif xctype == 'GGA': - ao_deriv = 1 - for ao, mask, weight, coords \ - in ni.block_loop(mol, mf.grids, nao, ao_deriv, max_memory): - p0 = p1 - p1+= weight.shape[0] - wfxc= fxc[...,p0:p1] * weight - - rho_o_a = lib.einsum('xrp,pi->xri', ao, orbo_a) - rho_v_a = lib.einsum('xrp,pi->xri', ao, orbv_a) - rho_o_b = lib.einsum('xrp,pi->xri', ao, orbo_b) - rho_v_b = lib.einsum('xrp,pi->xri', ao, orbv_b) - rho_ov_b2a = cp.einsum('xri,ra->xria', rho_o_b, rho_v_a[0]) - rho_ov_a2b = cp.einsum('xri,ra->xria', rho_o_a, rho_v_b[0]) - rho_ov_b2a[1:4] += cp.einsum('ri,xra->xria', rho_o_b[0], rho_v_a[1:4]) - rho_ov_a2b[1:4] += cp.einsum('ri,xra->xria', rho_o_a[0], rho_v_b[1:4]) - - w_ov = cp.einsum('xyr,xria->yria', wfxc*2.0, rho_ov_b2a) - iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_b2a) - a_b2a += iajb - iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_a2b) - b_b2a += iajb - - w_ov = cp.einsum('xyr,xria->yria', wfxc*2.0, rho_ov_a2b) - iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_a2b) - a_a2b += iajb - iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_b2a) - b_a2b += iajb - 
- elif xctype == 'HF': - pass - - elif xctype == 'NLC': - raise NotImplementedError('NLC') - - elif xctype == 'MGGA': - ao_deriv = 1 - for ao, mask, weight, coords \ - in ni.block_loop(mol, mf.grids, nao, ao_deriv, max_memory): - p0 = p1 - p1+= weight.shape[0] - wfxc = fxc[...,p0:p1] * weight - - rho_oa = lib.einsum('xrp,pi->xri', ao, orbo_a) - rho_ob = lib.einsum('xrp,pi->xri', ao, orbo_b) - rho_va = lib.einsum('xrp,pi->xri', ao, orbv_a) - rho_vb = lib.einsum('xrp,pi->xri', ao, orbv_b) - rho_ov_b2a = cp.einsum('xri,ra->xria', rho_ob, rho_va[0]) - rho_ov_a2b = cp.einsum('xri,ra->xria', rho_oa, rho_vb[0]) - rho_ov_b2a[1:4] += cp.einsum('ri,xra->xria', rho_ob[0], rho_va[1:4]) - rho_ov_a2b[1:4] += cp.einsum('ri,xra->xria', rho_oa[0], rho_vb[1:4]) - tau_ov_b2a = cp.einsum('xri,xra->ria', rho_ob[1:4], rho_va[1:4]) * .5 - tau_ov_a2b = cp.einsum('xri,xra->ria', rho_oa[1:4], rho_vb[1:4]) * .5 - rho_ov_b2a = cp.vstack([rho_ov_b2a, tau_ov_b2a[cp.newaxis]]) - rho_ov_a2b = cp.vstack([rho_ov_a2b, tau_ov_a2b[cp.newaxis]]) - - w_ov = cp.einsum('xyr,xria->yria', wfxc*2.0, rho_ov_b2a) - iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_b2a) - a_b2a += iajb - iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_a2b) - b_b2a += iajb - - w_ov = cp.einsum('xyr,xria->yria', wfxc*2.0, rho_ov_a2b) - iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_a2b) - a_a2b += iajb - iajb = lib.einsum('xria,xrjb->iajb', w_ov, rho_ov_b2a) - b_a2b += iajb + if collinear == 'mcol': + add_hf_(a, b, hyb) + xctype = ni._xc_type(mf.xc) + mem_now = lib.current_memory()[0] + max_memory = max(2000, mf.max_memory*.8-mem_now) + # it should be optimized, which is the disadvantage of mc approach. + fxc = cache_xc_kernel_sf(ni0, mol, mf.grids, mf.xc, mo_coeff, mo_occ, collinear_samples)[2] + p0,p1=0,0 # the two parameters are used for counts the batch of grids. 
+ opt = getattr(ni0, 'gdftopt', None) + if opt is None: + ni0.build(mol, mf.grids.coords) + opt = ni0.gdftopt + _sorted_mol = opt._sorted_mol + orbo_a = opt.sort_orbitals(orbo_a, axis=[0]) + orbv_a = opt.sort_orbitals(orbv_a, axis=[0]) + orbo_b = opt.sort_orbitals(orbo_b, axis=[0]) + orbv_b = opt.sort_orbitals(orbv_b, axis=[0]) + if xctype == 'LDA': + ao_deriv = 0 + for ao, mask, weight, coords \ + in ni0.block_loop(_sorted_mol, mf.grids, nao, ao_deriv, max_memory): + p0 = p1 + p1+= weight.shape[0] + wfxc= fxc[0,0][...,p0:p1] * weight + + rho_o_a = contract('pr,pi->ri', ao, orbo_a) + rho_v_a = contract('pr,pi->ri', ao, orbv_a) + rho_o_b = contract('pr,pi->ri', ao, orbo_b) + rho_v_b = contract('pr,pi->ri', ao, orbv_b) + rho_ov_b2a = contract('ri,ra->ria', rho_o_b, rho_v_a) + rho_ov_a2b = contract('ri,ra->ria', rho_o_a, rho_v_b) + + w_ov = contract('ria,r->ria', rho_ov_b2a, wfxc*2.0) + iajb = contract('ria,rjb->iajb', rho_ov_b2a, w_ov) + a_b2a += iajb + iajb = contract('ria,rjb->iajb', rho_ov_a2b, w_ov) + b_a2b += iajb + + w_ov = contract('ria,r->ria', rho_ov_a2b, wfxc*2.0) + iajb = contract('ria,rjb->iajb', rho_ov_a2b, w_ov) + a_a2b += iajb + iajb = contract('ria,rjb->iajb', rho_ov_b2a, w_ov) + b_b2a += iajb + + elif xctype == 'GGA': + ao_deriv = 1 + for ao, mask, weight, coords \ + in ni0.block_loop(_sorted_mol, mf.grids, nao, ao_deriv, max_memory): + p0 = p1 + p1+= weight.shape[0] + wfxc= fxc[...,p0:p1] * weight + + rho_o_a = contract('xpr,pi->xri', ao, orbo_a) + rho_v_a = contract('xpr,pi->xri', ao, orbv_a) + rho_o_b = contract('xpr,pi->xri', ao, orbo_b) + rho_v_b = contract('xpr,pi->xri', ao, orbv_b) + rho_ov_b2a = contract('xri,ra->xria', rho_o_b, rho_v_a[0]) + rho_ov_a2b = contract('xri,ra->xria', rho_o_a, rho_v_b[0]) + rho_ov_b2a[1:4] += contract('ri,xra->xria', rho_o_b[0], rho_v_a[1:4]) + rho_ov_a2b[1:4] += contract('ri,xra->xria', rho_o_a[0], rho_v_b[1:4]) + + w_ov = contract('xyr,xria->yria', wfxc*2.0, rho_ov_b2a) + iajb = contract('xria,xrjb->iajb', w_ov, 
rho_ov_b2a) + a_b2a += iajb + iajb = contract('xria,xrjb->iajb', w_ov, rho_ov_a2b) + b_b2a += iajb + + w_ov = contract('xyr,xria->yria', wfxc*2.0, rho_ov_a2b) + iajb = contract('xria,xrjb->iajb', w_ov, rho_ov_a2b) + a_a2b += iajb + iajb = contract('xria,xrjb->iajb', w_ov, rho_ov_b2a) + b_a2b += iajb + + elif xctype == 'HF': + pass + + elif xctype == 'NLC': + raise NotImplementedError('NLC') + + elif xctype == 'MGGA': + ao_deriv = 1 + for ao, mask, weight, coords \ + in ni0.block_loop(_sorted_mol, mf.grids, nao, ao_deriv, max_memory): + p0 = p1 + p1+= weight.shape[0] + wfxc = fxc[...,p0:p1] * weight + + rho_oa = contract('xpr,pi->xri', ao, orbo_a) + rho_ob = contract('xpr,pi->xri', ao, orbo_b) + rho_va = contract('xpr,pi->xri', ao, orbv_a) + rho_vb = contract('xpr,pi->xri', ao, orbv_b) + rho_ov_b2a = contract('xri,ra->xria', rho_ob, rho_va[0]) + rho_ov_a2b = contract('xri,ra->xria', rho_oa, rho_vb[0]) + rho_ov_b2a[1:4] += contract('ri,xra->xria', rho_ob[0], rho_va[1:4]) + rho_ov_a2b[1:4] += contract('ri,xra->xria', rho_oa[0], rho_vb[1:4]) + tau_ov_b2a = contract('xri,xra->ria', rho_ob[1:4], rho_va[1:4]) * .5 + tau_ov_a2b = contract('xri,xra->ria', rho_oa[1:4], rho_vb[1:4]) * .5 + rho_ov_b2a = cp.vstack([rho_ov_b2a, tau_ov_b2a[cp.newaxis]]) + rho_ov_a2b = cp.vstack([rho_ov_a2b, tau_ov_a2b[cp.newaxis]]) + + w_ov = contract('xyr,xria->yria', wfxc*2.0, rho_ov_b2a) + iajb = contract('xria,xrjb->iajb', w_ov, rho_ov_b2a) + a_b2a += iajb + iajb = contract('xria,xrjb->iajb', w_ov, rho_ov_a2b) + b_b2a += iajb + + w_ov = contract('xyr,xria->yria', wfxc*2.0, rho_ov_a2b) + iajb = contract('xria,xrjb->iajb', w_ov, rho_ov_a2b) + a_a2b += iajb + iajb = contract('xria,xrjb->iajb', w_ov, rho_ov_b2a) + b_a2b += iajb + elif collinear == 'col': + add_hf_(a, b, hyb) + elif collinear == 'ncol': + raise NotImplementedError('Locally collinear approach is not implemented') else: add_hf_(a, b) a = (a[0].get(), a[1].get()) # flip-up flip-down @@ -924,6 +936,17 @@ def check_sanity(self): assert 
self.collinear in ('col', 'ncol', 'mcol') return self + def _finalize(self): + '''Hook for dumping results and clearing up the object.''' + if not all(self.converged): + logger.note(self, 'TD-SCF states %s not converged.', + [i for i, x in enumerate(self.converged) if not x]) + if self.extype == 0: + logger.note(self, 'Spin-flip-up Excited State energies (eV)\n%s', self.e * nist.HARTREE2EV) + elif self.extype == 1: + logger.note(self, 'Spin-flip-down Excited State energies (eV)\n%s', self.e * nist.HARTREE2EV) + return self + def kernel(self, x0=None, nstates=None): '''Spin-flip TDA diagonalization solver ''' @@ -940,9 +963,10 @@ def kernel(self, x0=None, nstates=None): mf = self._scf ni = mf._numint if not ni.libxc.is_hybrid_xc(mf.xc): - self.converged = True - self.e, xs = self._init_guess() + self.converged = [True,] + self.e, xs = self._init_guess(self._scf, self.nstates) self.xy = [(x, 0) for x in xs] + self._finalize() return self.e, self.xy x0sym = None @@ -975,13 +999,15 @@ def all_eigs(w, v, nroots, envs): self._finalize() return self.e, self.xy - def get_ab(self, mf=None, mo_energy=None, mo_coeff=None, mo_occ=None, collinear_samples=None): + def get_ab(self, mf=None, mo_energy=None, mo_coeff=None, mo_occ=None, collinear=None, collinear_samples=None): if mf is None: mf = self._scf if mo_energy is None: mo_energy = mf.mo_energy if mo_coeff is None: mo_coeff = mf.mo_coeff if mo_occ is None: mo_occ = mf.mo_occ + if collinear is None: collinear = self.collinear if collinear_samples is None: collinear_samples = self.collinear_samples - return get_ab_sf(mf, mo_energy=mo_energy, mo_coeff=mo_coeff, mo_occ=mo_occ, collinear_samples=collinear_samples) + return get_ab_sf(mf, mo_energy=mo_energy, mo_coeff=mo_coeff, mo_occ=mo_occ, + collinear=collinear, collinear_samples=collinear_samples) def gen_tdhf_operation(td, mf, fock_ao=None, singlet=True, wfnsym=None): From b7823e7807e7feae9224db86d193f2d4acd1df87 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 12 Sep 
2025 11:24:20 +0800 Subject: [PATCH 03/32] move numint2c to gpu --- gpu4pyscf/dft/numint2c.py | 55 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 gpu4pyscf/dft/numint2c.py diff --git a/gpu4pyscf/dft/numint2c.py b/gpu4pyscf/dft/numint2c.py new file mode 100644 index 000000000..fea8b0be8 --- /dev/null +++ b/gpu4pyscf/dft/numint2c.py @@ -0,0 +1,55 @@ +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from pyscf import lib +from gpu4pyscf.dft import numint +from pyscf import __config__ + +class NumInt2C(lib.StreamObject, numint.LibXCMixin): + '''Numerical integration methods for 2-component basis (used by GKS)''' + + # collinear schemes: + # 'col' (collinear, by default) + # 'ncol' (non-collinear) + # 'mcol' (multi-collinear) + collinear = getattr(__config__, 'dft_numint_RnumInt_collinear', 'col') + spin_samples = getattr(__config__, 'dft_numint_RnumInt_spin_samples', 770) + collinear_thrd = getattr(__config__, 'dft_numint_RnumInt_collinear_thrd', 0.99) + collinear_samples = getattr(__config__, 'dft_numint_RnumInt_collinear_samples', 200) + + make_mask = staticmethod(numint.make_mask) + eval_ao = staticmethod(numint.eval_ao) + eval_rho = staticmethod(eval_rho) + + eval_rho1 = NotImplemented + eval_rho2 = NotImplemented + cache_xc_kernel = NotImplemented + cache_xc_kernel1 = NotImplemented + get_rho = NotImplemented + _gks_mcol_vxc = NotImplemented + _gks_mcol_fxc = NotImplemented + nr_vxc = NotImplemented + nr_nlc_vxc = NotImplemented + nr_fxc = NotImplemented + get_fxc = nr_gks_fxc = nr_fxc + + eval_xc_eff = NotImplemented + mcfun_eval_xc_adapter = NotImplemented + + block_loop = NotImplemented + _gen_rho_evaluator = NotImplemented + + def _to_numint1c(self): + '''Converts to the associated class to handle collinear systems''' + return self.view(numint.NumInt) From 83f84382bc6132d39228c941eb1716bcec2c21f7 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Mon, 15 Sep 2025 07:23:45 +0800 Subject: [PATCH 04/32] debugging --- gpu4pyscf/dft/numint2c.py | 55 ------------- gpu4pyscf/tdscf/_uhf_resp_sf.py | 132 ++++++++++++++++++++++++++++++-- 2 files changed, 126 insertions(+), 61 deletions(-) delete mode 100644 gpu4pyscf/dft/numint2c.py diff --git a/gpu4pyscf/dft/numint2c.py b/gpu4pyscf/dft/numint2c.py deleted file mode 100644 index fea8b0be8..000000000 --- a/gpu4pyscf/dft/numint2c.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright 2021-2024 The PySCF Developers. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from pyscf import lib -from gpu4pyscf.dft import numint -from pyscf import __config__ - -class NumInt2C(lib.StreamObject, numint.LibXCMixin): - '''Numerical integration methods for 2-component basis (used by GKS)''' - - # collinear schemes: - # 'col' (collinear, by default) - # 'ncol' (non-collinear) - # 'mcol' (multi-collinear) - collinear = getattr(__config__, 'dft_numint_RnumInt_collinear', 'col') - spin_samples = getattr(__config__, 'dft_numint_RnumInt_spin_samples', 770) - collinear_thrd = getattr(__config__, 'dft_numint_RnumInt_collinear_thrd', 0.99) - collinear_samples = getattr(__config__, 'dft_numint_RnumInt_collinear_samples', 200) - - make_mask = staticmethod(numint.make_mask) - eval_ao = staticmethod(numint.eval_ao) - eval_rho = staticmethod(eval_rho) - - eval_rho1 = NotImplemented - eval_rho2 = NotImplemented - cache_xc_kernel = NotImplemented - cache_xc_kernel1 = NotImplemented - get_rho = NotImplemented - _gks_mcol_vxc = NotImplemented - _gks_mcol_fxc = NotImplemented - nr_vxc = NotImplemented - nr_nlc_vxc = NotImplemented - nr_fxc = NotImplemented - get_fxc = nr_gks_fxc = nr_fxc - - eval_xc_eff = NotImplemented - mcfun_eval_xc_adapter = NotImplemented - - block_loop = NotImplemented - _gen_rho_evaluator = NotImplemented - - def _to_numint1c(self): - '''Converts to the associated class to handle collinear systems''' - return self.view(numint.NumInt) diff --git 
a/gpu4pyscf/tdscf/_uhf_resp_sf.py b/gpu4pyscf/tdscf/_uhf_resp_sf.py index c8f7ff878..141d565d0 100644 --- a/gpu4pyscf/tdscf/_uhf_resp_sf.py +++ b/gpu4pyscf/tdscf/_uhf_resp_sf.py @@ -24,6 +24,99 @@ from gpu4pyscf.scf import hf, uhf from gpu4pyscf.dft.numint import _scale_ao, _tau_dot, eval_rho, eval_rho2 from gpu4pyscf.lib.cupy_helper import transpose_sum, add_sparse, contract +from concurrent.futures import ThreadPoolExecutor + + +MAX_GRIDS_PER_TASK = 200 + +def _prange(start, end, step): + '''Partitions range into segments: i0:i1, i1:i2, i2:i3, ...''' + if start < end: + for i in range(start, end, step): + yield i, min(i+step, end) + + +def _make_paxis_samples(spin_samples): + '''Samples on principal axis between [0, 1]''' + rt, wt = np.polynomial.legendre.leggauss(spin_samples) + rt = cp.array(rt) + wt = cp.array(wt) + rt = rt * .5 + .5 + wt *= .5 # normalized to 1 + return rt, wt + + +def eval_xc_eff_sf(func, rho_tmz, deriv=1, collinear_samples=200, workers=1): + assert deriv < 5 + if rho_tmz.dtype != cp.double: + raise RuntimeError('rho and mz must be real') + ngrids = rho_tmz.shape[-1] + grids_per_task = min(ngrids//(workers*3)+1, MAX_GRIDS_PER_TASK) + + if workers == 1: + results = [] + for p0, p1 in _prange(0, ngrids, grids_per_task): + r = _eval_xc_sf(func, rho_tmz[...,p0:p1], deriv, collinear_samples) + results.append(r) + else: + print(collinear_samples) + executor = ThreadPoolExecutor + + with executor(max_workers=workers) as ex: + futures = [] + for p0, p1 in _prange(0, ngrids, grids_per_task): + f = ex.submit(_eval_xc_sf, func, rho_tmz[...,p0:p1], deriv, collinear_samples) + futures.append(f) + results = [f.result() for f in futures] + + return [None if x[0] is None else cp.concatenate(x, axis=-1) for x in zip(*results)] + +def _eval_xc_sf(func, rho_tmz, deriv, collinear_samples): + ngrids = rho_tmz.shape[-1] + # samples on z=cos(theta) and their weights between [0, 1] + sgridz, weights = _make_paxis_samples(collinear_samples) + blksize = 
int(cp.ceil(1e5 / ngrids)) * 8 + + if rho_tmz.ndim == 2: + nvar = 1 + else: + nvar = rho_tmz.shape[1] + # spin-flip part + fxc_sf = 0.0 + for p0, p1 in _prange(0, weights.size, blksize): + rho = _project_spin_paxis2(rho_tmz, sgridz[p0:p1]) + fxc = func(rho, deriv)[2] + fxc = fxc.reshape(2, nvar, 2, nvar, ngrids, p1 - p0) + fxc_sf += fxc[1,:,1].dot(weights[p0:p1]) + + return None,None,fxc_sf + + +def _project_spin_paxis2(rho_tm, sgridz=None): + # ToDo: be written into the function _project_spin_paxis(). + # Because use mz rather than |mz| here + '''Projects spins onto the principal axis''' + rho = rho_tm[0] + mz = rho_tm[1] + + if sgridz is None: + rho_ts = cp.stack([rho, mz]) + else: + ngrids = rho.shape[-1] + nsg = sgridz.shape[0] + if rho_tm.ndim == 2: + rho_ts = cp.empty((2, ngrids, nsg)) + rho_ts[0] = rho[:,cp.newaxis] + rho_ts[1] = mz[:,cp.newaxis] * sgridz + rho_ts = rho_ts.reshape(2, ngrids * nsg) + else: + nvar = rho_tm.shape[1] + rho_ts = cp.empty((2, nvar, ngrids, nsg)) + rho_ts[0] = rho[:,:,cp.newaxis] + rho_ts[1] = mz[:,:,cp.newaxis] * sgridz + rho_ts = rho_ts.reshape(2, nvar, ngrids * nsg) + return rho_ts + def gen_uhf_response_sf(mf, mo_coeff=None, mo_occ=None, hermi=0, collinear='mcol', collinear_samples=200): @@ -86,6 +179,23 @@ def __mcfun_fn_eval_xc(ni, xc_code, xctype, rho, deriv): evfk[order] = xc_deriv.ud2ts(evfk[order]) return evfk +def __mcfun_fn_eval_xc2(ni, xc_code, xctype, rho, deriv): + t, s = rho + if not isinstance(t, cp.ndarray): + t = cp.asarray(t) + if not isinstance(s, cp.ndarray): + s = cp.asarray(s) + rho = cp.stack([(t + s) * .5, (t - s) * .5]) + # if xctype == 'MGGA' and rho.shape[1] == 6: + # rho = np.asarray(rho[:,[0,1,2,3,5],:], order='C') + spin = 1 + evfk = ni.eval_xc_eff(xc_code, rho, deriv=deriv, xctype=xctype, spin=spin) + # evfk = list(evfk) + # for order in range(1, deriv+1): + # if evfk[order] is not None: + # evfk[order] = xc_deriv.ud2ts(evfk[order]) + return evfk + # Edited based on 
pyscf.dft.numint2c.mcfun_eval_xc_adapter def mcfun_eval_xc_adapter_sf(ni, xc_code, collinear_samples): '''Wrapper to generate the eval_xc function required by mcfun @@ -97,16 +207,26 @@ def mcfun_eval_xc_adapter_sf(ni, xc_code, collinear_samples): raise ImportError('This feature requires mcfun library.\n' 'Try install mcfun with `pip install mcfun`') - ni = numint2c.NumInt2C() - ni.collinear = 'mcol' - ni.collinear_samples = collinear_samples + # ni = numint2c.NumInt2C() + # ni.collinear = 'mcol' + # ni.collinear_samples = collinear_samples + # xctype = ni._xc_type(xc_code) + # fn_eval_xc = functools.partial(__mcfun_fn_eval_xc, ni, xc_code, xctype) + # nproc = lib.num_threads() + xctype = ni._xc_type(xc_code) - fn_eval_xc = functools.partial(__mcfun_fn_eval_xc, ni, xc_code, xctype) + fn_eval_xc = functools.partial(__mcfun_fn_eval_xc2, ni, xc_code, xctype) nproc = lib.num_threads() + # def eval_xc_eff(xc_code, rho, deriv=1, omega=None, xctype=None, verbose=None): + # res = mcfun.eval_xc_eff_sf( + # fn_eval_xc, rho.get(), deriv, + # collinear_samples=collinear_samples, workers=nproc) + # return [x if x is None else cp.asarray(x) for x in res] + def eval_xc_eff(xc_code, rho, deriv=1, omega=None, xctype=None, verbose=None): - res = mcfun.eval_xc_eff_sf( - fn_eval_xc, rho.get(), deriv, + res = eval_xc_eff_sf( + fn_eval_xc, rho, deriv, collinear_samples=collinear_samples, workers=nproc) return [x if x is None else cp.asarray(x) for x in res] return eval_xc_eff From 9630c5b1abdbb83346d15c564a1c978eb217bc84 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Mon, 15 Sep 2025 14:40:49 +0800 Subject: [PATCH 05/32] finish excitation part, begin to write gradients --- gpu4pyscf/dft/xc_deriv.py | 23 ++++++++++++++- gpu4pyscf/grad/tduks_sf.py | 14 +++++++++ gpu4pyscf/tdscf/_uhf_resp_sf.py | 34 +++++----------------- gpu4pyscf/tdscf/tests/test_sftddft.py | 7 ----- gpu4pyscf/tdscf/uhf.py | 42 +++++++++++++++++---------- 5 files changed, 71 insertions(+), 49 deletions(-) create 
mode 100644 gpu4pyscf/grad/tduks_sf.py diff --git a/gpu4pyscf/dft/xc_deriv.py b/gpu4pyscf/dft/xc_deriv.py index 3213aaa41..88b66bd69 100644 --- a/gpu4pyscf/dft/xc_deriv.py +++ b/gpu4pyscf/dft/xc_deriv.py @@ -358,4 +358,25 @@ def _stack_fggg(fggg, axis=0, rho=None): fggg = fggg[tuple(slices)] fggg = _stack_fg(fggg, axis=axis+2, rho=rho) fggg = _stack_fg(fggg, axis=axis+1, rho=rho) - return _stack_fg(fggg, axis=axis, rho=rho) \ No newline at end of file + return _stack_fg(fggg, axis=axis, rho=rho) + + +def ud2ts(v_ud): + v_ts = cupy.asarray(v_ud) + order = v_ud.ndim // 2 + + if order == 0 and v_ts.shape[0] != 2: + raise ValueError("No spin axis found in the input array.") + + matrix = cupy.array([[0.5, 0.5], + [0.5, -0.5]]) + if order == 1: + v_ts = contract('ra,axg->rxg', matrix, v_ud) + elif order == 2: + v_ts = cupy.einsum('ra,tb,axbyg->rxtyg', matrix, matrix, v_ud) + elif order == 3: + v_ts = cupy.einsum('ra,tb,sc,axbyczg->rxtyszg', matrix, matrix, matrix, v_ud) + else: + raise NotImplementedError(f"Order {order} not implemented.") + + return v_ts diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py new file mode 100644 index 000000000..20d668c30 --- /dev/null +++ b/gpu4pyscf/grad/tduks_sf.py @@ -0,0 +1,14 @@ +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/gpu4pyscf/tdscf/_uhf_resp_sf.py b/gpu4pyscf/tdscf/_uhf_resp_sf.py index 141d565d0..98309f0d3 100644 --- a/gpu4pyscf/tdscf/_uhf_resp_sf.py +++ b/gpu4pyscf/tdscf/_uhf_resp_sf.py @@ -21,6 +21,7 @@ from pyscf import lib from pyscf.lib import logger from pyscf.dft import numint2c, xc_deriv +from gpu4pyscf.dft import xc_deriv as xc_deriv_gpu from gpu4pyscf.scf import hf, uhf from gpu4pyscf.dft.numint import _scale_ao, _tau_dot, eval_rho, eval_rho2 from gpu4pyscf.lib.cupy_helper import transpose_sum, add_sparse, contract @@ -87,6 +88,8 @@ def _eval_xc_sf(func, rho_tmz, deriv, collinear_samples): rho = _project_spin_paxis2(rho_tmz, sgridz[p0:p1]) fxc = func(rho, deriv)[2] fxc = fxc.reshape(2, nvar, 2, nvar, ngrids, p1 - p0) + if not isinstance(fxc, cp.ndarray): + fxc = cp.array(fxc) fxc_sf += fxc[1,:,1].dot(weights[p0:p1]) return None,None,fxc_sf @@ -186,14 +189,12 @@ def __mcfun_fn_eval_xc2(ni, xc_code, xctype, rho, deriv): if not isinstance(s, cp.ndarray): s = cp.asarray(s) rho = cp.stack([(t + s) * .5, (t - s) * .5]) - # if xctype == 'MGGA' and rho.shape[1] == 6: - # rho = np.asarray(rho[:,[0,1,2,3,5],:], order='C') spin = 1 evfk = ni.eval_xc_eff(xc_code, rho, deriv=deriv, xctype=xctype, spin=spin) - # evfk = list(evfk) - # for order in range(1, deriv+1): - # if evfk[order] is not None: - # evfk[order] = xc_deriv.ud2ts(evfk[order]) + evfk = list(evfk) + for order in range(1, deriv+1): + if evfk[order] is not None: + evfk[order] = xc_deriv_gpu.ud2ts(evfk[order]) return evfk # Edited based on pyscf.dft.numint2c.mcfun_eval_xc_adapter @@ -201,28 +202,9 @@ def mcfun_eval_xc_adapter_sf(ni, xc_code, collinear_samples): '''Wrapper to generate the eval_xc function required by mcfun ''' - try: - import mcfun - except ImportError: - raise ImportError('This feature requires mcfun library.\n' - 'Try install mcfun with `pip install mcfun`') - - # ni = numint2c.NumInt2C() - # ni.collinear = 'mcol' - # ni.collinear_samples = collinear_samples - # xctype = 
ni._xc_type(xc_code) - # fn_eval_xc = functools.partial(__mcfun_fn_eval_xc, ni, xc_code, xctype) - # nproc = lib.num_threads() - xctype = ni._xc_type(xc_code) fn_eval_xc = functools.partial(__mcfun_fn_eval_xc2, ni, xc_code, xctype) - nproc = lib.num_threads() - - # def eval_xc_eff(xc_code, rho, deriv=1, omega=None, xctype=None, verbose=None): - # res = mcfun.eval_xc_eff_sf( - # fn_eval_xc, rho.get(), deriv, - # collinear_samples=collinear_samples, workers=nproc) - # return [x if x is None else cp.asarray(x) for x in res] + nproc = 1 def eval_xc_eff(xc_code, rho, deriv=1, omega=None, xctype=None, verbose=None): res = eval_xc_eff_sf( diff --git a/gpu4pyscf/tdscf/tests/test_sftddft.py b/gpu4pyscf/tdscf/tests/test_sftddft.py index 546551129..903d2d3cb 100644 --- a/gpu4pyscf/tdscf/tests/test_sftddft.py +++ b/gpu4pyscf/tdscf/tests/test_sftddft.py @@ -17,10 +17,6 @@ import cupy as cp from pyscf import lib, gto, scf from gpu4pyscf import tdscf -try: - import mcfun -except ImportError: - mcfun = None def diagonalize_tda(a, nroots=5): @@ -75,7 +71,6 @@ def test_hf_tda(self): e = diagonalize_tda(a[1], nroots=3)[0] self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) - @unittest.skipIf(mcfun is None, 'MCfun not available') def test_mcol_svwn_tda(self): mf = self.mflda # sftddft not available in pyscf main branch. References are created @@ -105,7 +100,6 @@ def test_mcol_svwn_tda(self): self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) - @unittest.skipIf(mcfun is None, 'MCfun not available') def test_mcol_b3lyp_tda(self): mf = self.mfb3lyp # sftddft not available in pyscf main branch. References are created @@ -135,7 +129,6 @@ def test_mcol_b3lyp_tda(self): self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) - @unittest.skipIf(mcfun is None, 'MCfun not available') def test_mcol_tpss_tda(self): mf = self.mftpss # sftddft not available in pyscf main branch. 
References are created diff --git a/gpu4pyscf/tdscf/uhf.py b/gpu4pyscf/tdscf/uhf.py index e44b50426..bcddbedcb 100644 --- a/gpu4pyscf/tdscf/uhf.py +++ b/gpu4pyscf/tdscf/uhf.py @@ -510,11 +510,15 @@ def add_hf_(a, b, hyb=1): p0 = p1 p1+= weight.shape[0] wfxc= fxc[0,0][...,p0:p1] * weight - - rho_o_a = contract('pr,pi->ri', ao, orbo_a) - rho_v_a = contract('pr,pi->ri', ao, orbv_a) - rho_o_b = contract('pr,pi->ri', ao, orbo_b) - rho_v_b = contract('pr,pi->ri', ao, orbv_b) + orbo_a_mask = orbo_a[mask] + orbv_a_mask = orbv_a[mask] + orbo_b_mask = orbo_b[mask] + orbv_b_mask = orbv_b[mask] + + rho_o_a = contract('pr,pi->ri', ao, orbo_a_mask) + rho_v_a = contract('pr,pi->ri', ao, orbv_a_mask) + rho_o_b = contract('pr,pi->ri', ao, orbo_b_mask) + rho_v_b = contract('pr,pi->ri', ao, orbv_b_mask) rho_ov_b2a = contract('ri,ra->ria', rho_o_b, rho_v_a) rho_ov_a2b = contract('ri,ra->ria', rho_o_a, rho_v_b) @@ -537,11 +541,15 @@ def add_hf_(a, b, hyb=1): p0 = p1 p1+= weight.shape[0] wfxc= fxc[...,p0:p1] * weight - - rho_o_a = contract('xpr,pi->xri', ao, orbo_a) - rho_v_a = contract('xpr,pi->xri', ao, orbv_a) - rho_o_b = contract('xpr,pi->xri', ao, orbo_b) - rho_v_b = contract('xpr,pi->xri', ao, orbv_b) + orbo_a_mask = orbo_a[mask] + orbv_a_mask = orbv_a[mask] + orbo_b_mask = orbo_b[mask] + orbv_b_mask = orbv_b[mask] + + rho_o_a = contract('xpr,pi->xri', ao, orbo_a_mask) + rho_v_a = contract('xpr,pi->xri', ao, orbv_a_mask) + rho_o_b = contract('xpr,pi->xri', ao, orbo_b_mask) + rho_v_b = contract('xpr,pi->xri', ao, orbv_b_mask) rho_ov_b2a = contract('xri,ra->xria', rho_o_b, rho_v_a[0]) rho_ov_a2b = contract('xri,ra->xria', rho_o_a, rho_v_b[0]) rho_ov_b2a[1:4] += contract('ri,xra->xria', rho_o_b[0], rho_v_a[1:4]) @@ -572,11 +580,15 @@ def add_hf_(a, b, hyb=1): p0 = p1 p1+= weight.shape[0] wfxc = fxc[...,p0:p1] * weight - - rho_oa = contract('xpr,pi->xri', ao, orbo_a) - rho_ob = contract('xpr,pi->xri', ao, orbo_b) - rho_va = contract('xpr,pi->xri', ao, orbv_a) - rho_vb = 
contract('xpr,pi->xri', ao, orbv_b) + orbo_a_mask = orbo_a[mask] + orbv_a_mask = orbv_a[mask] + orbo_b_mask = orbo_b[mask] + orbv_b_mask = orbv_b[mask] + + rho_oa = contract('xpr,pi->xri', ao, orbo_a_mask) + rho_ob = contract('xpr,pi->xri', ao, orbo_b_mask) + rho_va = contract('xpr,pi->xri', ao, orbv_a_mask) + rho_vb = contract('xpr,pi->xri', ao, orbv_b_mask) rho_ov_b2a = contract('xri,ra->xria', rho_ob, rho_va[0]) rho_ov_a2b = contract('xri,ra->xria', rho_oa, rho_vb[0]) rho_ov_b2a[1:4] += contract('ri,xra->xria', rho_ob[0], rho_va[1:4]) From e5a3cea5c9c267e56552906bcaea55395da39341 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Mon, 15 Sep 2025 15:03:46 +0800 Subject: [PATCH 06/32] copy from the pyscf-forge --- gpu4pyscf/grad/tduks_sf.py | 805 +++++++++++++++++++++++++++++++++++++ 1 file changed, 805 insertions(+) diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index 20d668c30..a4fcf68cf 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -12,3 +12,808 @@ # See the License for the specific language governing permissions and # limitations under the License. + +from functools import reduce +import numpy as np +from pyscf import lib +from pyscf.lib import logger +from pyscf.scf import ucphf +from pyscf.dft import numint +from pyscf.dft import numint2c +from pyscf.grad import rks as rks_grad +from pyscf.grad import tdrhf as tdrhf_grad +from pyscf.tdscf._uhf_resp_sf import cache_xc_kernel_sf + +def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): + ''' Spin flip TDDFT gradient in UKS framework. Note: This function supports + both TDA or TDDFT results. + + Parameters + ---------- + Args: + td_grad : sftda.TDA_SF object. 
+ + Returns: + The gradient of excited states: Ei^{\\xi} = E0^{\\xi} + wi^{\\xi} + ''' + log = logger.new_logger(td_grad, verbose) + time0 = logger.process_clock(), logger.perf_counter() + + mol = td_grad.mol + mf = td_grad.base._scf + + mo_coeff = mf.mo_coeff + mo_energy = mf.mo_energy + mo_occ = mf.mo_occ + occidxa = np.where(mo_occ[0]>0)[0] + occidxb = np.where(mo_occ[1]>0)[0] + viridxa = np.where(mo_occ[0]==0)[0] + viridxb = np.where(mo_occ[1]==0)[0] + nocca = len(occidxa) + noccb = len(occidxb) + nvira = len(viridxa) + nvirb = len(viridxb) + orboa = mo_coeff[0][:,occidxa] + orbob = mo_coeff[1][:,occidxb] + orbva = mo_coeff[0][:,viridxa] + orbvb = mo_coeff[1][:,viridxb] + nao = mo_coeff[0].shape[0] + + nmoa = nocca + nvira + nmob = noccb + nvirb + + if td_grad.base.extype==0 or 1: + # x_ab, a means vira, b means occb + (x_ab, x_ba), (y_ab, y_ba) = x_y + xpy_ab = (x_ab + y_ab).T + xpy_ba = (x_ba + y_ba).T + xmy_ab = (x_ab - y_ab).T + xmy_ba = (x_ba - y_ba).T + + dvv_a = np.einsum('ai,bi->ab', xpy_ab, xpy_ab) + np.einsum('ai,bi->ab', xmy_ab, xmy_ab) # T^{ab \alpha \beta}*2 + dvv_b = np.einsum('ai,bi->ab', xpy_ba, xpy_ba) + np.einsum('ai,bi->ab', xmy_ba, xmy_ba) # T^{ab \beta \alpha}*2 + doo_b =-np.einsum('ai,aj->ij', xpy_ab, xpy_ab) - np.einsum('ai,aj->ij', xmy_ab, xmy_ab) # T^{ij \alpha \beta}*2 + doo_a =-np.einsum('ai,aj->ij', xpy_ba, xpy_ba) - np.einsum('ai,aj->ij', xmy_ba, xmy_ba) # T^{ij \beta \alpha}*2 + + dmxpy_ab = reduce(np.dot, (orbva, xpy_ab, orbob.T)) # ua ai iv -> uv -> (X+Y)_{uv \alpha \beta} + dmxpy_ba = reduce(np.dot, (orbvb, xpy_ba, orboa.T)) # ua ai iv -> uv -> (X+Y)_{uv \beta \alpha} + dmxmy_ab = reduce(np.dot, (orbva, xmy_ab, orbob.T)) # ua ai iv -> uv -> (X-Y)_{uv \alpha \beta} + dmxmy_ba = reduce(np.dot, (orbvb, xmy_ba, orboa.T)) # ua ai iv -> uv -> (X-Y)_{uv \beta \alpha} + + dmzoo_a = reduce(np.dot, (orboa, doo_a, orboa.T)) # \sum_{\sigma ab} 2*Tab \sigma C_{au} C_{bu} + dmzoo_b = reduce(np.dot, (orbob, doo_b, orbob.T)) # \sum_{\sigma ab} 
2*Tij \sigma C_{iu} C_{iu} + dmzoo_a+= reduce(np.dot, (orbva, dvv_a, orbva.T)) + dmzoo_b+= reduce(np.dot, (orbvb, dvv_b, orbvb.T)) + + ni = mf._numint + ni.libxc.test_deriv_order(mf.xc, 3, raise_error=True) + omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, mol.spin) + + # used by mcfun. + rho0, vxc, fxc = ni.cache_xc_kernel(mf.mol, mf.grids, mf.xc, + mo_coeff, mo_occ, spin=1) + + f1vo, f1oo, vxc1, k1ao = \ + _contract_xc_kernel(td_grad, mf.xc, ((dmxpy_ab,dmxpy_ba),(dmxmy_ab,dmxmy_ba)), + (dmzoo_a,dmzoo_b), True, True, max_memory) + k1ao_xpy, k1ao_xmy = k1ao + + # f1vo, (2,2,4,nao,nao), (X+Y) and (X-Y) with fxc_sf + # f1oo, (2,4,nao,nao), 2T with fxc_sc + # vxc1, ao with v1^{\sigma} + # k1ao_xpy,(2,2,4,nao,nao), (X+Y)(X+Y) and (X-Y)(X-Y) with gxc + + if abs(hyb) > 1e-10: + dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, + dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) + vj, vk = mf.get_jk(mol, dm, hermi=0) + vk *= hyb + if abs(omega) > 1e-10: + vk += mf.get_k(mol, dm, hermi=0, omega=omega) * (alpha-hyb) + vj = vj.reshape(2,3,nao,nao) + vk = vk.reshape(2,3,nao,nao) + + veff0doo = vj[0,0]+vj[1,0] - vk[:,0]+ f1oo[:,0] + veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] + +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) + veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] + +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) + + wvoa = reduce(np.dot, (orbva.T, veff0doo[0], orboa)) *2 + wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 + + veff = - vk[:,1] + f1vo[0,:,0] + veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + + wvoa += np.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 + wvob += np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 + + wvoa -= np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 + wvob -= 
np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 + + veff = -vk[:,2] + f1vo[1,:,0] + veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + + wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 + wvob += np.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 + + wvoa -= np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 + wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 + + else: + dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, + dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) + vj = mf.get_j(mol, dm, hermi=0).reshape(2,3,nao,nao) + + veff0doo = vj[0,0]+vj[1,0] + f1oo[:,0] + veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] + +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) + veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] + +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) + + wvoa = reduce(np.dot, (orbva.T, veff0doo[0], orboa)) *2 + wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 + + veff = f1vo[0,:,0] + veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + + wvoa += np.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 + wvob += np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 + + wvoa -= np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 + wvob -= np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 + + veff = f1vo[1,:,0] + veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + + wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 + wvob += np.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 + + wvoa -= np.einsum('il,al->ai', 
veff0mom_ab[:nocca,:noccb], xmy_ab) *2 + wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 + + vresp = mf.gen_response(hermi=1) + + def fvind(x): + dm1 = np.empty((2,nao,nao)) + x_a = x[0,:nvira*nocca].reshape(nvira,nocca) + x_b = x[0,nvira*nocca:].reshape(nvirb,noccb) + dm_a = reduce(np.dot, (orbva, x_a, orboa.T)) + dm_b = reduce(np.dot, (orbvb, x_b, orbob.T)) + dm1[0] = (dm_a + dm_a.T).real + dm1[1] = (dm_b + dm_b.T).real + + v1 = vresp(dm1) + v1a = reduce(np.dot, (orbva.T, v1[0], orboa)) + v1b = reduce(np.dot, (orbvb.T, v1[1], orbob)) + return np.hstack((v1a.ravel(), v1b.ravel())) + + z1a, z1b = ucphf.solve(fvind, mo_energy, mo_occ, (wvoa,wvob), + max_cycle=td_grad.cphf_max_cycle, + tol=td_grad.cphf_conv_tol)[0] + + time1 = log.timer('Z-vector using UCPHF solver', *time0) + + z1ao = np.zeros((2,nao,nao)) + z1ao[0] += reduce(np.dot, (orbva, z1a, orboa.T)) + z1ao[1] += reduce(np.dot, (orbvb, z1b, orbob.T)) + + veff = vresp((z1ao+z1ao.transpose(0,2,1))*0.5) + + im0a = np.zeros((nmoa,nmoa)) + im0b = np.zeros((nmob,nmob)) + + im0a[:nocca,:nocca] = reduce(np.dot, (orboa.T, veff0doo[0]+veff[0], orboa)) *.5 + im0b[:noccb,:noccb] = reduce(np.dot, (orbob.T, veff0doo[1]+veff[1], orbob)) *.5 + im0a[:nocca,:nocca] += np.einsum('aj,ai->ij', veff0mop_ba[noccb:,:nocca], xpy_ba) *0.5 + im0b[:noccb,:noccb] += np.einsum('aj,ai->ij', veff0mop_ab[nocca:,:noccb], xpy_ab) *0.5 + im0a[:nocca,:nocca] += np.einsum('aj,ai->ij', veff0mom_ba[noccb:,:nocca], xmy_ba) *0.5 + im0b[:noccb,:noccb] += np.einsum('aj,ai->ij', veff0mom_ab[nocca:,:noccb], xmy_ab) *0.5 + + im0a[nocca:,nocca:] = np.einsum('bi,ai->ab', veff0mop_ab[nocca:,:noccb], xpy_ab) *0.5 + im0b[noccb:,noccb:] = np.einsum('bi,ai->ab', veff0mop_ba[noccb:,:nocca], xpy_ba) *0.5 + im0a[nocca:,nocca:] += np.einsum('bi,ai->ab', veff0mom_ab[nocca:,:noccb], xmy_ab) *0.5 + im0b[noccb:,noccb:] += np.einsum('bi,ai->ab', veff0mom_ba[noccb:,:nocca], xmy_ba) *0.5 + + im0a[nocca:,:nocca] = np.einsum('il,al->ai', 
veff0mop_ab[:nocca,:noccb], xpy_ab) + im0b[noccb:,:noccb] = np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) + im0a[nocca:,:nocca] += np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) + im0b[noccb:,:noccb] += np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) + + zeta_a = (mo_energy[0][:,None] + mo_energy[0]) * .5 + zeta_b = (mo_energy[1][:,None] + mo_energy[1]) * .5 + zeta_a[nocca:,:nocca] = mo_energy[0][:nocca] + zeta_b[noccb:,:noccb] = mo_energy[1][:noccb] + zeta_a[:nocca,nocca:] = mo_energy[0][nocca:] + zeta_b[:noccb,noccb:] = mo_energy[1][noccb:] + + dm1a = np.zeros((nmoa,nmoa)) + dm1b = np.zeros((nmob,nmob)) + dm1a[:nocca,:nocca] = doo_a * .5 + dm1b[:noccb,:noccb] = doo_b * .5 + dm1a[nocca:,nocca:] = dvv_a * .5 + dm1b[noccb:,noccb:] = dvv_b * .5 + + dm1a[nocca:,:nocca] = z1a *.5 + dm1b[noccb:,:noccb] = z1b *.5 + + dm1a[:nocca,:nocca] += np.eye(nocca) # for ground state + dm1b[:noccb,:noccb] += np.eye(noccb) + + im0a = reduce(np.dot, (mo_coeff[0], im0a+zeta_a*dm1a, mo_coeff[0].T)) + im0b = reduce(np.dot, (mo_coeff[1], im0b+zeta_b*dm1b, mo_coeff[1].T)) + im0 = im0a + im0b + + # Initialize hcore_deriv with the underlying SCF object because some + # extensions (e.g. QM/MM, solvent) modifies the SCF object only. 
+ mf_grad = mf.nuc_grad_method() + hcore_deriv = mf_grad.hcore_generator(mol) + + # -mol.intor('int1e_ipovlp', comp=3) + s1 = mf_grad.get_ovlp(mol) + + dmz1doo_a = z1ao[0] + dmzoo_a + dmz1doo_b = z1ao[1] + dmzoo_b + oo0a = reduce(np.dot, (orboa, orboa.T)) + oo0b = reduce(np.dot, (orbob, orbob.T)) + + as_dm1 = oo0a + oo0b + (dmz1doo_a + dmz1doo_b) * .5 + + if abs(hyb) > 1e-10: + dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, + oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) + vj, vk = td_grad.get_jk(mol, dm) + vj = vj.reshape(2,4,3,nao,nao) + vk = vk.reshape(2,4,3,nao,nao) * hyb + vj[:,2:4] *= 0.0 + if abs(omega) > 1e-10: + with mol.with_range_coulomb(omega): + vk += td_grad.get_k(mol, dm).reshape(2,4,3,nao,nao) * (alpha-hyb) + + veff1 = np.zeros((2,4,3,nao,nao)) + veff1[:,:2] = vj[0,:2] + vj[1,:2] - vk[:,:2] + else: + dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ba+dmxpy_ab.T, + oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ab+dmxpy_ba.T) + vj = td_grad.get_j(mol, dm).reshape(2,3,3,nao,nao) + vj[:,2] *= 0.0 + veff1 = np.zeros((2,4,3,nao,nao)) + veff1[:,:3] = vj[0] + vj[1] + + fxcz1 = _contract_xc_kernel_z(td_grad, mf.xc, z1ao, max_memory) + + veff1[:,0] += vxc1[:,1:] + veff1[:,1] += (f1oo[:,1:] + fxcz1[:,1:])*2 + veff1[0,1] += (k1ao_xpy[0,0,1:] + k1ao_xpy[0,1,1:] + k1ao_xpy[1,0,1:] + k1ao_xpy[1,1,1:] + +k1ao_xmy[0,0,1:] + k1ao_xmy[0,1,1:] + k1ao_xmy[1,0,1:] + k1ao_xmy[1,1,1:])*2 + veff1[1,1] += (k1ao_xpy[0,0,1:] + k1ao_xpy[0,1,1:] - k1ao_xpy[1,0,1:] - k1ao_xpy[1,1,1:] + +k1ao_xmy[0,0,1:] + k1ao_xmy[0,1,1:] - k1ao_xmy[1,0,1:] - k1ao_xmy[1,1,1:])*2 + + veff1[:,2] += f1vo[0,:,1:] + veff1[:,3] += f1vo[1,:,1:] + veff1a, veff1b = veff1 + time1 = log.timer('2e AO integral derivatives', *time1) + + if atmlst is None: + atmlst = range(mol.natm) + offsetdic = mol.offset_nr_by_atom() + de = np.zeros((len(atmlst),3)) + + for k, ia in enumerate(atmlst): + shl0, shl1, p0, p1 = offsetdic[ia] + + # Ground state gradients + h1ao = 
hcore_deriv(ia) + de[k] = np.einsum('xpq,pq->x', h1ao, as_dm1) + de[k] += np.einsum('xpq,pq->x', veff1a[0,:,p0:p1], oo0a[p0:p1]) + de[k] += np.einsum('xpq,pq->x', veff1b[0,:,p0:p1], oo0b[p0:p1]) + de[k] += np.einsum('xpq,qp->x', veff1a[0,:,p0:p1], oo0a[:,p0:p1]) + de[k] += np.einsum('xpq,qp->x', veff1b[0,:,p0:p1], oo0b[:,p0:p1]) + + de[k] += np.einsum('xpq,pq->x', veff1a[0,:,p0:p1], dmz1doo_a[p0:p1]) *.5 + de[k] += np.einsum('xpq,pq->x', veff1b[0,:,p0:p1], dmz1doo_b[p0:p1]) *.5 + de[k] += np.einsum('xpq,qp->x', veff1a[0,:,p0:p1], dmz1doo_a[:,p0:p1]) *.5 + de[k] += np.einsum('xpq,qp->x', veff1b[0,:,p0:p1], dmz1doo_b[:,p0:p1]) *.5 + + de[k] -= np.einsum('xpq,pq->x', s1[:,p0:p1], im0[p0:p1]) + de[k] -= np.einsum('xqp,pq->x', s1[:,p0:p1], im0[:,p0:p1]) + + de[k] += np.einsum('xij,ij->x', veff1a[1,:,p0:p1], oo0a[p0:p1]) *0.5 + de[k] += np.einsum('xij,ij->x', veff1b[1,:,p0:p1], oo0b[p0:p1]) *0.5 + + de[k] += np.einsum('xij,ij->x', veff1b[2,:,p0:p1], dmxpy_ab[p0:p1,:]) + de[k] += np.einsum('xij,ij->x', veff1a[2,:,p0:p1], dmxpy_ba[p0:p1,:]) + de[k] += np.einsum('xji,ij->x', veff1b[2,:,p0:p1], dmxpy_ab[:,p0:p1]) + de[k] += np.einsum('xji,ij->x', veff1a[2,:,p0:p1], dmxpy_ba[:,p0:p1]) + + de[k] += np.einsum('xij,ij->x', veff1b[3,:,p0:p1], dmxmy_ab[p0:p1,:]) + de[k] += np.einsum('xij,ij->x', veff1a[3,:,p0:p1], dmxmy_ba[p0:p1,:]) + de[k] += np.einsum('xji,ij->x', veff1b[3,:,p0:p1], dmxmy_ab[:,p0:p1]) + de[k] += np.einsum('xji,ij->x', veff1a[3,:,p0:p1], dmxmy_ba[:,p0:p1]) + + if abs(hyb) > 1e-10: + de[k] -= np.einsum('xij,ij->x', vk[1,2,:,p0:p1], dmxpy_ab[p0:p1,:]) + de[k] -= np.einsum('xij,ij->x', vk[0,2,:,p0:p1], dmxpy_ba[p0:p1,:]) + de[k] -= np.einsum('xji,ij->x', vk[0,2,:,p0:p1], dmxpy_ab[:,p0:p1]) + de[k] -= np.einsum('xji,ij->x', vk[1,2,:,p0:p1], dmxpy_ba[:,p0:p1]) + + de[k] -= np.einsum('xij,ij->x', vk[1,3,:,p0:p1], dmxmy_ab[p0:p1,:]) + de[k] -= np.einsum('xij,ij->x', vk[0,3,:,p0:p1], dmxmy_ba[p0:p1,:]) + de[k] += np.einsum('xji,ij->x', vk[0,3,:,p0:p1], dmxmy_ab[:,p0:p1]) 
+ de[k] += np.einsum('xji,ij->x', vk[1,3,:,p0:p1], dmxmy_ba[:,p0:p1]) + + # de[k] += td_grad.extra_force(ia, locals()) + log.timer('TDUKS nuclear gradients', *time0) + return de + +def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, + with_kxc=True, max_memory=2000): + mol = td_grad.mol + mf = td_grad.base._scf + grids = mf.grids + + ni = mf._numint + xctype = ni._xc_type(xc_code) + + mo_coeff = mf.mo_coeff + mo_occ = mf.mo_occ + nao = mo_coeff[0].shape[0] + + shls_slice = (0, mol.nbas) + ao_loc = mol.ao_loc_nr() + + f1vo = np.zeros((2,2,4,nao,nao)) + deriv = 2 + + if dmoo is not None: + f1oo = np.zeros((2,4,nao,nao)) + else: + f1oo = None + if with_vxc: + v1ao = np.zeros((2,4,nao,nao)) + else: + v1ao = None + if with_kxc: + k1ao_xpy = np.zeros((2,2,4,nao,nao)) + k1ao_xmy = np.zeros((2,2,4,nao,nao)) + deriv = 3 + else: + k1ao_xpy = k1ao_xmy = None + + # create a mc object to use mcfun. + nimc = numint2c.NumInt2C() + nimc.collinear = 'mcol' + nimc.collinear_samples=td_grad.base.collinear_samples + + # calculate the derivatives. + fxc_sf,kxc_sf = cache_xc_kernel_sf(nimc,mol,mf.grids,mf.xc,mo_coeff,mo_occ,deriv=3,spin=1)[2:] + p0,p1=0,0 # the two parameters are used for counts the batch of grids. 
+ + if xctype == 'LDA': + def lda_sum_(vmat, ao, wv, mask): + aow = numint._scale_ao(ao[0], wv) + for k in range(4): + vmat[k] += numint._dot_ao_ao(mol, ao[k], aow, mask, shls_slice, ao_loc) + + ao_deriv = 1 + for ao, mask, weight, coords \ + in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): + p0 = p1 + p1+= weight.shape[0] + s_s = fxc_sf[...,p0:p1] * weight + + rho1_ab = ni.eval_rho(mol, ao[0], dmvo[0][0], mask, xctype) + rho1_ba = ni.eval_rho(mol, ao[0], dmvo[0][1], mask, xctype) + # s_s*2 because of \sigma_x \sigma_x + \sigma_y \sigma_y + lda_sum_(f1vo[0][1], ao, (rho1_ab+rho1_ba)*s_s*2, mask) + lda_sum_(f1vo[0][0], ao, (rho1_ba+rho1_ab)*s_s*2, mask) + + if with_kxc: + s_s_n = kxc_sf[:,:,0][...,p0:p1] * weight + s_s_s = kxc_sf[:,:,1][...,p0:p1] * weight + lda_sum_(k1ao_xpy[0][0], ao, s_s_n*2*rho1_ab*(rho1_ab+rho1_ba), mask) + lda_sum_(k1ao_xpy[0][1], ao, s_s_n*2*rho1_ba*(rho1_ba+rho1_ab), mask) + lda_sum_(k1ao_xpy[1][0], ao, s_s_s*2*rho1_ab*(rho1_ab+rho1_ba), mask) + lda_sum_(k1ao_xpy[1][1], ao, s_s_s*2*rho1_ba*(rho1_ba+rho1_ab), mask) + + rho1_ab = ni.eval_rho(mol, ao[0], dmvo[1][0], mask, xctype) + rho1_ba = ni.eval_rho(mol, ao[0], dmvo[1][1], mask, xctype) + + # py attention to the order of f1vo[1][1] and f1vo[1][0] + lda_sum_(f1vo[1][1], ao, (rho1_ab-rho1_ba)*s_s*2, mask) + lda_sum_(f1vo[1][0], ao, (rho1_ba-rho1_ab)*s_s*2, mask) + + if with_kxc: + # Note the "-" + lda_sum_(k1ao_xmy[0][0], ao, s_s_n*2*rho1_ab*(rho1_ab-rho1_ba), mask) + lda_sum_(k1ao_xmy[0][1], ao, s_s_n*2*rho1_ba*(rho1_ba-rho1_ab), mask) + lda_sum_(k1ao_xmy[1][0], ao, s_s_s*2*rho1_ab*(rho1_ab-rho1_ba), mask) + lda_sum_(k1ao_xmy[1][1], ao, s_s_s*2*rho1_ba*(rho1_ba-rho1_ab), mask) + + rho = (ni.eval_rho2(mol, ao[0], mo_coeff[0], mo_occ[0], mask, xctype), + ni.eval_rho2(mol, ao[0], mo_coeff[1], mo_occ[1], mask, xctype)) + vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:] + u_u, u_d, d_d = fxc[0].T * weight + if dmoo is not None: + rho2a = ni.eval_rho(mol, ao[0], dmoo[0], mask, 
xctype, hermi=1) + rho2b = ni.eval_rho(mol, ao[0], dmoo[1], mask, xctype, hermi=1) + lda_sum_(f1oo[0], ao, u_u*rho2a+u_d*rho2b, mask) + lda_sum_(f1oo[1], ao, u_d*rho2a+d_d*rho2b, mask) + if with_vxc: + vrho = vxc[0].T * weight + lda_sum_(v1ao[0], ao, vrho[0], mask) + lda_sum_(v1ao[1], ao, vrho[1], mask) + + elif xctype == 'GGA': + def gga_sum_(vmat, ao, wv, mask): + aow = numint._scale_ao(ao[:4], wv[:4]) + tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + vmat[0] += tmp + tmp.T + rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv, mask, ao_loc) + + ao_deriv = 2 + for ao, mask, weight, coords \ + in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): + p0 = p1 + p1+= weight.shape[0] + + rho1_ab = ni.eval_rho(mol, ao, dmvo[0][0], mask, xctype) + rho1_ba = ni.eval_rho(mol, ao, dmvo[0][1], mask, xctype) + + wv_sf = uks_sf_gga_wv1((rho1_ab,rho1_ba),fxc_sf[...,p0:p1],weight) + gga_sum_(f1vo[0][1], ao, wv_sf[0]+wv_sf[1], mask) + gga_sum_(f1vo[0][0], ao, wv_sf[1]+wv_sf[0], mask) + + if with_kxc: + gv_sf = uks_sf_gga_wv2_p((rho1_ab,rho1_ba),kxc_sf[...,p0:p1],weight) + gga_sum_(k1ao_xpy[0][0], ao, gv_sf[0][0], mask) + gga_sum_(k1ao_xpy[0][1], ao, gv_sf[1][0], mask) + gga_sum_(k1ao_xpy[1][0], ao, gv_sf[0][1], mask) + gga_sum_(k1ao_xpy[1][1], ao, gv_sf[1][1], mask) + + rho1_ab = ni.eval_rho(mol, ao, dmvo[1][0], mask, xctype) + rho1_ba = ni.eval_rho(mol, ao, dmvo[1][1], mask, xctype) + + wv_sf = uks_sf_gga_wv1((rho1_ab,rho1_ba),fxc_sf[...,p0:p1],weight) + gga_sum_(f1vo[1][1], ao, wv_sf[0]-wv_sf[1], mask) + gga_sum_(f1vo[1][0], ao, wv_sf[1]-wv_sf[0], mask) + + if with_kxc: + gv_sf = uks_sf_gga_wv2_m((rho1_ab,rho1_ba),kxc_sf[...,p0:p1],weight) + gga_sum_(k1ao_xmy[0][0], ao, gv_sf[0][0], mask) + gga_sum_(k1ao_xmy[0][1], ao, gv_sf[1][0], mask) + gga_sum_(k1ao_xmy[1][0], ao, gv_sf[0][1], mask) + gga_sum_(k1ao_xmy[1][1], ao, gv_sf[1][1], mask) + + rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype), + ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, 
xctype)) + vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:] + + if dmoo is not None: + rho2 = (ni.eval_rho(mol, ao, dmoo[0], mask, xctype, hermi=1), + ni.eval_rho(mol, ao, dmoo[1], mask, xctype, hermi=1)) + wv = numint._uks_gga_wv1(rho, rho2, vxc, fxc, weight) + gga_sum_(f1oo[0], ao, wv[0], mask) + gga_sum_(f1oo[1], ao, wv[1], mask) + if with_vxc: + wv = numint._uks_gga_wv0(rho, vxc, weight) + gga_sum_(v1ao[0], ao, wv[0], mask) + gga_sum_(v1ao[1], ao, wv[1], mask) + + elif xctype == 'MGGA': + def mgga_sum_(vmat, ao, wv, mask): + aow = numint._scale_ao(ao[:4], wv[:4]) + tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + + aow = numint._scale_ao(ao[1], wv[4], aow) + tmp += numint._dot_ao_ao(mol, ao[1], aow, mask, shls_slice, ao_loc) + aow = numint._scale_ao(ao[2], wv[4], aow) + tmp += numint._dot_ao_ao(mol, ao[2], aow, mask, shls_slice, ao_loc) + aow = numint._scale_ao(ao[3], wv[4], aow) + tmp += numint._dot_ao_ao(mol, ao[3], aow, mask, shls_slice, ao_loc) + vmat[0] += tmp + tmp.T + + rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv[:4], mask, ao_loc) + rks_grad._tau_grad_dot_(vmat[1:], mol, ao, wv[4]*2, mask, ao_loc, True) + + ao_deriv = 2 + for ao, mask, weight, coords \ + in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): + p0 = p1 + p1+= weight.shape[0] + ngrid=weight.shape[-1] + + rho1_ab_tmp = ni.eval_rho(mol, ao, dmvo[0][0], mask, xctype) + rho1_ba_tmp = ni.eval_rho(mol, ao, dmvo[0][1], mask, xctype) + # Padding for laplacian + rho1_ab = np.empty((5, ngrid)) + rho1_ba = np.empty((5, ngrid)) + rho1_ab[:4] = rho1_ab_tmp[:4] + rho1_ba[:4] = rho1_ba_tmp[:4] + rho1_ab[4] = rho1_ab_tmp[5] + rho1_ba[4] = rho1_ba_tmp[5] + + wv_sf = uks_sf_mgga_wv1((rho1_ab,rho1_ba), fxc_sf[...,p0:p1],weight) + mgga_sum_(f1vo[0][1], ao, wv_sf[0]+wv_sf[1], mask) + mgga_sum_(f1vo[0][0], ao, wv_sf[1]+wv_sf[0], mask) + + if with_kxc: + gv_sf = uks_sf_mgga_wv2_p((rho1_ab,rho1_ba), kxc_sf[...,p0:p1], weight) + mgga_sum_(k1ao_xpy[0][0], ao, gv_sf[0][0], mask) 
+ mgga_sum_(k1ao_xpy[0][1], ao, gv_sf[1][0], mask) + mgga_sum_(k1ao_xpy[1][0], ao, gv_sf[0][1], mask) + mgga_sum_(k1ao_xpy[1][1], ao, gv_sf[1][1], mask) + + rho1_ab_tmp = ni.eval_rho(mol, ao, dmvo[1][0], mask, xctype) + rho1_ba_tmp = ni.eval_rho(mol, ao, dmvo[1][1], mask, xctype) + # Padding for laplacian + rho1_ab = np.empty((5, ngrid)) + rho1_ba = np.empty((5, ngrid)) + rho1_ab[:4] = rho1_ab_tmp[:4] + rho1_ba[:4] = rho1_ba_tmp[:4] + rho1_ab[4] = rho1_ab_tmp[5] + rho1_ba[4] = rho1_ba_tmp[5] + + wv_sf = uks_sf_mgga_wv1((rho1_ab,rho1_ba), fxc_sf[...,p0:p1],weight) + mgga_sum_(f1vo[1][1], ao, wv_sf[0]-wv_sf[1], mask) + mgga_sum_(f1vo[1][0], ao, wv_sf[1]-wv_sf[0], mask) + + if with_kxc: + gv_sf = uks_sf_mgga_wv2_m((rho1_ab,rho1_ba), kxc_sf[...,p0:p1], weight) + mgga_sum_(k1ao_xmy[0][0], ao, gv_sf[0][0], mask) + mgga_sum_(k1ao_xmy[0][1], ao, gv_sf[1][0], mask) + mgga_sum_(k1ao_xmy[1][0], ao, gv_sf[0][1], mask) + mgga_sum_(k1ao_xmy[1][1], ao, gv_sf[1][1], mask) + + rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype), + ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype)) + vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:] + + if dmoo is not None: + rho2 = (ni.eval_rho(mol, ao, dmoo[0], mask, xctype, hermi=1), + ni.eval_rho(mol, ao, dmoo[1], mask, xctype, hermi=1)) + wv_tmp = numint._uks_mgga_wv1(rho, rho2, vxc, fxc, weight) + # # Padding for laplacian + wv = np.empty((2,5,ngrid)) + wv[0][:4] = wv_tmp[0][:4] + wv[0][4] = wv_tmp[0][5] + wv[1][:4] = wv_tmp[1][:4] + wv[1][4] = wv_tmp[1][5] + + mgga_sum_(f1oo[0], ao, wv[0], mask) + mgga_sum_(f1oo[1], ao, wv[1], mask) + + if with_vxc: + wv_tmp = numint._uks_mgga_wv0(rho, vxc, weight) + # # Padding for laplacian + wv = np.empty((2,5,ngrid)) + wv[0][:4] = wv_tmp[0][:4] + wv[0][4] = wv_tmp[0][5] + wv[1][:4] = wv_tmp[1][:4] + wv[1][4] = wv_tmp[1][5] + + mgga_sum_(v1ao[0], ao, wv[0], mask) + mgga_sum_(v1ao[1], ao, wv[1], mask) + + else: + raise NotImplementedError(f'td-uks for functional 
{xc_code}') + + f1vo[:,:,1:] *= -1 + if f1oo is not None: f1oo[:,1:] *= -1 + if v1ao is not None: v1ao[:,1:] *= -1 + if with_kxc: + k1ao_xpy[:,:,1:] *= -1 + k1ao_xmy[:,:,1:] *= -1 + return f1vo, f1oo, v1ao, (k1ao_xpy,k1ao_xmy) + +def uks_sf_gga_wv1(rho1, fxc_sf,weight): + # fxc_sf with a shape (4,4,ngrid), 4 means I, \nabla_x,y,z. + rho1_ab,rho1_ba = rho1 + ngrid = weight.shape[-1] + wv_ab, wv_ba = np.empty((2,4,ngrid)) + wv_ab = np.einsum('yp,xyp->xp', rho1_ab,fxc_sf) + wv_ba = np.einsum('yp,xyp->xp', rho1_ba,fxc_sf) + # wv_ab[0] = wv_ab[0] *2 *.5 # *2 bacause of kernel, *0.5 for the (x + x.T)*0.5 + # wv_ba[0] = wv_ba[0] *2 *.5 + + # Don't forget (sigma_x sigma_x + sigma_y sigma_y) needs *2 for kernel term. + wv_ab[1:] *=2.0 + wv_ba[1:] *=2.0 + return wv_ab*weight, wv_ba*weight + +def uks_sf_gga_wv2_p(rho1, kxc_sf,weight): + # kxc_sf with a shape (4,4,2,4,ngrid), 4 means I,\nabla_x,y,z, + # 0: n, \nabla_x,y,z n; 1: s, \nabla_x,y,z s. + rho1_ab,rho1_ba = rho1 + ngrid = weight.shape[-1] + gv_ab, gv_ba = np.empty((2,2,4,ngrid)) + # Note *2 and *0.5 like in function uks_sf_gga_wv1 + gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab+rho1_ba, kxc_sf, optimize=True) + gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba+rho1_ab, kxc_sf, optimize=True) + + gv_ab[0,1:] *=2.0 + gv_ab[1,1:] *=2.0 + gv_ba[0,1:] *=2.0 + gv_ba[1,1:] *=2.0 + return gv_ab*weight, gv_ba*weight + +def uks_sf_gga_wv2_m(rho1, kxc_sf,weight): + rho1_ab,rho1_ba = rho1 + ngrid = weight.shape[-1] + gv_ab, gv_ba = np.empty((2,2,5,ngrid)) + # Note *2 and *0.5 like in function uks_sf_mgga_wv1 + gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab-rho1_ba, kxc_sf , optimize=True) + gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba-rho1_ab, kxc_sf , optimize=True) + + gv_ab[:,1:] *=2.0 + gv_ba[:,1:] *=2.0 + return gv_ab*weight, gv_ba*weight + +def uks_sf_mgga_wv1(rho1, fxc_sf,weight): + rho1_ab,rho1_ba = rho1 + # fxc_sf with a shape (5,5,ngrid), 5 means I, \nabla_x,y,z s, u + # s_s, s_Ns, Ns_s, 
Ns_Ns, s_u, u_s, u_Ns, Ns_u, u_u + ngrid = weight.shape[-1] + wv_ab, wv_ba = np.empty((2,5,ngrid)) + wv_ab = np.einsum('yp,xyp->xp', rho1_ab,fxc_sf) + wv_ba = np.einsum('yp,xyp->xp', rho1_ba,fxc_sf) + # wv_ab[0] = wv_ab[0] *2 *.5 # *2 bacause of kernel, *0.5 for the (x + x.T)*0.5 + # wv_ba[0] = wv_ba[0] *2 *.5 + + # Don't forget (sigma_x sigma_x + sigma_y sigma_y) needs *2 for kernel term. + wv_ab[1:4] *=2.0 + wv_ba[1:4] *=2.0 + # *0.5 below is for tau->ao + wv_ab[4] *= 0.5 + wv_ba[4] *= 0.5 + return wv_ab*weight, wv_ba*weight + +def uks_sf_mgga_wv2_p(rho1, kxc_sf,weight): + rho1_ab,rho1_ba = rho1 + # kxc_sf with a shape (5,5,2,5,ngrid), 5 means s \nabla_x,y,z s, u + # s_s -> 0: n, \nabla_x,y,z n, tau ; 1: s, \nabla_x,y,z s, u + # s_Ns -> + # Ns_s -> + # Ns_Ns -> + # s_u -> + # u_s -> + # u_Ns -> + # Ns_u -> + # u_u -> + ngrid = weight.shape[-1] + gv_ab, gv_ba = np.empty((2,2,5,ngrid)) + # Note *2 and *0.5 like in function uks_sf_mgga_wv1 + gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab+rho1_ba, kxc_sf, optimize=True) + gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba+rho1_ab, kxc_sf, optimize=True) + + gv_ab[:,1:4] *=2.0 + gv_ba[:,1:4] *=2.0 + gv_ab[:,4] *= 0.5 + gv_ba[:,4] *= 0.5 + return gv_ab*weight, gv_ba*weight + +def uks_sf_mgga_wv2_m(rho1, kxc_sf,weight): + rho1_ab,rho1_ba = rho1 + ngrid = weight.shape[-1] + gv_ab, gv_ba = np.empty((2,2,5,ngrid)) + # Note *2 and *0.5 like in function uks_sf_mgga_wv1 + gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab-rho1_ba, kxc_sf , optimize=True) + gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba-rho1_ab, kxc_sf , optimize=True) + + gv_ab[:,1:4] *=2.0 + gv_ba[:,1:4] *=2.0 + gv_ab[:,4] *= 0.5 + gv_ba[:,4] *= 0.5 + return gv_ab*weight, gv_ba*weight + +def _contract_xc_kernel_z(td_grad, xc_code, dmvo, max_memory=2000): + mol = td_grad.base._scf.mol + mf = td_grad.base._scf + grids = mf.grids + + ni = mf._numint + xctype = ni._xc_type(xc_code) + + mo_coeff = mf.mo_coeff + mo_occ = mf.mo_occ + nao = 
mo_coeff[0].shape[0] + + shls_slice = (0, mol.nbas) + ao_loc = mol.ao_loc_nr() + + dmvo = [(dmvo[0]+dmvo[0].T)*.5, + (dmvo[1]+dmvo[1].T)*.5] + + f1vo = np.zeros((2,4,nao,nao)) + deriv = 2 + + if xctype == 'LDA': + def lda_sum_(vmat, ao, wv, mask): + aow = numint._scale_ao(ao[0], wv) + for k in range(4): + vmat[k] += numint._dot_ao_ao(mol, ao[k], aow, mask, shls_slice, ao_loc) + + ao_deriv = 1 + for ao, mask, weight, coords \ + in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): + rho = (ni.eval_rho2(mol, ao[0], mo_coeff[0], mo_occ[0], mask, xctype), + ni.eval_rho2(mol, ao[0], mo_coeff[1], mo_occ[1], mask, xctype)) + vxc, fxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:3] + u_u, u_d, d_d = fxc[0].T * weight + rho1a = ni.eval_rho(mol, ao[0], dmvo[0], mask, xctype, hermi=1) + rho1b = ni.eval_rho(mol, ao[0], dmvo[1], mask, xctype, hermi=1) + + lda_sum_(f1vo[0], ao, u_u*rho1a+u_d*rho1b, mask) + lda_sum_(f1vo[1], ao, u_d*rho1a+d_d*rho1b, mask) + + elif xctype == 'GGA': + def gga_sum_(vmat, ao, wv, mask): + aow = numint._scale_ao(ao[:4], wv[:4]) + tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + vmat[0] += tmp + tmp.T + rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv, mask, ao_loc) + ao_deriv = 2 + for ao, mask, weight, coords \ + in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): + rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype), + ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype)) + vxc, fxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:3] + + rho1 = (ni.eval_rho(mol, ao, dmvo[0], mask, xctype, hermi=1), + ni.eval_rho(mol, ao, dmvo[1], mask, xctype, hermi=1)) + wv = numint._uks_gga_wv1(rho, rho1, vxc, fxc, weight) + gga_sum_(f1vo[0], ao, wv[0], mask) + gga_sum_(f1vo[1], ao, wv[1], mask) + + elif xctype == 'MGGA': + def mgga_sum_(vmat, ao, wv, mask): + aow = numint._scale_ao(ao[:4], wv[:4]) + tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + + aow = numint._scale_ao(ao[1], wv[5], aow) + tmp += 
numint._dot_ao_ao(mol, ao[1], aow, mask, shls_slice, ao_loc) + aow = numint._scale_ao(ao[2], wv[5], aow) + tmp += numint._dot_ao_ao(mol, ao[2], aow, mask, shls_slice, ao_loc) + aow = numint._scale_ao(ao[3], wv[5], aow) + tmp += numint._dot_ao_ao(mol, ao[3], aow, mask, shls_slice, ao_loc) + vmat[0] += tmp + tmp.T + + rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv[:4], mask, ao_loc) + rks_grad._tau_grad_dot_(vmat[1:], mol, ao, wv[5]*2, mask, ao_loc, True) + + ao_deriv = 2 + for ao, mask, weight, coords \ + in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): + rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype), + ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype)) + vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:] + + rho1 = (ni.eval_rho(mol, ao, dmvo[0], mask, xctype, hermi=1), + ni.eval_rho(mol, ao, dmvo[1], mask, xctype, hermi=1)) + wv = numint._uks_mgga_wv1(rho, rho1, vxc, fxc, weight) + mgga_sum_(f1vo[0], ao, wv[0], mask) + mgga_sum_(f1vo[1], ao, wv[1], mask) + + vxc = fxc = rho = rho1 = None + + elif xctype == 'HF': + pass + else: + raise NotImplementedError(f'td-uks for functional {xc_code}') + + f1vo[:,1:] *= -1 + return f1vo + +class Gradients(tdrhf_grad.Gradients): + @lib.with_doc(grad_elec.__doc__) + def grad_elec(self, xy, singlet=None, atmlst=None): + return grad_elec(self, xy, atmlst, self.max_memory, self.verbose) + +Grad = Gradients + +from pyscf import sftda +sftda.uks_sf.TDA_SF.Gradients = sftda.uks_sf.TDDFT_SF.Gradients = lib.class_as_method(Gradients) \ No newline at end of file From 64dd5021e17b0787f16b4829011ca926c9417725 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Tue, 16 Sep 2025 14:06:48 +0800 Subject: [PATCH 07/32] add the gradient --- gpu4pyscf/grad/tduks_sf.py | 16 +++++++++------- gpu4pyscf/tdscf/_uhf_resp_sf.py | 10 +++++++--- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index a4fcf68cf..c179e16ca 100644 --- 
a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -89,9 +89,9 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): ni.libxc.test_deriv_order(mf.xc, 3, raise_error=True) omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, mol.spin) - # used by mcfun. - rho0, vxc, fxc = ni.cache_xc_kernel(mf.mol, mf.grids, mf.xc, - mo_coeff, mo_occ, spin=1) + # # used by mcfun. + # rho0, vxc, fxc = ni.cache_xc_kernel(mf.mol, mf.grids, mf.xc, + # mo_coeff, mo_occ, spin=1) f1vo, f1oo, vxc1, k1ao = \ _contract_xc_kernel(td_grad, mf.xc, ((dmxpy_ab,dmxpy_ba),(dmxmy_ab,dmxmy_ba)), @@ -385,12 +385,14 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, k1ao_xpy = k1ao_xmy = None # create a mc object to use mcfun. - nimc = numint2c.NumInt2C() - nimc.collinear = 'mcol' - nimc.collinear_samples=td_grad.base.collinear_samples + # nimc = numint2c.NumInt2C() + # nimc.collinear = 'mcol' + # nimc.collinear_samples=td_grad.base.collinear_samples + collinear_samples=td_grad.base.collinear_samples + ni = mf._numint # calculate the derivatives. - fxc_sf,kxc_sf = cache_xc_kernel_sf(nimc,mol,mf.grids,mf.xc,mo_coeff,mo_occ,deriv=3,spin=1)[2:] + fxc_sf,kxc_sf = cache_xc_kernel_sf(ni,mol,mf.grids,mf.xc,mo_coeff,mo_occ,collinear_samples,deriv=3)[2:] p0,p1=0,0 # the two parameters are used for counts the batch of grids. 
if xctype == 'LDA': diff --git a/gpu4pyscf/tdscf/_uhf_resp_sf.py b/gpu4pyscf/tdscf/_uhf_resp_sf.py index 98309f0d3..16e1590db 100644 --- a/gpu4pyscf/tdscf/_uhf_resp_sf.py +++ b/gpu4pyscf/tdscf/_uhf_resp_sf.py @@ -214,7 +214,7 @@ def eval_xc_eff(xc_code, rho, deriv=1, omega=None, xctype=None, verbose=None): return eval_xc_eff def cache_xc_kernel_sf(ni, mol, grids, xc_code, mo_coeff, mo_occ, - collinear_samples): + collinear_samples, deriv=2): '''Compute the fxc_sf, which can be used in SF-TDDFT/TDA ''' xctype = ni._xc_type(xc_code) @@ -250,8 +250,12 @@ def cache_xc_kernel_sf(ni, mol, grids, xc_code, mo_coeff, mo_occ, rho_z = cp.array([rho_ab[0]+rho_ab[1], rho_ab[0]-rho_ab[1]]) eval_xc_eff = mcfun_eval_xc_adapter_sf(ni, xc_code, collinear_samples) - vxc, fxc = eval_xc_eff(xc_code, rho_z, deriv=2, xctype=xctype)[1:3] - return rho_ab, vxc, fxc + if deriv == 2: + vxc, fxc = eval_xc_eff(xc_code, rho_z, deriv=2, xctype=xctype)[1:3] + return rho_ab, vxc, fxc + elif deriv == 3: + vxc, fxc, kxc = eval_xc_eff(xc_code, rho_z, deriv=3, xctype=xctype)[1:4] + return rho_ab, vxc, fxc, kxc def nr_uks_fxc_sf(ni, mol, grids, xc_code, dm0, dms, relativity=0, hermi=0, rho0=None, vxc=None, fxc=None): From a64491ec197a05051f4e0ab6056b912604b4c6b5 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 18 Sep 2025 15:02:09 +0800 Subject: [PATCH 08/32] remove gradient : --- gpu4pyscf/grad/tduks_sf.py | 821 ------------------------------------- 1 file changed, 821 deletions(-) delete mode 100644 gpu4pyscf/grad/tduks_sf.py diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py deleted file mode 100644 index c179e16ca..000000000 --- a/gpu4pyscf/grad/tduks_sf.py +++ /dev/null @@ -1,821 +0,0 @@ -# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from functools import reduce -import numpy as np -from pyscf import lib -from pyscf.lib import logger -from pyscf.scf import ucphf -from pyscf.dft import numint -from pyscf.dft import numint2c -from pyscf.grad import rks as rks_grad -from pyscf.grad import tdrhf as tdrhf_grad -from pyscf.tdscf._uhf_resp_sf import cache_xc_kernel_sf - -def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): - ''' Spin flip TDDFT gradient in UKS framework. Note: This function supports - both TDA or TDDFT results. - - Parameters - ---------- - Args: - td_grad : sftda.TDA_SF object. 
- - Returns: - The gradient of excited states: Ei^{\\xi} = E0^{\\xi} + wi^{\\xi} - ''' - log = logger.new_logger(td_grad, verbose) - time0 = logger.process_clock(), logger.perf_counter() - - mol = td_grad.mol - mf = td_grad.base._scf - - mo_coeff = mf.mo_coeff - mo_energy = mf.mo_energy - mo_occ = mf.mo_occ - occidxa = np.where(mo_occ[0]>0)[0] - occidxb = np.where(mo_occ[1]>0)[0] - viridxa = np.where(mo_occ[0]==0)[0] - viridxb = np.where(mo_occ[1]==0)[0] - nocca = len(occidxa) - noccb = len(occidxb) - nvira = len(viridxa) - nvirb = len(viridxb) - orboa = mo_coeff[0][:,occidxa] - orbob = mo_coeff[1][:,occidxb] - orbva = mo_coeff[0][:,viridxa] - orbvb = mo_coeff[1][:,viridxb] - nao = mo_coeff[0].shape[0] - - nmoa = nocca + nvira - nmob = noccb + nvirb - - if td_grad.base.extype==0 or 1: - # x_ab, a means vira, b means occb - (x_ab, x_ba), (y_ab, y_ba) = x_y - xpy_ab = (x_ab + y_ab).T - xpy_ba = (x_ba + y_ba).T - xmy_ab = (x_ab - y_ab).T - xmy_ba = (x_ba - y_ba).T - - dvv_a = np.einsum('ai,bi->ab', xpy_ab, xpy_ab) + np.einsum('ai,bi->ab', xmy_ab, xmy_ab) # T^{ab \alpha \beta}*2 - dvv_b = np.einsum('ai,bi->ab', xpy_ba, xpy_ba) + np.einsum('ai,bi->ab', xmy_ba, xmy_ba) # T^{ab \beta \alpha}*2 - doo_b =-np.einsum('ai,aj->ij', xpy_ab, xpy_ab) - np.einsum('ai,aj->ij', xmy_ab, xmy_ab) # T^{ij \alpha \beta}*2 - doo_a =-np.einsum('ai,aj->ij', xpy_ba, xpy_ba) - np.einsum('ai,aj->ij', xmy_ba, xmy_ba) # T^{ij \beta \alpha}*2 - - dmxpy_ab = reduce(np.dot, (orbva, xpy_ab, orbob.T)) # ua ai iv -> uv -> (X+Y)_{uv \alpha \beta} - dmxpy_ba = reduce(np.dot, (orbvb, xpy_ba, orboa.T)) # ua ai iv -> uv -> (X+Y)_{uv \beta \alpha} - dmxmy_ab = reduce(np.dot, (orbva, xmy_ab, orbob.T)) # ua ai iv -> uv -> (X-Y)_{uv \alpha \beta} - dmxmy_ba = reduce(np.dot, (orbvb, xmy_ba, orboa.T)) # ua ai iv -> uv -> (X-Y)_{uv \beta \alpha} - - dmzoo_a = reduce(np.dot, (orboa, doo_a, orboa.T)) # \sum_{\sigma ab} 2*Tab \sigma C_{au} C_{bu} - dmzoo_b = reduce(np.dot, (orbob, doo_b, orbob.T)) # \sum_{\sigma ab} 
2*Tij \sigma C_{iu} C_{iu} - dmzoo_a+= reduce(np.dot, (orbva, dvv_a, orbva.T)) - dmzoo_b+= reduce(np.dot, (orbvb, dvv_b, orbvb.T)) - - ni = mf._numint - ni.libxc.test_deriv_order(mf.xc, 3, raise_error=True) - omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, mol.spin) - - # # used by mcfun. - # rho0, vxc, fxc = ni.cache_xc_kernel(mf.mol, mf.grids, mf.xc, - # mo_coeff, mo_occ, spin=1) - - f1vo, f1oo, vxc1, k1ao = \ - _contract_xc_kernel(td_grad, mf.xc, ((dmxpy_ab,dmxpy_ba),(dmxmy_ab,dmxmy_ba)), - (dmzoo_a,dmzoo_b), True, True, max_memory) - k1ao_xpy, k1ao_xmy = k1ao - - # f1vo, (2,2,4,nao,nao), (X+Y) and (X-Y) with fxc_sf - # f1oo, (2,4,nao,nao), 2T with fxc_sc - # vxc1, ao with v1^{\sigma} - # k1ao_xpy,(2,2,4,nao,nao), (X+Y)(X+Y) and (X-Y)(X-Y) with gxc - - if abs(hyb) > 1e-10: - dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, - dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) - vj, vk = mf.get_jk(mol, dm, hermi=0) - vk *= hyb - if abs(omega) > 1e-10: - vk += mf.get_k(mol, dm, hermi=0, omega=omega) * (alpha-hyb) - vj = vj.reshape(2,3,nao,nao) - vk = vk.reshape(2,3,nao,nao) - - veff0doo = vj[0,0]+vj[1,0] - vk[:,0]+ f1oo[:,0] - veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] - +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) - veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] - +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) - - wvoa = reduce(np.dot, (orbva.T, veff0doo[0], orboa)) *2 - wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 - - veff = - vk[:,1] + f1vo[0,:,0] - veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - wvoa += np.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 - wvob += np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 - - wvoa -= np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 - wvob 
-= np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 - - veff = -vk[:,2] + f1vo[1,:,0] - veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 - wvob += np.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 - - wvoa -= np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 - wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 - - else: - dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, - dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) - vj = mf.get_j(mol, dm, hermi=0).reshape(2,3,nao,nao) - - veff0doo = vj[0,0]+vj[1,0] + f1oo[:,0] - veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] - +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) - veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] - +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) - - wvoa = reduce(np.dot, (orbva.T, veff0doo[0], orboa)) *2 - wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 - - veff = f1vo[0,:,0] - veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - wvoa += np.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 - wvob += np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 - - wvoa -= np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 - wvob -= np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 - - veff = f1vo[1,:,0] - veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 - wvob += np.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 - - wvoa -= np.einsum('il,al->ai', 
veff0mom_ab[:nocca,:noccb], xmy_ab) *2 - wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 - - vresp = mf.gen_response(hermi=1) - - def fvind(x): - dm1 = np.empty((2,nao,nao)) - x_a = x[0,:nvira*nocca].reshape(nvira,nocca) - x_b = x[0,nvira*nocca:].reshape(nvirb,noccb) - dm_a = reduce(np.dot, (orbva, x_a, orboa.T)) - dm_b = reduce(np.dot, (orbvb, x_b, orbob.T)) - dm1[0] = (dm_a + dm_a.T).real - dm1[1] = (dm_b + dm_b.T).real - - v1 = vresp(dm1) - v1a = reduce(np.dot, (orbva.T, v1[0], orboa)) - v1b = reduce(np.dot, (orbvb.T, v1[1], orbob)) - return np.hstack((v1a.ravel(), v1b.ravel())) - - z1a, z1b = ucphf.solve(fvind, mo_energy, mo_occ, (wvoa,wvob), - max_cycle=td_grad.cphf_max_cycle, - tol=td_grad.cphf_conv_tol)[0] - - time1 = log.timer('Z-vector using UCPHF solver', *time0) - - z1ao = np.zeros((2,nao,nao)) - z1ao[0] += reduce(np.dot, (orbva, z1a, orboa.T)) - z1ao[1] += reduce(np.dot, (orbvb, z1b, orbob.T)) - - veff = vresp((z1ao+z1ao.transpose(0,2,1))*0.5) - - im0a = np.zeros((nmoa,nmoa)) - im0b = np.zeros((nmob,nmob)) - - im0a[:nocca,:nocca] = reduce(np.dot, (orboa.T, veff0doo[0]+veff[0], orboa)) *.5 - im0b[:noccb,:noccb] = reduce(np.dot, (orbob.T, veff0doo[1]+veff[1], orbob)) *.5 - im0a[:nocca,:nocca] += np.einsum('aj,ai->ij', veff0mop_ba[noccb:,:nocca], xpy_ba) *0.5 - im0b[:noccb,:noccb] += np.einsum('aj,ai->ij', veff0mop_ab[nocca:,:noccb], xpy_ab) *0.5 - im0a[:nocca,:nocca] += np.einsum('aj,ai->ij', veff0mom_ba[noccb:,:nocca], xmy_ba) *0.5 - im0b[:noccb,:noccb] += np.einsum('aj,ai->ij', veff0mom_ab[nocca:,:noccb], xmy_ab) *0.5 - - im0a[nocca:,nocca:] = np.einsum('bi,ai->ab', veff0mop_ab[nocca:,:noccb], xpy_ab) *0.5 - im0b[noccb:,noccb:] = np.einsum('bi,ai->ab', veff0mop_ba[noccb:,:nocca], xpy_ba) *0.5 - im0a[nocca:,nocca:] += np.einsum('bi,ai->ab', veff0mom_ab[nocca:,:noccb], xmy_ab) *0.5 - im0b[noccb:,noccb:] += np.einsum('bi,ai->ab', veff0mom_ba[noccb:,:nocca], xmy_ba) *0.5 - - im0a[nocca:,:nocca] = np.einsum('il,al->ai', 
veff0mop_ab[:nocca,:noccb], xpy_ab) - im0b[noccb:,:noccb] = np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) - im0a[nocca:,:nocca] += np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) - im0b[noccb:,:noccb] += np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) - - zeta_a = (mo_energy[0][:,None] + mo_energy[0]) * .5 - zeta_b = (mo_energy[1][:,None] + mo_energy[1]) * .5 - zeta_a[nocca:,:nocca] = mo_energy[0][:nocca] - zeta_b[noccb:,:noccb] = mo_energy[1][:noccb] - zeta_a[:nocca,nocca:] = mo_energy[0][nocca:] - zeta_b[:noccb,noccb:] = mo_energy[1][noccb:] - - dm1a = np.zeros((nmoa,nmoa)) - dm1b = np.zeros((nmob,nmob)) - dm1a[:nocca,:nocca] = doo_a * .5 - dm1b[:noccb,:noccb] = doo_b * .5 - dm1a[nocca:,nocca:] = dvv_a * .5 - dm1b[noccb:,noccb:] = dvv_b * .5 - - dm1a[nocca:,:nocca] = z1a *.5 - dm1b[noccb:,:noccb] = z1b *.5 - - dm1a[:nocca,:nocca] += np.eye(nocca) # for ground state - dm1b[:noccb,:noccb] += np.eye(noccb) - - im0a = reduce(np.dot, (mo_coeff[0], im0a+zeta_a*dm1a, mo_coeff[0].T)) - im0b = reduce(np.dot, (mo_coeff[1], im0b+zeta_b*dm1b, mo_coeff[1].T)) - im0 = im0a + im0b - - # Initialize hcore_deriv with the underlying SCF object because some - # extensions (e.g. QM/MM, solvent) modifies the SCF object only. 
- mf_grad = mf.nuc_grad_method() - hcore_deriv = mf_grad.hcore_generator(mol) - - # -mol.intor('int1e_ipovlp', comp=3) - s1 = mf_grad.get_ovlp(mol) - - dmz1doo_a = z1ao[0] + dmzoo_a - dmz1doo_b = z1ao[1] + dmzoo_b - oo0a = reduce(np.dot, (orboa, orboa.T)) - oo0b = reduce(np.dot, (orbob, orbob.T)) - - as_dm1 = oo0a + oo0b + (dmz1doo_a + dmz1doo_b) * .5 - - if abs(hyb) > 1e-10: - dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, - oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) - vj, vk = td_grad.get_jk(mol, dm) - vj = vj.reshape(2,4,3,nao,nao) - vk = vk.reshape(2,4,3,nao,nao) * hyb - vj[:,2:4] *= 0.0 - if abs(omega) > 1e-10: - with mol.with_range_coulomb(omega): - vk += td_grad.get_k(mol, dm).reshape(2,4,3,nao,nao) * (alpha-hyb) - - veff1 = np.zeros((2,4,3,nao,nao)) - veff1[:,:2] = vj[0,:2] + vj[1,:2] - vk[:,:2] - else: - dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ba+dmxpy_ab.T, - oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ab+dmxpy_ba.T) - vj = td_grad.get_j(mol, dm).reshape(2,3,3,nao,nao) - vj[:,2] *= 0.0 - veff1 = np.zeros((2,4,3,nao,nao)) - veff1[:,:3] = vj[0] + vj[1] - - fxcz1 = _contract_xc_kernel_z(td_grad, mf.xc, z1ao, max_memory) - - veff1[:,0] += vxc1[:,1:] - veff1[:,1] += (f1oo[:,1:] + fxcz1[:,1:])*2 - veff1[0,1] += (k1ao_xpy[0,0,1:] + k1ao_xpy[0,1,1:] + k1ao_xpy[1,0,1:] + k1ao_xpy[1,1,1:] - +k1ao_xmy[0,0,1:] + k1ao_xmy[0,1,1:] + k1ao_xmy[1,0,1:] + k1ao_xmy[1,1,1:])*2 - veff1[1,1] += (k1ao_xpy[0,0,1:] + k1ao_xpy[0,1,1:] - k1ao_xpy[1,0,1:] - k1ao_xpy[1,1,1:] - +k1ao_xmy[0,0,1:] + k1ao_xmy[0,1,1:] - k1ao_xmy[1,0,1:] - k1ao_xmy[1,1,1:])*2 - - veff1[:,2] += f1vo[0,:,1:] - veff1[:,3] += f1vo[1,:,1:] - veff1a, veff1b = veff1 - time1 = log.timer('2e AO integral derivatives', *time1) - - if atmlst is None: - atmlst = range(mol.natm) - offsetdic = mol.offset_nr_by_atom() - de = np.zeros((len(atmlst),3)) - - for k, ia in enumerate(atmlst): - shl0, shl1, p0, p1 = offsetdic[ia] - - # Ground state gradients - h1ao = 
hcore_deriv(ia) - de[k] = np.einsum('xpq,pq->x', h1ao, as_dm1) - de[k] += np.einsum('xpq,pq->x', veff1a[0,:,p0:p1], oo0a[p0:p1]) - de[k] += np.einsum('xpq,pq->x', veff1b[0,:,p0:p1], oo0b[p0:p1]) - de[k] += np.einsum('xpq,qp->x', veff1a[0,:,p0:p1], oo0a[:,p0:p1]) - de[k] += np.einsum('xpq,qp->x', veff1b[0,:,p0:p1], oo0b[:,p0:p1]) - - de[k] += np.einsum('xpq,pq->x', veff1a[0,:,p0:p1], dmz1doo_a[p0:p1]) *.5 - de[k] += np.einsum('xpq,pq->x', veff1b[0,:,p0:p1], dmz1doo_b[p0:p1]) *.5 - de[k] += np.einsum('xpq,qp->x', veff1a[0,:,p0:p1], dmz1doo_a[:,p0:p1]) *.5 - de[k] += np.einsum('xpq,qp->x', veff1b[0,:,p0:p1], dmz1doo_b[:,p0:p1]) *.5 - - de[k] -= np.einsum('xpq,pq->x', s1[:,p0:p1], im0[p0:p1]) - de[k] -= np.einsum('xqp,pq->x', s1[:,p0:p1], im0[:,p0:p1]) - - de[k] += np.einsum('xij,ij->x', veff1a[1,:,p0:p1], oo0a[p0:p1]) *0.5 - de[k] += np.einsum('xij,ij->x', veff1b[1,:,p0:p1], oo0b[p0:p1]) *0.5 - - de[k] += np.einsum('xij,ij->x', veff1b[2,:,p0:p1], dmxpy_ab[p0:p1,:]) - de[k] += np.einsum('xij,ij->x', veff1a[2,:,p0:p1], dmxpy_ba[p0:p1,:]) - de[k] += np.einsum('xji,ij->x', veff1b[2,:,p0:p1], dmxpy_ab[:,p0:p1]) - de[k] += np.einsum('xji,ij->x', veff1a[2,:,p0:p1], dmxpy_ba[:,p0:p1]) - - de[k] += np.einsum('xij,ij->x', veff1b[3,:,p0:p1], dmxmy_ab[p0:p1,:]) - de[k] += np.einsum('xij,ij->x', veff1a[3,:,p0:p1], dmxmy_ba[p0:p1,:]) - de[k] += np.einsum('xji,ij->x', veff1b[3,:,p0:p1], dmxmy_ab[:,p0:p1]) - de[k] += np.einsum('xji,ij->x', veff1a[3,:,p0:p1], dmxmy_ba[:,p0:p1]) - - if abs(hyb) > 1e-10: - de[k] -= np.einsum('xij,ij->x', vk[1,2,:,p0:p1], dmxpy_ab[p0:p1,:]) - de[k] -= np.einsum('xij,ij->x', vk[0,2,:,p0:p1], dmxpy_ba[p0:p1,:]) - de[k] -= np.einsum('xji,ij->x', vk[0,2,:,p0:p1], dmxpy_ab[:,p0:p1]) - de[k] -= np.einsum('xji,ij->x', vk[1,2,:,p0:p1], dmxpy_ba[:,p0:p1]) - - de[k] -= np.einsum('xij,ij->x', vk[1,3,:,p0:p1], dmxmy_ab[p0:p1,:]) - de[k] -= np.einsum('xij,ij->x', vk[0,3,:,p0:p1], dmxmy_ba[p0:p1,:]) - de[k] += np.einsum('xji,ij->x', vk[0,3,:,p0:p1], dmxmy_ab[:,p0:p1]) 
- de[k] += np.einsum('xji,ij->x', vk[1,3,:,p0:p1], dmxmy_ba[:,p0:p1]) - - # de[k] += td_grad.extra_force(ia, locals()) - log.timer('TDUKS nuclear gradients', *time0) - return de - -def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, - with_kxc=True, max_memory=2000): - mol = td_grad.mol - mf = td_grad.base._scf - grids = mf.grids - - ni = mf._numint - xctype = ni._xc_type(xc_code) - - mo_coeff = mf.mo_coeff - mo_occ = mf.mo_occ - nao = mo_coeff[0].shape[0] - - shls_slice = (0, mol.nbas) - ao_loc = mol.ao_loc_nr() - - f1vo = np.zeros((2,2,4,nao,nao)) - deriv = 2 - - if dmoo is not None: - f1oo = np.zeros((2,4,nao,nao)) - else: - f1oo = None - if with_vxc: - v1ao = np.zeros((2,4,nao,nao)) - else: - v1ao = None - if with_kxc: - k1ao_xpy = np.zeros((2,2,4,nao,nao)) - k1ao_xmy = np.zeros((2,2,4,nao,nao)) - deriv = 3 - else: - k1ao_xpy = k1ao_xmy = None - - # create a mc object to use mcfun. - # nimc = numint2c.NumInt2C() - # nimc.collinear = 'mcol' - # nimc.collinear_samples=td_grad.base.collinear_samples - collinear_samples=td_grad.base.collinear_samples - ni = mf._numint - - # calculate the derivatives. - fxc_sf,kxc_sf = cache_xc_kernel_sf(ni,mol,mf.grids,mf.xc,mo_coeff,mo_occ,collinear_samples,deriv=3)[2:] - p0,p1=0,0 # the two parameters are used for counts the batch of grids. 
- - if xctype == 'LDA': - def lda_sum_(vmat, ao, wv, mask): - aow = numint._scale_ao(ao[0], wv) - for k in range(4): - vmat[k] += numint._dot_ao_ao(mol, ao[k], aow, mask, shls_slice, ao_loc) - - ao_deriv = 1 - for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): - p0 = p1 - p1+= weight.shape[0] - s_s = fxc_sf[...,p0:p1] * weight - - rho1_ab = ni.eval_rho(mol, ao[0], dmvo[0][0], mask, xctype) - rho1_ba = ni.eval_rho(mol, ao[0], dmvo[0][1], mask, xctype) - # s_s*2 because of \sigma_x \sigma_x + \sigma_y \sigma_y - lda_sum_(f1vo[0][1], ao, (rho1_ab+rho1_ba)*s_s*2, mask) - lda_sum_(f1vo[0][0], ao, (rho1_ba+rho1_ab)*s_s*2, mask) - - if with_kxc: - s_s_n = kxc_sf[:,:,0][...,p0:p1] * weight - s_s_s = kxc_sf[:,:,1][...,p0:p1] * weight - lda_sum_(k1ao_xpy[0][0], ao, s_s_n*2*rho1_ab*(rho1_ab+rho1_ba), mask) - lda_sum_(k1ao_xpy[0][1], ao, s_s_n*2*rho1_ba*(rho1_ba+rho1_ab), mask) - lda_sum_(k1ao_xpy[1][0], ao, s_s_s*2*rho1_ab*(rho1_ab+rho1_ba), mask) - lda_sum_(k1ao_xpy[1][1], ao, s_s_s*2*rho1_ba*(rho1_ba+rho1_ab), mask) - - rho1_ab = ni.eval_rho(mol, ao[0], dmvo[1][0], mask, xctype) - rho1_ba = ni.eval_rho(mol, ao[0], dmvo[1][1], mask, xctype) - - # py attention to the order of f1vo[1][1] and f1vo[1][0] - lda_sum_(f1vo[1][1], ao, (rho1_ab-rho1_ba)*s_s*2, mask) - lda_sum_(f1vo[1][0], ao, (rho1_ba-rho1_ab)*s_s*2, mask) - - if with_kxc: - # Note the "-" - lda_sum_(k1ao_xmy[0][0], ao, s_s_n*2*rho1_ab*(rho1_ab-rho1_ba), mask) - lda_sum_(k1ao_xmy[0][1], ao, s_s_n*2*rho1_ba*(rho1_ba-rho1_ab), mask) - lda_sum_(k1ao_xmy[1][0], ao, s_s_s*2*rho1_ab*(rho1_ab-rho1_ba), mask) - lda_sum_(k1ao_xmy[1][1], ao, s_s_s*2*rho1_ba*(rho1_ba-rho1_ab), mask) - - rho = (ni.eval_rho2(mol, ao[0], mo_coeff[0], mo_occ[0], mask, xctype), - ni.eval_rho2(mol, ao[0], mo_coeff[1], mo_occ[1], mask, xctype)) - vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:] - u_u, u_d, d_d = fxc[0].T * weight - if dmoo is not None: - rho2a = ni.eval_rho(mol, ao[0], dmoo[0], mask, 
xctype, hermi=1) - rho2b = ni.eval_rho(mol, ao[0], dmoo[1], mask, xctype, hermi=1) - lda_sum_(f1oo[0], ao, u_u*rho2a+u_d*rho2b, mask) - lda_sum_(f1oo[1], ao, u_d*rho2a+d_d*rho2b, mask) - if with_vxc: - vrho = vxc[0].T * weight - lda_sum_(v1ao[0], ao, vrho[0], mask) - lda_sum_(v1ao[1], ao, vrho[1], mask) - - elif xctype == 'GGA': - def gga_sum_(vmat, ao, wv, mask): - aow = numint._scale_ao(ao[:4], wv[:4]) - tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) - vmat[0] += tmp + tmp.T - rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv, mask, ao_loc) - - ao_deriv = 2 - for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): - p0 = p1 - p1+= weight.shape[0] - - rho1_ab = ni.eval_rho(mol, ao, dmvo[0][0], mask, xctype) - rho1_ba = ni.eval_rho(mol, ao, dmvo[0][1], mask, xctype) - - wv_sf = uks_sf_gga_wv1((rho1_ab,rho1_ba),fxc_sf[...,p0:p1],weight) - gga_sum_(f1vo[0][1], ao, wv_sf[0]+wv_sf[1], mask) - gga_sum_(f1vo[0][0], ao, wv_sf[1]+wv_sf[0], mask) - - if with_kxc: - gv_sf = uks_sf_gga_wv2_p((rho1_ab,rho1_ba),kxc_sf[...,p0:p1],weight) - gga_sum_(k1ao_xpy[0][0], ao, gv_sf[0][0], mask) - gga_sum_(k1ao_xpy[0][1], ao, gv_sf[1][0], mask) - gga_sum_(k1ao_xpy[1][0], ao, gv_sf[0][1], mask) - gga_sum_(k1ao_xpy[1][1], ao, gv_sf[1][1], mask) - - rho1_ab = ni.eval_rho(mol, ao, dmvo[1][0], mask, xctype) - rho1_ba = ni.eval_rho(mol, ao, dmvo[1][1], mask, xctype) - - wv_sf = uks_sf_gga_wv1((rho1_ab,rho1_ba),fxc_sf[...,p0:p1],weight) - gga_sum_(f1vo[1][1], ao, wv_sf[0]-wv_sf[1], mask) - gga_sum_(f1vo[1][0], ao, wv_sf[1]-wv_sf[0], mask) - - if with_kxc: - gv_sf = uks_sf_gga_wv2_m((rho1_ab,rho1_ba),kxc_sf[...,p0:p1],weight) - gga_sum_(k1ao_xmy[0][0], ao, gv_sf[0][0], mask) - gga_sum_(k1ao_xmy[0][1], ao, gv_sf[1][0], mask) - gga_sum_(k1ao_xmy[1][0], ao, gv_sf[0][1], mask) - gga_sum_(k1ao_xmy[1][1], ao, gv_sf[1][1], mask) - - rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype), - ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, 
xctype)) - vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:] - - if dmoo is not None: - rho2 = (ni.eval_rho(mol, ao, dmoo[0], mask, xctype, hermi=1), - ni.eval_rho(mol, ao, dmoo[1], mask, xctype, hermi=1)) - wv = numint._uks_gga_wv1(rho, rho2, vxc, fxc, weight) - gga_sum_(f1oo[0], ao, wv[0], mask) - gga_sum_(f1oo[1], ao, wv[1], mask) - if with_vxc: - wv = numint._uks_gga_wv0(rho, vxc, weight) - gga_sum_(v1ao[0], ao, wv[0], mask) - gga_sum_(v1ao[1], ao, wv[1], mask) - - elif xctype == 'MGGA': - def mgga_sum_(vmat, ao, wv, mask): - aow = numint._scale_ao(ao[:4], wv[:4]) - tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) - - aow = numint._scale_ao(ao[1], wv[4], aow) - tmp += numint._dot_ao_ao(mol, ao[1], aow, mask, shls_slice, ao_loc) - aow = numint._scale_ao(ao[2], wv[4], aow) - tmp += numint._dot_ao_ao(mol, ao[2], aow, mask, shls_slice, ao_loc) - aow = numint._scale_ao(ao[3], wv[4], aow) - tmp += numint._dot_ao_ao(mol, ao[3], aow, mask, shls_slice, ao_loc) - vmat[0] += tmp + tmp.T - - rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv[:4], mask, ao_loc) - rks_grad._tau_grad_dot_(vmat[1:], mol, ao, wv[4]*2, mask, ao_loc, True) - - ao_deriv = 2 - for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): - p0 = p1 - p1+= weight.shape[0] - ngrid=weight.shape[-1] - - rho1_ab_tmp = ni.eval_rho(mol, ao, dmvo[0][0], mask, xctype) - rho1_ba_tmp = ni.eval_rho(mol, ao, dmvo[0][1], mask, xctype) - # Padding for laplacian - rho1_ab = np.empty((5, ngrid)) - rho1_ba = np.empty((5, ngrid)) - rho1_ab[:4] = rho1_ab_tmp[:4] - rho1_ba[:4] = rho1_ba_tmp[:4] - rho1_ab[4] = rho1_ab_tmp[5] - rho1_ba[4] = rho1_ba_tmp[5] - - wv_sf = uks_sf_mgga_wv1((rho1_ab,rho1_ba), fxc_sf[...,p0:p1],weight) - mgga_sum_(f1vo[0][1], ao, wv_sf[0]+wv_sf[1], mask) - mgga_sum_(f1vo[0][0], ao, wv_sf[1]+wv_sf[0], mask) - - if with_kxc: - gv_sf = uks_sf_mgga_wv2_p((rho1_ab,rho1_ba), kxc_sf[...,p0:p1], weight) - mgga_sum_(k1ao_xpy[0][0], ao, gv_sf[0][0], mask) 
- mgga_sum_(k1ao_xpy[0][1], ao, gv_sf[1][0], mask) - mgga_sum_(k1ao_xpy[1][0], ao, gv_sf[0][1], mask) - mgga_sum_(k1ao_xpy[1][1], ao, gv_sf[1][1], mask) - - rho1_ab_tmp = ni.eval_rho(mol, ao, dmvo[1][0], mask, xctype) - rho1_ba_tmp = ni.eval_rho(mol, ao, dmvo[1][1], mask, xctype) - # Padding for laplacian - rho1_ab = np.empty((5, ngrid)) - rho1_ba = np.empty((5, ngrid)) - rho1_ab[:4] = rho1_ab_tmp[:4] - rho1_ba[:4] = rho1_ba_tmp[:4] - rho1_ab[4] = rho1_ab_tmp[5] - rho1_ba[4] = rho1_ba_tmp[5] - - wv_sf = uks_sf_mgga_wv1((rho1_ab,rho1_ba), fxc_sf[...,p0:p1],weight) - mgga_sum_(f1vo[1][1], ao, wv_sf[0]-wv_sf[1], mask) - mgga_sum_(f1vo[1][0], ao, wv_sf[1]-wv_sf[0], mask) - - if with_kxc: - gv_sf = uks_sf_mgga_wv2_m((rho1_ab,rho1_ba), kxc_sf[...,p0:p1], weight) - mgga_sum_(k1ao_xmy[0][0], ao, gv_sf[0][0], mask) - mgga_sum_(k1ao_xmy[0][1], ao, gv_sf[1][0], mask) - mgga_sum_(k1ao_xmy[1][0], ao, gv_sf[0][1], mask) - mgga_sum_(k1ao_xmy[1][1], ao, gv_sf[1][1], mask) - - rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype), - ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype)) - vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:] - - if dmoo is not None: - rho2 = (ni.eval_rho(mol, ao, dmoo[0], mask, xctype, hermi=1), - ni.eval_rho(mol, ao, dmoo[1], mask, xctype, hermi=1)) - wv_tmp = numint._uks_mgga_wv1(rho, rho2, vxc, fxc, weight) - # # Padding for laplacian - wv = np.empty((2,5,ngrid)) - wv[0][:4] = wv_tmp[0][:4] - wv[0][4] = wv_tmp[0][5] - wv[1][:4] = wv_tmp[1][:4] - wv[1][4] = wv_tmp[1][5] - - mgga_sum_(f1oo[0], ao, wv[0], mask) - mgga_sum_(f1oo[1], ao, wv[1], mask) - - if with_vxc: - wv_tmp = numint._uks_mgga_wv0(rho, vxc, weight) - # # Padding for laplacian - wv = np.empty((2,5,ngrid)) - wv[0][:4] = wv_tmp[0][:4] - wv[0][4] = wv_tmp[0][5] - wv[1][:4] = wv_tmp[1][:4] - wv[1][4] = wv_tmp[1][5] - - mgga_sum_(v1ao[0], ao, wv[0], mask) - mgga_sum_(v1ao[1], ao, wv[1], mask) - - else: - raise NotImplementedError(f'td-uks for functional 
{xc_code}') - - f1vo[:,:,1:] *= -1 - if f1oo is not None: f1oo[:,1:] *= -1 - if v1ao is not None: v1ao[:,1:] *= -1 - if with_kxc: - k1ao_xpy[:,:,1:] *= -1 - k1ao_xmy[:,:,1:] *= -1 - return f1vo, f1oo, v1ao, (k1ao_xpy,k1ao_xmy) - -def uks_sf_gga_wv1(rho1, fxc_sf,weight): - # fxc_sf with a shape (4,4,ngrid), 4 means I, \nabla_x,y,z. - rho1_ab,rho1_ba = rho1 - ngrid = weight.shape[-1] - wv_ab, wv_ba = np.empty((2,4,ngrid)) - wv_ab = np.einsum('yp,xyp->xp', rho1_ab,fxc_sf) - wv_ba = np.einsum('yp,xyp->xp', rho1_ba,fxc_sf) - # wv_ab[0] = wv_ab[0] *2 *.5 # *2 bacause of kernel, *0.5 for the (x + x.T)*0.5 - # wv_ba[0] = wv_ba[0] *2 *.5 - - # Don't forget (sigma_x sigma_x + sigma_y sigma_y) needs *2 for kernel term. - wv_ab[1:] *=2.0 - wv_ba[1:] *=2.0 - return wv_ab*weight, wv_ba*weight - -def uks_sf_gga_wv2_p(rho1, kxc_sf,weight): - # kxc_sf with a shape (4,4,2,4,ngrid), 4 means I,\nabla_x,y,z, - # 0: n, \nabla_x,y,z n; 1: s, \nabla_x,y,z s. - rho1_ab,rho1_ba = rho1 - ngrid = weight.shape[-1] - gv_ab, gv_ba = np.empty((2,2,4,ngrid)) - # Note *2 and *0.5 like in function uks_sf_gga_wv1 - gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab+rho1_ba, kxc_sf, optimize=True) - gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba+rho1_ab, kxc_sf, optimize=True) - - gv_ab[0,1:] *=2.0 - gv_ab[1,1:] *=2.0 - gv_ba[0,1:] *=2.0 - gv_ba[1,1:] *=2.0 - return gv_ab*weight, gv_ba*weight - -def uks_sf_gga_wv2_m(rho1, kxc_sf,weight): - rho1_ab,rho1_ba = rho1 - ngrid = weight.shape[-1] - gv_ab, gv_ba = np.empty((2,2,5,ngrid)) - # Note *2 and *0.5 like in function uks_sf_mgga_wv1 - gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab-rho1_ba, kxc_sf , optimize=True) - gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba-rho1_ab, kxc_sf , optimize=True) - - gv_ab[:,1:] *=2.0 - gv_ba[:,1:] *=2.0 - return gv_ab*weight, gv_ba*weight - -def uks_sf_mgga_wv1(rho1, fxc_sf,weight): - rho1_ab,rho1_ba = rho1 - # fxc_sf with a shape (5,5,ngrid), 5 means I, \nabla_x,y,z s, u - # s_s, s_Ns, Ns_s, 
Ns_Ns, s_u, u_s, u_Ns, Ns_u, u_u - ngrid = weight.shape[-1] - wv_ab, wv_ba = np.empty((2,5,ngrid)) - wv_ab = np.einsum('yp,xyp->xp', rho1_ab,fxc_sf) - wv_ba = np.einsum('yp,xyp->xp', rho1_ba,fxc_sf) - # wv_ab[0] = wv_ab[0] *2 *.5 # *2 bacause of kernel, *0.5 for the (x + x.T)*0.5 - # wv_ba[0] = wv_ba[0] *2 *.5 - - # Don't forget (sigma_x sigma_x + sigma_y sigma_y) needs *2 for kernel term. - wv_ab[1:4] *=2.0 - wv_ba[1:4] *=2.0 - # *0.5 below is for tau->ao - wv_ab[4] *= 0.5 - wv_ba[4] *= 0.5 - return wv_ab*weight, wv_ba*weight - -def uks_sf_mgga_wv2_p(rho1, kxc_sf,weight): - rho1_ab,rho1_ba = rho1 - # kxc_sf with a shape (5,5,2,5,ngrid), 5 means s \nabla_x,y,z s, u - # s_s -> 0: n, \nabla_x,y,z n, tau ; 1: s, \nabla_x,y,z s, u - # s_Ns -> - # Ns_s -> - # Ns_Ns -> - # s_u -> - # u_s -> - # u_Ns -> - # Ns_u -> - # u_u -> - ngrid = weight.shape[-1] - gv_ab, gv_ba = np.empty((2,2,5,ngrid)) - # Note *2 and *0.5 like in function uks_sf_mgga_wv1 - gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab+rho1_ba, kxc_sf, optimize=True) - gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba+rho1_ab, kxc_sf, optimize=True) - - gv_ab[:,1:4] *=2.0 - gv_ba[:,1:4] *=2.0 - gv_ab[:,4] *= 0.5 - gv_ba[:,4] *= 0.5 - return gv_ab*weight, gv_ba*weight - -def uks_sf_mgga_wv2_m(rho1, kxc_sf,weight): - rho1_ab,rho1_ba = rho1 - ngrid = weight.shape[-1] - gv_ab, gv_ba = np.empty((2,2,5,ngrid)) - # Note *2 and *0.5 like in function uks_sf_mgga_wv1 - gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab-rho1_ba, kxc_sf , optimize=True) - gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba-rho1_ab, kxc_sf , optimize=True) - - gv_ab[:,1:4] *=2.0 - gv_ba[:,1:4] *=2.0 - gv_ab[:,4] *= 0.5 - gv_ba[:,4] *= 0.5 - return gv_ab*weight, gv_ba*weight - -def _contract_xc_kernel_z(td_grad, xc_code, dmvo, max_memory=2000): - mol = td_grad.base._scf.mol - mf = td_grad.base._scf - grids = mf.grids - - ni = mf._numint - xctype = ni._xc_type(xc_code) - - mo_coeff = mf.mo_coeff - mo_occ = mf.mo_occ - nao = 
mo_coeff[0].shape[0] - - shls_slice = (0, mol.nbas) - ao_loc = mol.ao_loc_nr() - - dmvo = [(dmvo[0]+dmvo[0].T)*.5, - (dmvo[1]+dmvo[1].T)*.5] - - f1vo = np.zeros((2,4,nao,nao)) - deriv = 2 - - if xctype == 'LDA': - def lda_sum_(vmat, ao, wv, mask): - aow = numint._scale_ao(ao[0], wv) - for k in range(4): - vmat[k] += numint._dot_ao_ao(mol, ao[k], aow, mask, shls_slice, ao_loc) - - ao_deriv = 1 - for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): - rho = (ni.eval_rho2(mol, ao[0], mo_coeff[0], mo_occ[0], mask, xctype), - ni.eval_rho2(mol, ao[0], mo_coeff[1], mo_occ[1], mask, xctype)) - vxc, fxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:3] - u_u, u_d, d_d = fxc[0].T * weight - rho1a = ni.eval_rho(mol, ao[0], dmvo[0], mask, xctype, hermi=1) - rho1b = ni.eval_rho(mol, ao[0], dmvo[1], mask, xctype, hermi=1) - - lda_sum_(f1vo[0], ao, u_u*rho1a+u_d*rho1b, mask) - lda_sum_(f1vo[1], ao, u_d*rho1a+d_d*rho1b, mask) - - elif xctype == 'GGA': - def gga_sum_(vmat, ao, wv, mask): - aow = numint._scale_ao(ao[:4], wv[:4]) - tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) - vmat[0] += tmp + tmp.T - rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv, mask, ao_loc) - ao_deriv = 2 - for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): - rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype), - ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype)) - vxc, fxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:3] - - rho1 = (ni.eval_rho(mol, ao, dmvo[0], mask, xctype, hermi=1), - ni.eval_rho(mol, ao, dmvo[1], mask, xctype, hermi=1)) - wv = numint._uks_gga_wv1(rho, rho1, vxc, fxc, weight) - gga_sum_(f1vo[0], ao, wv[0], mask) - gga_sum_(f1vo[1], ao, wv[1], mask) - - elif xctype == 'MGGA': - def mgga_sum_(vmat, ao, wv, mask): - aow = numint._scale_ao(ao[:4], wv[:4]) - tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) - - aow = numint._scale_ao(ao[1], wv[5], aow) - tmp += 
numint._dot_ao_ao(mol, ao[1], aow, mask, shls_slice, ao_loc) - aow = numint._scale_ao(ao[2], wv[5], aow) - tmp += numint._dot_ao_ao(mol, ao[2], aow, mask, shls_slice, ao_loc) - aow = numint._scale_ao(ao[3], wv[5], aow) - tmp += numint._dot_ao_ao(mol, ao[3], aow, mask, shls_slice, ao_loc) - vmat[0] += tmp + tmp.T - - rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv[:4], mask, ao_loc) - rks_grad._tau_grad_dot_(vmat[1:], mol, ao, wv[5]*2, mask, ao_loc, True) - - ao_deriv = 2 - for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): - rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype), - ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype)) - vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:] - - rho1 = (ni.eval_rho(mol, ao, dmvo[0], mask, xctype, hermi=1), - ni.eval_rho(mol, ao, dmvo[1], mask, xctype, hermi=1)) - wv = numint._uks_mgga_wv1(rho, rho1, vxc, fxc, weight) - mgga_sum_(f1vo[0], ao, wv[0], mask) - mgga_sum_(f1vo[1], ao, wv[1], mask) - - vxc = fxc = rho = rho1 = None - - elif xctype == 'HF': - pass - else: - raise NotImplementedError(f'td-uks for functional {xc_code}') - - f1vo[:,1:] *= -1 - return f1vo - -class Gradients(tdrhf_grad.Gradients): - @lib.with_doc(grad_elec.__doc__) - def grad_elec(self, xy, singlet=None, atmlst=None): - return grad_elec(self, xy, atmlst, self.max_memory, self.verbose) - -Grad = Gradients - -from pyscf import sftda -sftda.uks_sf.TDA_SF.Gradients = sftda.uks_sf.TDDFT_SF.Gradients = lib.class_as_method(Gradients) \ No newline at end of file From c1e6d330fcbd1355484228f81667939da57694fd Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 18 Sep 2025 16:22:52 +0800 Subject: [PATCH 09/32] add some comments --- gpu4pyscf/tdscf/tests/test_sftddft.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu4pyscf/tdscf/tests/test_sftddft.py b/gpu4pyscf/tdscf/tests/test_sftddft.py index 903d2d3cb..8e9938b74 100644 --- a/gpu4pyscf/tdscf/tests/test_sftddft.py 
+++ b/gpu4pyscf/tdscf/tests/test_sftddft.py @@ -170,5 +170,5 @@ def test_tdhf(self): self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) if __name__ == "__main__": - print("Full Tests for spin-flip-TDA and spin-flip-TDDFT") + print("Full Tests for spin-flip-TDA and spin-flip-TDDFT using multi-collinear functionals") unittest.main() From a4aaec3d9b8f6419ffec4ad94a812c21c6f91d39 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Mon, 29 Sep 2025 10:33:41 +0800 Subject: [PATCH 10/32] fix the codes under reviews --- gpu4pyscf/tdscf/_uhf_resp_sf.py | 46 +++++++++-------------- gpu4pyscf/tdscf/tests/test_sftddft.py | 28 ++++++-------- gpu4pyscf/tdscf/tests/test_sftddft_col.py | 21 +++++------ gpu4pyscf/tdscf/uhf.py | 2 +- 4 files changed, 39 insertions(+), 58 deletions(-) diff --git a/gpu4pyscf/tdscf/_uhf_resp_sf.py b/gpu4pyscf/tdscf/_uhf_resp_sf.py index 16e1590db..9cc37b7ec 100644 --- a/gpu4pyscf/tdscf/_uhf_resp_sf.py +++ b/gpu4pyscf/tdscf/_uhf_resp_sf.py @@ -28,7 +28,7 @@ from concurrent.futures import ThreadPoolExecutor -MAX_GRIDS_PER_TASK = 200 +MAX_GRIDS_PER_TASK = 65536 def _prange(start, end, step): '''Partitions range into segments: i0:i1, i1:i2, i2:i3, ...''' @@ -47,36 +47,26 @@ def _make_paxis_samples(spin_samples): return rt, wt -def eval_xc_eff_sf(func, rho_tmz, deriv=1, collinear_samples=200, workers=1): +def eval_xc_eff_sf(func, rho_tmz, deriv=1, collinear_samples=200): assert deriv < 5 if rho_tmz.dtype != cp.double: raise RuntimeError('rho and mz must be real') ngrids = rho_tmz.shape[-1] - grids_per_task = min(ngrids//(workers*3)+1, MAX_GRIDS_PER_TASK) - - if workers == 1: - results = [] - for p0, p1 in _prange(0, ngrids, grids_per_task): - r = _eval_xc_sf(func, rho_tmz[...,p0:p1], deriv, collinear_samples) - results.append(r) - else: - print(collinear_samples) - executor = ThreadPoolExecutor - - with executor(max_workers=workers) as ex: - futures = [] - for p0, p1 in _prange(0, ngrids, grids_per_task): - f = ex.submit(_eval_xc_sf, func, 
rho_tmz[...,p0:p1], deriv, collinear_samples) - futures.append(f) - results = [f.result() for f in futures] + grids_per_task = min(ngrids//3+1, MAX_GRIDS_PER_TASK) + + print(collinear_samples) + results = [] + for p0, p1 in _prange(0, ngrids, grids_per_task): + r = _eval_xc_sf(func, rho_tmz[...,p0:p1], deriv, collinear_samples) + results.append(r) return [None if x[0] is None else cp.concatenate(x, axis=-1) for x in zip(*results)] + def _eval_xc_sf(func, rho_tmz, deriv, collinear_samples): ngrids = rho_tmz.shape[-1] # samples on z=cos(theta) and their weights between [0, 1] sgridz, weights = _make_paxis_samples(collinear_samples) - blksize = int(cp.ceil(1e5 / ngrids)) * 8 if rho_tmz.ndim == 2: nvar = 1 @@ -84,13 +74,12 @@ def _eval_xc_sf(func, rho_tmz, deriv, collinear_samples): nvar = rho_tmz.shape[1] # spin-flip part fxc_sf = 0.0 - for p0, p1 in _prange(0, weights.size, blksize): - rho = _project_spin_paxis2(rho_tmz, sgridz[p0:p1]) - fxc = func(rho, deriv)[2] - fxc = fxc.reshape(2, nvar, 2, nvar, ngrids, p1 - p0) - if not isinstance(fxc, cp.ndarray): - fxc = cp.array(fxc) - fxc_sf += fxc[1,:,1].dot(weights[p0:p1]) + rho = _project_spin_paxis2(rho_tmz, sgridz) + fxc = func(rho, deriv)[2] + fxc = fxc.reshape(2, nvar, 2, nvar, ngrids, weights.size) + if not isinstance(fxc, cp.ndarray): + fxc = cp.array(fxc) + fxc_sf += fxc[1,:,1].dot(weights) return None,None,fxc_sf @@ -204,12 +193,11 @@ def mcfun_eval_xc_adapter_sf(ni, xc_code, collinear_samples): xctype = ni._xc_type(xc_code) fn_eval_xc = functools.partial(__mcfun_fn_eval_xc2, ni, xc_code, xctype) - nproc = 1 def eval_xc_eff(xc_code, rho, deriv=1, omega=None, xctype=None, verbose=None): res = eval_xc_eff_sf( fn_eval_xc, rho, deriv, - collinear_samples=collinear_samples, workers=nproc) + collinear_samples=collinear_samples) return [x if x is None else cp.asarray(x) for x in res] return eval_xc_eff diff --git a/gpu4pyscf/tdscf/tests/test_sftddft.py b/gpu4pyscf/tdscf/tests/test_sftddft.py index 8e9938b74..533170a83 
100644 --- a/gpu4pyscf/tdscf/tests/test_sftddft.py +++ b/gpu4pyscf/tdscf/tests/test_sftddft.py @@ -45,34 +45,30 @@ def setUpClass(cls): mol.spin = 2 mol.basis = '631g' cls.mol = mol.build() - cls.mf = mol.UHF().to_gpu().run() - cls.mflda = mol.UKS(xc='svwn').to_gpu().run() - cls.mfb3lyp = mol.UKS(xc='b3lyp').to_gpu().run() - cls.mftpss = mol.UKS(xc='tpss').to_gpu().run() @classmethod def tearDownClass(cls): cls.mol.stdout.close() def test_hf_tda(self): - mf = self.mf + mf = self.mol.UHF().to_gpu().run() # sftddft not available in pyscf main branch. References are created # using the sftda module from pyscf-forge ref = [ 0.46644071, 0.55755649, 1.05310518] - td = mf.SFTDA().run(extype=0, conv_tol=1e-7) + td = mf.SFTDA().run(extype=0, conv_tol=1e-5) self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) a, b = td.get_ab() e = diagonalize_tda(a[0], nroots=3)[0] self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) ref = [-0.21574567, 0.00270390, 0.03143914] - td = mf.SFTDA().run(extype=1, conv_tol=1e-7) + td = mf.SFTDA().run(extype=1, conv_tol=1e-5) self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) e = diagonalize_tda(a[1], nroots=3)[0] self.assertAlmostEqual(abs(e - td.e).max(), 0, 6) def test_mcol_svwn_tda(self): - mf = self.mflda + mf = self.mol.UKS(xc='svwn').to_gpu().run() # sftddft not available in pyscf main branch. References are created # using the sftda module from pyscf-forge ref = [0.45022394, 0.57917576, 1.04475443] @@ -93,7 +89,7 @@ def test_mcol_svwn_tda(self): td.collinear = 'mcol' td.extype = 1 td.collinear_samples=200 - td.conv_tol = 1e-7 + td.conv_tol = 1e-5 td.kernel() e = diagonalize_tda(a[1], nroots=3)[0] @@ -101,7 +97,7 @@ def test_mcol_svwn_tda(self): self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) def test_mcol_b3lyp_tda(self): - mf = self.mfb3lyp + mf = self.mol.UKS(xc='b3lyp').to_gpu().run() # sftddft not available in pyscf main branch. 
References are created # using the sftda module from pyscf-forge ref = [0.45941163, 0.57799537, 1.06629197] @@ -122,7 +118,7 @@ def test_mcol_b3lyp_tda(self): td.collinear = 'mcol' td.extype = 1 td.collinear_samples=200 - td.conv_tol = 1e-7 + td.conv_tol = 1e-5 td.kernel() e = diagonalize_tda(a[1], nroots=3)[0] @@ -130,7 +126,7 @@ def test_mcol_b3lyp_tda(self): self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) def test_mcol_tpss_tda(self): - mf = self.mftpss + mf = self.mol.UKS(xc='tpss').to_gpu().run() # sftddft not available in pyscf main branch. References are created # using the sftda module from pyscf-forge ref = [0.4498647 , 0.57071842, 1.0544106 ] @@ -151,7 +147,7 @@ def test_mcol_tpss_tda(self): td.collinear = 'mcol' td.extype = 1 td.collinear_samples=200 - td.conv_tol = 1e-7 + td.conv_tol = 1e-5 td.kernel() e = diagonalize_tda(a[1], nroots=3)[0] @@ -160,13 +156,13 @@ def test_mcol_tpss_tda(self): @unittest.skip('Numerical issues encountered in non-hermitian diagonalization') def test_tdhf(self): - mf = self.mf + mf = self.mol.UHF().to_gpu().run() ref = [1.74385401, 9.38227395, 14.90168875] - td = mf.SFTDHF().run(extype=0, conv_tol=1e-7) + td = mf.SFTDHF().run(extype=0, conv_tol=1e-5) self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) ref = [0.41701647, 9.59644331, 22.99972711] - td = mf.SFTDHF().run(extype=1, conv_tol=1e-7) + td = mf.SFTDHF().run(extype=1, conv_tol=1e-5) self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6) if __name__ == "__main__": diff --git a/gpu4pyscf/tdscf/tests/test_sftddft_col.py b/gpu4pyscf/tdscf/tests/test_sftddft_col.py index 6dec9bc2e..45b888571 100644 --- a/gpu4pyscf/tdscf/tests/test_sftddft_col.py +++ b/gpu4pyscf/tdscf/tests/test_sftddft_col.py @@ -49,21 +49,18 @@ def setUpClass(cls): mol.spin = 2 mol.basis = '631g' cls.mol = mol.build() - cls.mflda = mol.UKS(xc='svwn').to_gpu().run() - cls.mfb3lyp = mol.UKS(xc='b3lyp').to_gpu().run() - cls.mftpss = mol.UKS(xc='tpss').to_gpu().run() @classmethod def tearDownClass(cls): 
cls.mol.stdout.close() def test_lda_tda(self): - mf = self.mflda + mf = self.mol.UKS(xc='svwn').to_gpu().run() na, nb = mf.mol.nelec td = mf.SFTDA() td.extype = 0 - td.conv_tol = 1e-7 + td.conv_tol = 1e-5 td.nroots = 3 td.collinear = 'col' td.run() @@ -74,7 +71,7 @@ def test_lda_tda(self): td = mf.SFTDA() td.extype = 1 - td.conv_tol = 1e-7 + td.conv_tol = 1e-5 td.nroots = 3 td.collinear = 'col' td.run() @@ -83,12 +80,12 @@ def test_lda_tda(self): assert td.e[0] - (mf.mo_energy[1][nb] - mf.mo_energy[0][na-1]) < 1e-6 def test_b3lyp_tda(self): - mf = self.mfb3lyp + mf = self.mol.UKS(xc='b3lyp').to_gpu().run() na, nb = mf.mol.nelec td = mf.SFTDA() td.extype = 0 - td.conv_tol = 1e-7 + td.conv_tol = 1e-5 td.nroots = 3 td.collinear = 'col' td.run() @@ -99,7 +96,7 @@ def test_b3lyp_tda(self): td = mf.SFTDA() td.extype = 1 - td.conv_tol = 1e-7 + td.conv_tol = 1e-5 td.nroots = 3 td.collinear = 'col' td.run() @@ -108,12 +105,12 @@ def test_b3lyp_tda(self): assert td.e[0] - (mf.mo_energy[1][nb] - mf.mo_energy[0][na-1]) < 1e-6 def test_tpss_tda(self): - mf = self.mftpss + mf = self.mol.UKS(xc='tpss').to_gpu().run() na, nb = mf.mol.nelec td = mf.SFTDA() td.extype = 0 - td.conv_tol = 1e-7 + td.conv_tol = 1e-5 td.nroots = 3 td.collinear = 'col' td.run() @@ -124,7 +121,7 @@ def test_tpss_tda(self): td = mf.SFTDA() td.extype = 1 - td.conv_tol = 1e-7 + td.conv_tol = 1e-5 td.nroots = 3 td.collinear = 'col' td.run() diff --git a/gpu4pyscf/tdscf/uhf.py b/gpu4pyscf/tdscf/uhf.py index bcddbedcb..82ee930aa 100644 --- a/gpu4pyscf/tdscf/uhf.py +++ b/gpu4pyscf/tdscf/uhf.py @@ -975,7 +975,7 @@ def kernel(self, x0=None, nstates=None): mf = self._scf ni = mf._numint if not ni.libxc.is_hybrid_xc(mf.xc): - self.converged = [True,] + self.converged = [True for _ in range(self.nstates)] self.e, xs = self._init_guess(self._scf, self.nstates) self.xy = [(x, 0) for x in xs] self._finalize() From 23523d4b210f955a20c0c7d35a746ac2017c7655 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Mon, 20 Oct 
2025 09:15:58 +0800 Subject: [PATCH 11/32] review the codes --- gpu4pyscf/tdscf/_uhf_resp_sf.py | 3 +-- gpu4pyscf/tdscf/uhf.py | 7 ++++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gpu4pyscf/tdscf/_uhf_resp_sf.py b/gpu4pyscf/tdscf/_uhf_resp_sf.py index 9cc37b7ec..18ede0e32 100644 --- a/gpu4pyscf/tdscf/_uhf_resp_sf.py +++ b/gpu4pyscf/tdscf/_uhf_resp_sf.py @@ -52,9 +52,8 @@ def eval_xc_eff_sf(func, rho_tmz, deriv=1, collinear_samples=200): if rho_tmz.dtype != cp.double: raise RuntimeError('rho and mz must be real') ngrids = rho_tmz.shape[-1] - grids_per_task = min(ngrids//3+1, MAX_GRIDS_PER_TASK) + grids_per_task = MAX_GRIDS_PER_TASK - print(collinear_samples) results = [] for p0, p1 in _prange(0, ngrids, grids_per_task): r = _eval_xc_sf(func, rho_tmz[...,p0:p1], deriv, collinear_samples) diff --git a/gpu4pyscf/tdscf/uhf.py b/gpu4pyscf/tdscf/uhf.py index e1a11cd4c..d7dc3d830 100644 --- a/gpu4pyscf/tdscf/uhf.py +++ b/gpu4pyscf/tdscf/uhf.py @@ -689,6 +689,7 @@ def gen_response(self, mo_coeff=None, mo_occ=None, hermi=0): def get_ab(self, mf=None): if mf is None: mf = self._scf + return get_ab(self, mf) def nac_method(self): @@ -936,10 +937,10 @@ def init_guess(self, mf=None, nstates=None, wfnsym=None): def dump_flags(self, verbose=None): TDBase.dump_flags(self, verbose) - logger.info(self, 'extype = %s', self.extype) - logger.info(self, 'collinear = %s', self.collinear) + logger.note(self, 'extype = %s', self.extype) + logger.note(self, 'collinear = %s', self.collinear) if self.collinear == 'mcol': - logger.info(self, 'collinear_samples = %s', self.collinear_samples) + logger.note(self, 'collinear_samples = %s', self.collinear_samples) return self def check_sanity(self): From 5de079862eec320832fe4bb46d27d584355fa093 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Mon, 20 Oct 2025 09:22:16 +0800 Subject: [PATCH 12/32] add a new file --- gpu4pyscf/grad/tduks_sf.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 
gpu4pyscf/grad/tduks_sf.py diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py new file mode 100644 index 000000000..20d668c30 --- /dev/null +++ b/gpu4pyscf/grad/tduks_sf.py @@ -0,0 +1,14 @@ +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + From bacd5d2d79ebd67d91511b424a3a3223fdc3e3cd Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Mon, 20 Oct 2025 10:07:17 +0800 Subject: [PATCH 13/32] use the pyscf-forge sf-tddft gradient --- gpu4pyscf/grad/tduks_sf.py | 806 +++++++++++++++++++++++++++++++++++++ 1 file changed, 806 insertions(+) diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index 20d668c30..fd7d3d9e3 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -12,3 +12,809 @@ # See the License for the specific language governing permissions and # limitations under the License. +from functools import reduce +import numpy as np +import cupy as cp +from pyscf import lib +from pyscf.lib import logger +from pyscf.scf import ucphf +from pyscf.dft import numint +from pyscf.dft import numint2c +from pyscf.grad import rks as rks_grad +from pyscf.grad import tdrhf as tdrhf_grad +from pyscf.sftda.numint2c_sftd import cache_xc_kernel_sf + + +def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): + ''' Spin flip TDDFT gradient in UKS framework. Note: This function supports + both TDA or TDDFT results. 
+ + Parameters + ---------- + Args: + td_grad : sftda.TDA_SF object. + + Returns: + The gradient of excited states: Ei^{\\xi} = E0^{\\xi} + wi^{\\xi} + ''' + log = logger.new_logger(td_grad, verbose) + time0 = logger.process_clock(), logger.perf_counter() + + mol = td_grad.mol + mf = td_grad.base._scf + + mo_coeff = mf.mo_coeff + mo_energy = mf.mo_energy + mo_occ = mf.mo_occ + occidxa = np.where(mo_occ[0]>0)[0] + occidxb = np.where(mo_occ[1]>0)[0] + viridxa = np.where(mo_occ[0]==0)[0] + viridxb = np.where(mo_occ[1]==0)[0] + nocca = len(occidxa) + noccb = len(occidxb) + nvira = len(viridxa) + nvirb = len(viridxb) + orboa = mo_coeff[0][:,occidxa] + orbob = mo_coeff[1][:,occidxb] + orbva = mo_coeff[0][:,viridxa] + orbvb = mo_coeff[1][:,viridxb] + nao = mo_coeff[0].shape[0] + + nmoa = nocca + nvira + nmob = noccb + nvirb + + if td_grad.base.extype==0 or 1: + # x_ab, a means vira, b means occb + (x_ab, x_ba), (y_ab, y_ba) = x_y + xpy_ab = (x_ab + y_ab).T + xpy_ba = (x_ba + y_ba).T + xmy_ab = (x_ab - y_ab).T + xmy_ba = (x_ba - y_ba).T + + dvv_a = np.einsum('ai,bi->ab', xpy_ab, xpy_ab) + np.einsum('ai,bi->ab', xmy_ab, xmy_ab) # T^{ab \alpha \beta}*2 + dvv_b = np.einsum('ai,bi->ab', xpy_ba, xpy_ba) + np.einsum('ai,bi->ab', xmy_ba, xmy_ba) # T^{ab \beta \alpha}*2 + doo_b =-np.einsum('ai,aj->ij', xpy_ab, xpy_ab) - np.einsum('ai,aj->ij', xmy_ab, xmy_ab) # T^{ij \alpha \beta}*2 + doo_a =-np.einsum('ai,aj->ij', xpy_ba, xpy_ba) - np.einsum('ai,aj->ij', xmy_ba, xmy_ba) # T^{ij \beta \alpha}*2 + + dmxpy_ab = reduce(np.dot, (orbva, xpy_ab, orbob.T)) # ua ai iv -> uv -> (X+Y)_{uv \alpha \beta} + dmxpy_ba = reduce(np.dot, (orbvb, xpy_ba, orboa.T)) # ua ai iv -> uv -> (X+Y)_{uv \beta \alpha} + dmxmy_ab = reduce(np.dot, (orbva, xmy_ab, orbob.T)) # ua ai iv -> uv -> (X-Y)_{uv \alpha \beta} + dmxmy_ba = reduce(np.dot, (orbvb, xmy_ba, orboa.T)) # ua ai iv -> uv -> (X-Y)_{uv \beta \alpha} + + dmzoo_a = reduce(np.dot, (orboa, doo_a, orboa.T)) # \sum_{\sigma ab} 2*Tab \sigma C_{au} C_{bu} + 
dmzoo_b = reduce(np.dot, (orbob, doo_b, orbob.T)) # \sum_{\sigma ab} 2*Tij \sigma C_{iu} C_{iu} + dmzoo_a+= reduce(np.dot, (orbva, dvv_a, orbva.T)) + dmzoo_b+= reduce(np.dot, (orbvb, dvv_b, orbvb.T)) + + ni = mf._numint + ni.libxc.test_deriv_order(mf.xc, 3, raise_error=True) + omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, mol.spin) + + # used by mcfun. + rho0, vxc, fxc = ni.cache_xc_kernel(mf.mol, mf.grids, mf.xc, + mo_coeff, mo_occ, spin=1) + + f1vo, f1oo, vxc1, k1ao = \ + _contract_xc_kernel(td_grad, mf.xc, ((dmxpy_ab,dmxpy_ba),(dmxmy_ab,dmxmy_ba)), + (dmzoo_a,dmzoo_b), True, True, max_memory) + k1ao_xpy, k1ao_xmy = k1ao + + # f1vo, (2,2,4,nao,nao), (X+Y) and (X-Y) with fxc_sf + # f1oo, (2,4,nao,nao), 2T with fxc_sc + # vxc1, ao with v1^{\sigma} + # k1ao_xpy,(2,2,4,nao,nao), (X+Y)(X+Y) and (X-Y)(X-Y) with gxc + + if abs(hyb) > 1e-10: + dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, + dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) + vj, vk = mf.get_jk(mol, dm, hermi=0) + vk *= hyb + if abs(omega) > 1e-10: + vk += mf.get_k(mol, dm, hermi=0, omega=omega) * (alpha-hyb) + vj = vj.reshape(2,3,nao,nao) + vk = vk.reshape(2,3,nao,nao) + + veff0doo = vj[0,0]+vj[1,0] - vk[:,0]+ f1oo[:,0] + veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] + +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) + veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] + +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) + + wvoa = reduce(np.dot, (orbva.T, veff0doo[0], orboa)) *2 + wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 + + veff = - vk[:,1] + f1vo[0,:,0] + veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + + wvoa += np.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 + wvob += np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 + + wvoa -= 
np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 + wvob -= np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 + + veff = -vk[:,2] + f1vo[1,:,0] + veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + + wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 + wvob += np.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 + + wvoa -= np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 + wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 + + else: + dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, + dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) + vj = mf.get_j(mol, dm, hermi=0).reshape(2,3,nao,nao) + + veff0doo = vj[0,0]+vj[1,0] + f1oo[:,0] + veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] + +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) + veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] + +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) + + wvoa = reduce(np.dot, (orbva.T, veff0doo[0], orboa)) *2 + wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 + + veff = f1vo[0,:,0] + veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + + wvoa += np.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 + wvob += np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 + + wvoa -= np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 + wvob -= np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 + + veff = f1vo[1,:,0] + veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + + wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 + wvob += np.einsum('ca,ci->ai', 
veff0mom_ab[nocca:,noccb:], xmy_ab) *2 + + wvoa -= np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 + wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 + + vresp = mf.gen_response(hermi=1) + + def fvind(x): + dm1 = np.empty((2,nao,nao)) + x_a = x[0,:nvira*nocca].reshape(nvira,nocca) + x_b = x[0,nvira*nocca:].reshape(nvirb,noccb) + dm_a = reduce(np.dot, (orbva, x_a, orboa.T)) + dm_b = reduce(np.dot, (orbvb, x_b, orbob.T)) + dm1[0] = (dm_a + dm_a.T).real + dm1[1] = (dm_b + dm_b.T).real + + v1 = vresp(dm1) + v1a = reduce(np.dot, (orbva.T, v1[0], orboa)) + v1b = reduce(np.dot, (orbvb.T, v1[1], orbob)) + return np.hstack((v1a.ravel(), v1b.ravel())) + + z1a, z1b = ucphf.solve(fvind, mo_energy, mo_occ, (wvoa,wvob), + max_cycle=td_grad.cphf_max_cycle, + tol=td_grad.cphf_conv_tol)[0] + + time1 = log.timer('Z-vector using UCPHF solver', *time0) + + z1ao = np.zeros((2,nao,nao)) + z1ao[0] += reduce(np.dot, (orbva, z1a, orboa.T)) + z1ao[1] += reduce(np.dot, (orbvb, z1b, orbob.T)) + + veff = vresp((z1ao+z1ao.transpose(0,2,1))*0.5) + + im0a = np.zeros((nmoa,nmoa)) + im0b = np.zeros((nmob,nmob)) + + im0a[:nocca,:nocca] = reduce(np.dot, (orboa.T, veff0doo[0]+veff[0], orboa)) *.5 + im0b[:noccb,:noccb] = reduce(np.dot, (orbob.T, veff0doo[1]+veff[1], orbob)) *.5 + im0a[:nocca,:nocca] += np.einsum('aj,ai->ij', veff0mop_ba[noccb:,:nocca], xpy_ba) *0.5 + im0b[:noccb,:noccb] += np.einsum('aj,ai->ij', veff0mop_ab[nocca:,:noccb], xpy_ab) *0.5 + im0a[:nocca,:nocca] += np.einsum('aj,ai->ij', veff0mom_ba[noccb:,:nocca], xmy_ba) *0.5 + im0b[:noccb,:noccb] += np.einsum('aj,ai->ij', veff0mom_ab[nocca:,:noccb], xmy_ab) *0.5 + + im0a[nocca:,nocca:] = np.einsum('bi,ai->ab', veff0mop_ab[nocca:,:noccb], xpy_ab) *0.5 + im0b[noccb:,noccb:] = np.einsum('bi,ai->ab', veff0mop_ba[noccb:,:nocca], xpy_ba) *0.5 + im0a[nocca:,nocca:] += np.einsum('bi,ai->ab', veff0mom_ab[nocca:,:noccb], xmy_ab) *0.5 + im0b[noccb:,noccb:] += np.einsum('bi,ai->ab', veff0mom_ba[noccb:,:nocca], 
xmy_ba) *0.5 + + im0a[nocca:,:nocca] = np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) + im0b[noccb:,:noccb] = np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) + im0a[nocca:,:nocca] += np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) + im0b[noccb:,:noccb] += np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) + + zeta_a = (mo_energy[0][:,None] + mo_energy[0]) * .5 + zeta_b = (mo_energy[1][:,None] + mo_energy[1]) * .5 + zeta_a[nocca:,:nocca] = mo_energy[0][:nocca] + zeta_b[noccb:,:noccb] = mo_energy[1][:noccb] + zeta_a[:nocca,nocca:] = mo_energy[0][nocca:] + zeta_b[:noccb,noccb:] = mo_energy[1][noccb:] + + dm1a = np.zeros((nmoa,nmoa)) + dm1b = np.zeros((nmob,nmob)) + dm1a[:nocca,:nocca] = doo_a * .5 + dm1b[:noccb,:noccb] = doo_b * .5 + dm1a[nocca:,nocca:] = dvv_a * .5 + dm1b[noccb:,noccb:] = dvv_b * .5 + + dm1a[nocca:,:nocca] = z1a *.5 + dm1b[noccb:,:noccb] = z1b *.5 + + dm1a[:nocca,:nocca] += np.eye(nocca) # for ground state + dm1b[:noccb,:noccb] += np.eye(noccb) + + im0a = reduce(np.dot, (mo_coeff[0], im0a+zeta_a*dm1a, mo_coeff[0].T)) + im0b = reduce(np.dot, (mo_coeff[1], im0b+zeta_b*dm1b, mo_coeff[1].T)) + im0 = im0a + im0b + + # Initialize hcore_deriv with the underlying SCF object because some + # extensions (e.g. QM/MM, solvent) modifies the SCF object only. 
+ mf_grad = mf.nuc_grad_method() + hcore_deriv = mf_grad.hcore_generator(mol) + + # -mol.intor('int1e_ipovlp', comp=3) + s1 = mf_grad.get_ovlp(mol) + + dmz1doo_a = z1ao[0] + dmzoo_a + dmz1doo_b = z1ao[1] + dmzoo_b + oo0a = reduce(np.dot, (orboa, orboa.T)) + oo0b = reduce(np.dot, (orbob, orbob.T)) + + as_dm1 = oo0a + oo0b + (dmz1doo_a + dmz1doo_b) * .5 + + if abs(hyb) > 1e-10: + dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, + oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) + vj, vk = td_grad.get_jk(mol, dm) + vj = vj.reshape(2,4,3,nao,nao) + vk = vk.reshape(2,4,3,nao,nao) * hyb + vj[:,2:4] *= 0.0 + if abs(omega) > 1e-10: + with mol.with_range_coulomb(omega): + vk += td_grad.get_k(mol, dm).reshape(2,4,3,nao,nao) * (alpha-hyb) + + veff1 = np.zeros((2,4,3,nao,nao)) + veff1[:,:2] = vj[0,:2] + vj[1,:2] - vk[:,:2] + else: + dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ba+dmxpy_ab.T, + oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ab+dmxpy_ba.T) + vj = td_grad.get_j(mol, dm).reshape(2,3,3,nao,nao) + vj[:,2] *= 0.0 + veff1 = np.zeros((2,4,3,nao,nao)) + veff1[:,:3] = vj[0] + vj[1] + + fxcz1 = _contract_xc_kernel_z(td_grad, mf.xc, z1ao, max_memory) + + veff1[:,0] += vxc1[:,1:] + veff1[:,1] += (f1oo[:,1:] + fxcz1[:,1:])*2 + veff1[0,1] += (k1ao_xpy[0,0,1:] + k1ao_xpy[0,1,1:] + k1ao_xpy[1,0,1:] + k1ao_xpy[1,1,1:] + +k1ao_xmy[0,0,1:] + k1ao_xmy[0,1,1:] + k1ao_xmy[1,0,1:] + k1ao_xmy[1,1,1:])*2 + veff1[1,1] += (k1ao_xpy[0,0,1:] + k1ao_xpy[0,1,1:] - k1ao_xpy[1,0,1:] - k1ao_xpy[1,1,1:] + +k1ao_xmy[0,0,1:] + k1ao_xmy[0,1,1:] - k1ao_xmy[1,0,1:] - k1ao_xmy[1,1,1:])*2 + + veff1[:,2] += f1vo[0,:,1:] + veff1[:,3] += f1vo[1,:,1:] + veff1a, veff1b = veff1 + time1 = log.timer('2e AO integral derivatives', *time1) + + if atmlst is None: + atmlst = range(mol.natm) + offsetdic = mol.offset_nr_by_atom() + de = np.zeros((len(atmlst),3)) + + for k, ia in enumerate(atmlst): + shl0, shl1, p0, p1 = offsetdic[ia] + + # Ground state gradients + h1ao = 
hcore_deriv(ia) + de[k] = np.einsum('xpq,pq->x', h1ao, as_dm1) + de[k] += np.einsum('xpq,pq->x', veff1a[0,:,p0:p1], oo0a[p0:p1]) + de[k] += np.einsum('xpq,pq->x', veff1b[0,:,p0:p1], oo0b[p0:p1]) + de[k] += np.einsum('xpq,qp->x', veff1a[0,:,p0:p1], oo0a[:,p0:p1]) + de[k] += np.einsum('xpq,qp->x', veff1b[0,:,p0:p1], oo0b[:,p0:p1]) + + de[k] += np.einsum('xpq,pq->x', veff1a[0,:,p0:p1], dmz1doo_a[p0:p1]) *.5 + de[k] += np.einsum('xpq,pq->x', veff1b[0,:,p0:p1], dmz1doo_b[p0:p1]) *.5 + de[k] += np.einsum('xpq,qp->x', veff1a[0,:,p0:p1], dmz1doo_a[:,p0:p1]) *.5 + de[k] += np.einsum('xpq,qp->x', veff1b[0,:,p0:p1], dmz1doo_b[:,p0:p1]) *.5 + + de[k] -= np.einsum('xpq,pq->x', s1[:,p0:p1], im0[p0:p1]) + de[k] -= np.einsum('xqp,pq->x', s1[:,p0:p1], im0[:,p0:p1]) + + de[k] += np.einsum('xij,ij->x', veff1a[1,:,p0:p1], oo0a[p0:p1]) *0.5 + de[k] += np.einsum('xij,ij->x', veff1b[1,:,p0:p1], oo0b[p0:p1]) *0.5 + + de[k] += np.einsum('xij,ij->x', veff1b[2,:,p0:p1], dmxpy_ab[p0:p1,:]) + de[k] += np.einsum('xij,ij->x', veff1a[2,:,p0:p1], dmxpy_ba[p0:p1,:]) + de[k] += np.einsum('xji,ij->x', veff1b[2,:,p0:p1], dmxpy_ab[:,p0:p1]) + de[k] += np.einsum('xji,ij->x', veff1a[2,:,p0:p1], dmxpy_ba[:,p0:p1]) + + de[k] += np.einsum('xij,ij->x', veff1b[3,:,p0:p1], dmxmy_ab[p0:p1,:]) + de[k] += np.einsum('xij,ij->x', veff1a[3,:,p0:p1], dmxmy_ba[p0:p1,:]) + de[k] += np.einsum('xji,ij->x', veff1b[3,:,p0:p1], dmxmy_ab[:,p0:p1]) + de[k] += np.einsum('xji,ij->x', veff1a[3,:,p0:p1], dmxmy_ba[:,p0:p1]) + + if abs(hyb) > 1e-10: + de[k] -= np.einsum('xij,ij->x', vk[1,2,:,p0:p1], dmxpy_ab[p0:p1,:]) + de[k] -= np.einsum('xij,ij->x', vk[0,2,:,p0:p1], dmxpy_ba[p0:p1,:]) + de[k] -= np.einsum('xji,ij->x', vk[0,2,:,p0:p1], dmxpy_ab[:,p0:p1]) + de[k] -= np.einsum('xji,ij->x', vk[1,2,:,p0:p1], dmxpy_ba[:,p0:p1]) + + de[k] -= np.einsum('xij,ij->x', vk[1,3,:,p0:p1], dmxmy_ab[p0:p1,:]) + de[k] -= np.einsum('xij,ij->x', vk[0,3,:,p0:p1], dmxmy_ba[p0:p1,:]) + de[k] += np.einsum('xji,ij->x', vk[0,3,:,p0:p1], dmxmy_ab[:,p0:p1]) 
def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True,
                        with_kxc=True, max_memory=2000):
    """Accumulate AO-basis XC matrices for the spin-flip TDDFT gradient.

    The DFT grid is traversed in batches with ``ni.block_loop`` and, for each
    batch, the spin-flip kernels returned by ``cache_xc_kernel_sf`` and the
    ordinary (spin-conserving) derivatives returned by ``ni.eval_xc`` are
    contracted with the supplied densities.

    Args:
        td_grad: gradient object. ``td_grad.mol``, ``td_grad.base._scf`` and
            ``td_grad.base.collinear_samples`` are read.
        xc_code: functional description handed to ``ni._xc_type`` and
            ``ni.eval_xc``.
        dmvo: nested pair ``((dm0_ab, dm0_ba), (dm1_ab, dm1_ba))`` of AO
            matrices. ``dmvo[0]`` is combined with "+" and ``dmvo[1]`` with
            "-"; the caller in this file passes the (X+Y) and (X-Y)
            transition densities.
        dmoo: optional pair ``(dm_a, dm_b)`` of AO matrices contracted with
            the spin-conserving second derivative. ``f1oo`` is ``None`` when
            this is ``None``.
        with_vxc: when true, also build the first-derivative matrices
            ``v1ao``.
        with_kxc: when true, also build the third-derivative matrices
            ``k1ao_xpy`` / ``k1ao_xmy`` and request ``deriv=3`` from
            ``ni.eval_xc``.
        max_memory: forwarded to ``ni.block_loop``.

    Returns:
        ``(f1vo, f1oo, v1ao, (k1ao_xpy, k1ao_xmy))`` where ``f1vo`` has shape
        (2, 2, 4, nao, nao), ``f1oo`` and ``v1ao`` have shape
        (2, 4, nao, nao) or are ``None``, and ``k1ao_xpy`` / ``k1ao_xmy``
        have shape (2, 2, 4, nao, nao) or are ``None``. On the axis of
        length 4, entries 1..3 are multiplied by -1 before returning.

    Raises:
        NotImplementedError: if the functional type is not LDA, GGA or MGGA.
    """
    mol = td_grad.mol
    mf = td_grad.base._scf
    grids = mf.grids

    ni = mf._numint
    xctype = ni._xc_type(xc_code)

    mo_coeff = mf.mo_coeff
    mo_occ = mf.mo_occ
    nao = mo_coeff[0].shape[0]

    shls_slice = (0, mol.nbas)
    ao_loc = mol.ao_loc_nr()

    f1vo = np.zeros((2,2,4,nao,nao))
    deriv = 2

    if dmoo is not None:
        f1oo = np.zeros((2,4,nao,nao))
    else:
        f1oo = None
    if with_vxc:
        v1ao = np.zeros((2,4,nao,nao))
    else:
        v1ao = None
    if with_kxc:
        k1ao_xpy = np.zeros((2,2,4,nao,nao))
        k1ao_xmy = np.zeros((2,2,4,nao,nao))
        deriv = 3
    else:
        k1ao_xpy = k1ao_xmy = None

    # Create a multi-collinear NumInt2C object so that mcfun can be used.
    nimc = numint2c.NumInt2C()
    nimc.collinear = 'mcol'
    nimc.collinear_samples=td_grad.base.collinear_samples

    # Evaluate the spin-flip kernels once for the whole grid.
    # NOTE(review): deriv=3 is requested here even when with_kxc is False;
    # confirm whether deriv=2 would be sufficient in that case.
    fxc_sf,kxc_sf = cache_xc_kernel_sf(nimc,mol,mf.grids,mf.xc,mo_coeff,mo_occ,deriv=3,spin=1)[2:]
    p0,p1=0,0 # [p0:p1] is the slice of grid points covered by the current batch.

    if xctype == 'LDA':
        def lda_sum_(vmat, ao, wv, mask):
            # vmat[k] += <ao[k] | wv | ao[0]> for the four AO rows available.
            aow = numint._scale_ao(ao[0], wv)
            for k in range(4):
                vmat[k] += numint._dot_ao_ao(mol, ao[k], aow, mask, shls_slice, ao_loc)

        ao_deriv = 1
        for ao, mask, weight, coords \
                in ni.block_loop(mol, grids, nao, ao_deriv, max_memory):
            # Advance the window into the precomputed full-grid kernels.
            p0 = p1
            p1+= weight.shape[0]
            s_s = fxc_sf[...,p0:p1] * weight

            rho1_ab = ni.eval_rho(mol, ao[0], dmvo[0][0], mask, xctype)
            rho1_ba = ni.eval_rho(mol, ao[0], dmvo[0][1], mask, xctype)
            # s_s*2 because of \sigma_x \sigma_x + \sigma_y \sigma_y
            lda_sum_(f1vo[0][1], ao, (rho1_ab+rho1_ba)*s_s*2, mask)
            lda_sum_(f1vo[0][0], ao, (rho1_ba+rho1_ab)*s_s*2, mask)

            if with_kxc:
                s_s_n = kxc_sf[:,:,0][...,p0:p1] * weight
                s_s_s = kxc_sf[:,:,1][...,p0:p1] * weight
                lda_sum_(k1ao_xpy[0][0], ao, s_s_n*2*rho1_ab*(rho1_ab+rho1_ba), mask)
                lda_sum_(k1ao_xpy[0][1], ao, s_s_n*2*rho1_ba*(rho1_ba+rho1_ab), mask)
                lda_sum_(k1ao_xpy[1][0], ao, s_s_s*2*rho1_ab*(rho1_ab+rho1_ba), mask)
                lda_sum_(k1ao_xpy[1][1], ao, s_s_s*2*rho1_ba*(rho1_ba+rho1_ab), mask)

            rho1_ab = ni.eval_rho(mol, ao[0], dmvo[1][0], mask, xctype)
            rho1_ba = ni.eval_rho(mol, ao[0], dmvo[1][1], mask, xctype)

            # Pay attention to the order of f1vo[1][1] and f1vo[1][0]
            lda_sum_(f1vo[1][1], ao, (rho1_ab-rho1_ba)*s_s*2, mask)
            lda_sum_(f1vo[1][0], ao, (rho1_ba-rho1_ab)*s_s*2, mask)

            if with_kxc:
                # Note the "-"
                lda_sum_(k1ao_xmy[0][0], ao, s_s_n*2*rho1_ab*(rho1_ab-rho1_ba), mask)
                lda_sum_(k1ao_xmy[0][1], ao, s_s_n*2*rho1_ba*(rho1_ba-rho1_ab), mask)
                lda_sum_(k1ao_xmy[1][0], ao, s_s_s*2*rho1_ab*(rho1_ab-rho1_ba), mask)
                lda_sum_(k1ao_xmy[1][1], ao, s_s_s*2*rho1_ba*(rho1_ba-rho1_ab), mask)

            # Ground-state spin densities for the spin-conserving derivatives.
            rho = (ni.eval_rho2(mol, ao[0], mo_coeff[0], mo_occ[0], mask, xctype),
                   ni.eval_rho2(mol, ao[0], mo_coeff[1], mo_occ[1], mask, xctype))
            vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:]
            u_u, u_d, d_d = fxc[0].T * weight
            if dmoo is not None:
                rho2a = ni.eval_rho(mol, ao[0], dmoo[0], mask, xctype, hermi=1)
                rho2b = ni.eval_rho(mol, ao[0], dmoo[1], mask, xctype, hermi=1)
                lda_sum_(f1oo[0], ao, u_u*rho2a+u_d*rho2b, mask)
                lda_sum_(f1oo[1], ao, u_d*rho2a+d_d*rho2b, mask)
            if with_vxc:
                vrho = vxc[0].T * weight
                lda_sum_(v1ao[0], ao, vrho[0], mask)
                lda_sum_(v1ao[1], ao, vrho[1], mask)

    elif xctype == 'GGA':
        def gga_sum_(vmat, ao, wv, mask):
            # vmat[0] receives the symmetrised matrix; vmat[1:] is filled by
            # rks_grad._gga_grad_sum_.
            aow = numint._scale_ao(ao[:4], wv[:4])
            tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc)
            vmat[0] += tmp + tmp.T
            rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv, mask, ao_loc)

        ao_deriv = 2
        for ao, mask, weight, coords \
                in ni.block_loop(mol, grids, nao, ao_deriv, max_memory):
            # Advance the window into the precomputed full-grid kernels.
            p0 = p1
            p1+= weight.shape[0]

            rho1_ab = ni.eval_rho(mol, ao, dmvo[0][0], mask, xctype)
            rho1_ba = ni.eval_rho(mol, ao, dmvo[0][1], mask, xctype)

            wv_sf = uks_sf_gga_wv1((rho1_ab,rho1_ba),fxc_sf[...,p0:p1],weight)
            gga_sum_(f1vo[0][1], ao, wv_sf[0]+wv_sf[1], mask)
            gga_sum_(f1vo[0][0], ao, wv_sf[1]+wv_sf[0], mask)

            if with_kxc:
                gv_sf = uks_sf_gga_wv2_p((rho1_ab,rho1_ba),kxc_sf[...,p0:p1],weight)
                gga_sum_(k1ao_xpy[0][0], ao, gv_sf[0][0], mask)
                gga_sum_(k1ao_xpy[0][1], ao, gv_sf[1][0], mask)
                gga_sum_(k1ao_xpy[1][0], ao, gv_sf[0][1], mask)
                gga_sum_(k1ao_xpy[1][1], ao, gv_sf[1][1], mask)

            rho1_ab = ni.eval_rho(mol, ao, dmvo[1][0], mask, xctype)
            rho1_ba = ni.eval_rho(mol, ao, dmvo[1][1], mask, xctype)

            wv_sf = uks_sf_gga_wv1((rho1_ab,rho1_ba),fxc_sf[...,p0:p1],weight)
            gga_sum_(f1vo[1][1], ao, wv_sf[0]-wv_sf[1], mask)
            gga_sum_(f1vo[1][0], ao, wv_sf[1]-wv_sf[0], mask)

            if with_kxc:
                gv_sf = uks_sf_gga_wv2_m((rho1_ab,rho1_ba),kxc_sf[...,p0:p1],weight)
                gga_sum_(k1ao_xmy[0][0], ao, gv_sf[0][0], mask)
                gga_sum_(k1ao_xmy[0][1], ao, gv_sf[1][0], mask)
                gga_sum_(k1ao_xmy[1][0], ao, gv_sf[0][1], mask)
                gga_sum_(k1ao_xmy[1][1], ao, gv_sf[1][1], mask)

            # Ground-state spin densities for the spin-conserving derivatives.
            rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype),
                   ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype))
            vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:]

            if dmoo is not None:
                rho2 = (ni.eval_rho(mol, ao, dmoo[0], mask, xctype, hermi=1),
                        ni.eval_rho(mol, ao, dmoo[1], mask, xctype, hermi=1))
                wv = numint._uks_gga_wv1(rho, rho2, vxc, fxc, weight)
                gga_sum_(f1oo[0], ao, wv[0], mask)
                gga_sum_(f1oo[1], ao, wv[1], mask)
            if with_vxc:
                wv = numint._uks_gga_wv0(rho, vxc, weight)
                gga_sum_(v1ao[0], ao, wv[0], mask)
                gga_sum_(v1ao[1], ao, wv[1], mask)

    elif xctype == 'MGGA':
        def mgga_sum_(vmat, ao, wv, mask):
            # Same as the GGA accumulator plus the wv[4] contribution taken
            # with the AO derivative rows ao[1], ao[2], ao[3].
            aow = numint._scale_ao(ao[:4], wv[:4])
            tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc)

            aow = numint._scale_ao(ao[1], wv[4], aow)
            tmp += numint._dot_ao_ao(mol, ao[1], aow, mask, shls_slice, ao_loc)
            aow = numint._scale_ao(ao[2], wv[4], aow)
            tmp += numint._dot_ao_ao(mol, ao[2], aow, mask, shls_slice, ao_loc)
            aow = numint._scale_ao(ao[3], wv[4], aow)
            tmp += numint._dot_ao_ao(mol, ao[3], aow, mask, shls_slice, ao_loc)
            vmat[0] += tmp + tmp.T

            rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv[:4], mask, ao_loc)
            rks_grad._tau_grad_dot_(vmat[1:], mol, ao, wv[4]*2, mask, ao_loc, True)

        ao_deriv = 2
        for ao, mask, weight, coords \
                in ni.block_loop(mol, grids, nao, ao_deriv, max_memory):
            # Advance the window into the precomputed full-grid kernels.
            p0 = p1
            p1+= weight.shape[0]
            ngrid=weight.shape[-1]

            rho1_ab_tmp = ni.eval_rho(mol, ao, dmvo[0][0], mask, xctype)
            rho1_ba_tmp = ni.eval_rho(mol, ao, dmvo[0][1], mask, xctype)
            # Repack to 5 rows: keep rows 0..3 and row 5, dropping row 4
            # (presumably the laplacian slot of eval_rho -- TODO confirm).
            rho1_ab = np.empty((5, ngrid))
            rho1_ba = np.empty((5, ngrid))
            rho1_ab[:4] = rho1_ab_tmp[:4]
            rho1_ba[:4] = rho1_ba_tmp[:4]
            rho1_ab[4] = rho1_ab_tmp[5]
            rho1_ba[4] = rho1_ba_tmp[5]

            wv_sf = uks_sf_mgga_wv1((rho1_ab,rho1_ba), fxc_sf[...,p0:p1],weight)
            mgga_sum_(f1vo[0][1], ao, wv_sf[0]+wv_sf[1], mask)
            mgga_sum_(f1vo[0][0], ao, wv_sf[1]+wv_sf[0], mask)

            if with_kxc:
                gv_sf = uks_sf_mgga_wv2_p((rho1_ab,rho1_ba), kxc_sf[...,p0:p1], weight)
                mgga_sum_(k1ao_xpy[0][0], ao, gv_sf[0][0], mask)
                mgga_sum_(k1ao_xpy[0][1], ao, gv_sf[1][0], mask)
                mgga_sum_(k1ao_xpy[1][0], ao, gv_sf[0][1], mask)
                mgga_sum_(k1ao_xpy[1][1], ao, gv_sf[1][1], mask)

            rho1_ab_tmp = ni.eval_rho(mol, ao, dmvo[1][0], mask, xctype)
            rho1_ba_tmp = ni.eval_rho(mol, ao, dmvo[1][1], mask, xctype)
            # Same 6-row -> 5-row repacking as above.
            rho1_ab = np.empty((5, ngrid))
            rho1_ba = np.empty((5, ngrid))
            rho1_ab[:4] = rho1_ab_tmp[:4]
            rho1_ba[:4] = rho1_ba_tmp[:4]
            rho1_ab[4] = rho1_ab_tmp[5]
            rho1_ba[4] = rho1_ba_tmp[5]

            wv_sf = uks_sf_mgga_wv1((rho1_ab,rho1_ba), fxc_sf[...,p0:p1],weight)
            mgga_sum_(f1vo[1][1], ao, wv_sf[0]-wv_sf[1], mask)
            mgga_sum_(f1vo[1][0], ao, wv_sf[1]-wv_sf[0], mask)

            if with_kxc:
                gv_sf = uks_sf_mgga_wv2_m((rho1_ab,rho1_ba), kxc_sf[...,p0:p1], weight)
                mgga_sum_(k1ao_xmy[0][0], ao, gv_sf[0][0], mask)
                mgga_sum_(k1ao_xmy[0][1], ao, gv_sf[1][0], mask)
                mgga_sum_(k1ao_xmy[1][0], ao, gv_sf[0][1], mask)
                mgga_sum_(k1ao_xmy[1][1], ao, gv_sf[1][1], mask)

            # Ground-state spin densities for the spin-conserving derivatives.
            rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype),
                   ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype))
            vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:]

            if dmoo is not None:
                rho2 = (ni.eval_rho(mol, ao, dmoo[0], mask, xctype, hermi=1),
                        ni.eval_rho(mol, ao, dmoo[1], mask, xctype, hermi=1))
                wv_tmp = numint._uks_mgga_wv1(rho, rho2, vxc, fxc, weight)
                # Repack the 6-row weights to the 5-row layout mgga_sum_ reads.
                wv = np.empty((2,5,ngrid))
                wv[0][:4] = wv_tmp[0][:4]
                wv[0][4] = wv_tmp[0][5]
                wv[1][:4] = wv_tmp[1][:4]
                wv[1][4] = wv_tmp[1][5]

                mgga_sum_(f1oo[0], ao, wv[0], mask)
                mgga_sum_(f1oo[1], ao, wv[1], mask)

            if with_vxc:
                wv_tmp = numint._uks_mgga_wv0(rho, vxc, weight)
                # Repack the 6-row weights to the 5-row layout mgga_sum_ reads.
                wv = np.empty((2,5,ngrid))
                wv[0][:4] = wv_tmp[0][:4]
                wv[0][4] = wv_tmp[0][5]
                wv[1][:4] = wv_tmp[1][:4]
                wv[1][4] = wv_tmp[1][5]

                mgga_sum_(v1ao[0], ao, wv[0], mask)
                mgga_sum_(v1ao[1], ao, wv[1], mask)

    else:
        raise NotImplementedError(f'td-uks for functional {xc_code}')

    # Flip the sign of entries 1..3 (the AO-derivative components) of every
    # accumulated matrix.
    f1vo[:,:,1:] *= -1
    if f1oo is not None: f1oo[:,1:] *= -1
    if v1ao is not None: v1ao[:,1:] *= -1
    if with_kxc:
        k1ao_xpy[:,:,1:] *= -1
        k1ao_xmy[:,:,1:] *= -1
    return f1vo, f1oo, v1ao, (k1ao_xpy,k1ao_xmy)
def uks_sf_gga_wv1(rho1, fxc_sf,weight):
    """Contract GGA response densities with the spin-flip second derivative.

    Args:
        rho1: pair ``(rho1_ab, rho1_ba)`` of arrays with shape (4, ngrid);
            the 4 rows are the value and its x, y, z gradient components.
        fxc_sf: spin-flip kernel with shape (4, 4, ngrid), indexed the same
            way on both leading axes.
        weight: quadrature weights, broadcast over the last axis.

    Returns:
        ``(wv_ab, wv_ba)``, each of shape (4, ngrid), already multiplied by
        ``weight``.
    """
    rho1_ab, rho1_ba = rho1
    wv_ab = np.einsum('yp,xyp->xp', rho1_ab, fxc_sf)
    wv_ba = np.einsum('yp,xyp->xp', rho1_ba, fxc_sf)

    # Row 0 stays unscaled: the *2 of the kernel term cancels the *0.5 of the
    # (x + x.T)*0.5 symmetrisation applied downstream.
    # The gradient rows keep the *2 required by the
    # (sigma_x sigma_x + sigma_y sigma_y) kernel term.
    wv_ab[1:] *= 2.0
    wv_ba[1:] *= 2.0
    return wv_ab*weight, wv_ba*weight


def uks_sf_gga_wv2_p(rho1, kxc_sf,weight):
    """Contract GGA densities with the spin-flip third derivative ("+" case).

    Args:
        rho1: pair ``(rho1_ab, rho1_ba)`` of arrays with shape (4, ngrid).
        kxc_sf: array with shape (4, 4, 2, 4, ngrid). The first two axes run
            over the value and x, y, z gradient of the spin-flip variables;
            axis 2 selects 0: n (and its gradient) or 1: s (and its gradient).
        weight: quadrature weights, broadcast over the last axis.

    Returns:
        ``(gv_ab, gv_ba)``, each of shape (2, 4, ngrid), where the second
        density factor is ``rho1_ab + rho1_ba``.
    """
    rho1_ab, rho1_ba = rho1
    # Same *2 / *0.5 bookkeeping as in uks_sf_gga_wv1.
    gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab+rho1_ba, kxc_sf, optimize=True)
    gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba+rho1_ab, kxc_sf, optimize=True)

    gv_ab[:,1:] *= 2.0
    gv_ba[:,1:] *= 2.0
    return gv_ab*weight, gv_ba*weight


def uks_sf_gga_wv2_m(rho1, kxc_sf,weight):
    """Contract GGA densities with the spin-flip third derivative ("-" case).

    Identical to ``uks_sf_gga_wv2_p`` except that the second density factor
    is the difference ``rho1_ab - rho1_ba`` (respectively
    ``rho1_ba - rho1_ab``).

    Args:
        rho1: pair ``(rho1_ab, rho1_ba)`` of arrays with shape (4, ngrid).
        kxc_sf: array with shape (4, 4, 2, 4, ngrid).
        weight: quadrature weights, broadcast over the last axis.

    Returns:
        ``(gv_ab, gv_ba)``, each of shape (2, 4, ngrid).
    """
    rho1_ab, rho1_ba = rho1
    # Same *2 / *0.5 bookkeeping as in uks_sf_gga_wv1.
    gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab-rho1_ba, kxc_sf, optimize=True)
    gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba-rho1_ab, kxc_sf, optimize=True)

    gv_ab[:,1:] *= 2.0
    gv_ba[:,1:] *= 2.0
    return gv_ab*weight, gv_ba*weight


def uks_sf_mgga_wv1(rho1, fxc_sf,weight):
    """Contract meta-GGA response densities with the spin-flip kernel.

    Args:
        rho1: pair ``(rho1_ab, rho1_ba)`` of arrays with shape (5, ngrid);
            rows are the value, its x, y, z gradient and the tau-like
            variable u.
        fxc_sf: spin-flip kernel with shape (5, 5, ngrid), indexed the same
            way on both leading axes.
        weight: quadrature weights, broadcast over the last axis.

    Returns:
        ``(wv_ab, wv_ba)``, each of shape (5, ngrid), already multiplied by
        ``weight``.
    """
    rho1_ab, rho1_ba = rho1
    wv_ab = np.einsum('yp,xyp->xp', rho1_ab, fxc_sf)
    wv_ba = np.einsum('yp,xyp->xp', rho1_ba, fxc_sf)

    # Row 0 stays unscaled: the *2 of the kernel term cancels the *0.5 of the
    # (x + x.T)*0.5 symmetrisation applied downstream.
    # The gradient rows keep the *2 required by the
    # (sigma_x sigma_x + sigma_y sigma_y) kernel term.
    wv_ab[1:4] *= 2.0
    wv_ba[1:4] *= 2.0
    # *0.5 below is for tau->ao
    wv_ab[4] *= 0.5
    wv_ba[4] *= 0.5
    return wv_ab*weight, wv_ba*weight
def uks_sf_mgga_wv2_p(rho1, kxc_sf,weight):
    """Contract meta-GGA densities with the spin-flip third derivative ("+").

    Args:
        rho1: pair ``(rho1_ab, rho1_ba)`` of arrays with shape (5, ngrid);
            rows are the value, its x, y, z gradient and the tau-like
            variable u.
        kxc_sf: array with shape (5, 5, 2, 5, ngrid). The first two axes run
            over the spin-flip variables (s, its gradient, u); axis 2 selects
            0: n, grad n, tau or 1: s, grad s, u.
        weight: quadrature weights, broadcast over the last axis.

    Returns:
        ``(gv_ab, gv_ba)``, each of shape (2, 5, ngrid), where the second
        density factor is ``rho1_ab + rho1_ba``.
    """
    rho1_ab, rho1_ba = rho1
    gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab+rho1_ba, kxc_sf, optimize=True)
    gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba+rho1_ab, kxc_sf, optimize=True)

    # Gradient rows carry the *2 of the kernel term; the last row carries the
    # *0.5 of the tau->ao conversion. Row 0 is left unscaled.
    gv_ab[:,1:4] *= 2.0
    gv_ba[:,1:4] *= 2.0
    gv_ab[:,4] *= 0.5
    gv_ba[:,4] *= 0.5
    return gv_ab*weight, gv_ba*weight


def uks_sf_mgga_wv2_m(rho1, kxc_sf,weight):
    """Contract meta-GGA densities with the spin-flip third derivative ("-").

    Identical to ``uks_sf_mgga_wv2_p`` except that the second density factor
    is the difference ``rho1_ab - rho1_ba`` (respectively
    ``rho1_ba - rho1_ab``).

    Args:
        rho1: pair ``(rho1_ab, rho1_ba)`` of arrays with shape (5, ngrid).
        kxc_sf: array with shape (5, 5, 2, 5, ngrid).
        weight: quadrature weights, broadcast over the last axis.

    Returns:
        ``(gv_ab, gv_ba)``, each of shape (2, 5, ngrid).
    """
    rho1_ab, rho1_ba = rho1
    gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab-rho1_ba, kxc_sf, optimize=True)
    gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba-rho1_ab, kxc_sf, optimize=True)

    # Gradient rows carry the *2 of the kernel term; the last row carries the
    # *0.5 of the tau->ao conversion. Row 0 is left unscaled.
    gv_ab[:,1:4] *= 2.0
    gv_ba[:,1:4] *= 2.0
    gv_ab[:,4] *= 0.5
    gv_ba[:,4] *= 0.5
    return gv_ab*weight, gv_ba*weight
def _contract_xc_kernel_z(td_grad, xc_code, dmvo, max_memory=2000):
    """Contract the spin-conserving XC kernel with a pair of AO densities.

    Args:
        td_grad: gradient object; ``td_grad.base._scf`` supplies the
            molecule, grids, numerical integrator and ground-state orbitals.
        xc_code: functional description handed to ``ni._xc_type`` and
            ``ni.eval_xc``.
        dmvo: pair ``(dm_a, dm_b)`` of square AO matrices; each is
            symmetrised as ``(dm + dm.T) * 0.5`` before use. The caller in
            this file passes the Z-vector density ``z1ao``.
        max_memory: forwarded to ``ni.block_loop``.

    Returns:
        Array of shape (2, 4, nao, nao). On the axis of length 4, entries
        1..3 are multiplied by -1 before returning. For ``xctype == 'HF'``
        the array is all zeros.

    Raises:
        NotImplementedError: if the functional type is not LDA, GGA, MGGA
            or HF.
    """
    mol = td_grad.base._scf.mol
    mf = td_grad.base._scf
    grids = mf.grids

    ni = mf._numint
    xctype = ni._xc_type(xc_code)

    mo_coeff = mf.mo_coeff
    mo_occ = mf.mo_occ
    nao = mo_coeff[0].shape[0]

    shls_slice = (0, mol.nbas)
    ao_loc = mol.ao_loc_nr()

    # Symmetrise both spin blocks; eval_rho is then called with hermi=1.
    dmvo = [(dmvo[0]+dmvo[0].T)*.5,
            (dmvo[1]+dmvo[1].T)*.5]

    f1vo = np.zeros((2,4,nao,nao))
    deriv = 2

    if xctype == 'LDA':
        def lda_sum_(vmat, ao, wv, mask):
            # vmat[k] += <ao[k] | wv | ao[0]> for the four AO rows available.
            aow = numint._scale_ao(ao[0], wv)
            for k in range(4):
                vmat[k] += numint._dot_ao_ao(mol, ao[k], aow, mask, shls_slice, ao_loc)

        ao_deriv = 1
        for ao, mask, weight, coords \
                in ni.block_loop(mol, grids, nao, ao_deriv, max_memory):
            rho = (ni.eval_rho2(mol, ao[0], mo_coeff[0], mo_occ[0], mask, xctype),
                   ni.eval_rho2(mol, ao[0], mo_coeff[1], mo_occ[1], mask, xctype))
            vxc, fxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:3]
            u_u, u_d, d_d = fxc[0].T * weight
            rho1a = ni.eval_rho(mol, ao[0], dmvo[0], mask, xctype, hermi=1)
            rho1b = ni.eval_rho(mol, ao[0], dmvo[1], mask, xctype, hermi=1)

            lda_sum_(f1vo[0], ao, u_u*rho1a+u_d*rho1b, mask)
            lda_sum_(f1vo[1], ao, u_d*rho1a+d_d*rho1b, mask)

    elif xctype == 'GGA':
        def gga_sum_(vmat, ao, wv, mask):
            # vmat[0] receives the symmetrised matrix; vmat[1:] is filled by
            # rks_grad._gga_grad_sum_.
            aow = numint._scale_ao(ao[:4], wv[:4])
            tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc)
            vmat[0] += tmp + tmp.T
            rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv, mask, ao_loc)
        ao_deriv = 2
        for ao, mask, weight, coords \
                in ni.block_loop(mol, grids, nao, ao_deriv, max_memory):
            rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype),
                   ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype))
            vxc, fxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:3]

            rho1 = (ni.eval_rho(mol, ao, dmvo[0], mask, xctype, hermi=1),
                    ni.eval_rho(mol, ao, dmvo[1], mask, xctype, hermi=1))
            wv = numint._uks_gga_wv1(rho, rho1, vxc, fxc, weight)
            gga_sum_(f1vo[0], ao, wv[0], mask)
            gga_sum_(f1vo[1], ao, wv[1], mask)

    elif xctype == 'MGGA':
        def mgga_sum_(vmat, ao, wv, mask):
            # Unlike the accumulator in _contract_xc_kernel, this one reads
            # row 5 of wv directly, i.e. the weights are not repacked to
            # five rows first.
            aow = numint._scale_ao(ao[:4], wv[:4])
            tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc)

            aow = numint._scale_ao(ao[1], wv[5], aow)
            tmp += numint._dot_ao_ao(mol, ao[1], aow, mask, shls_slice, ao_loc)
            aow = numint._scale_ao(ao[2], wv[5], aow)
            tmp += numint._dot_ao_ao(mol, ao[2], aow, mask, shls_slice, ao_loc)
            aow = numint._scale_ao(ao[3], wv[5], aow)
            tmp += numint._dot_ao_ao(mol, ao[3], aow, mask, shls_slice, ao_loc)
            vmat[0] += tmp + tmp.T

            rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv[:4], mask, ao_loc)
            rks_grad._tau_grad_dot_(vmat[1:], mol, ao, wv[5]*2, mask, ao_loc, True)

        ao_deriv = 2
        for ao, mask, weight, coords \
                in ni.block_loop(mol, grids, nao, ao_deriv, max_memory):
            rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype),
                   ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype))
            vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:]

            rho1 = (ni.eval_rho(mol, ao, dmvo[0], mask, xctype, hermi=1),
                    ni.eval_rho(mol, ao, dmvo[1], mask, xctype, hermi=1))
            wv = numint._uks_mgga_wv1(rho, rho1, vxc, fxc, weight)
            mgga_sum_(f1vo[0], ao, wv[0], mask)
            mgga_sum_(f1vo[1], ao, wv[1], mask)

            # Drop references to the per-batch arrays.
            vxc = fxc = rho = rho1 = None

    elif xctype == 'HF':
        # No XC contribution: f1vo stays zero.
        pass
    else:
        raise NotImplementedError(f'td-uks for functional {xc_code}')

    # Flip the sign of entries 1..3 (the AO-derivative components).
    f1vo[:,1:] *= -1
    return f1vo
class Gradients(tdrhf_grad.Gradients):
    """Gradient class whose electronic part is the module-level ``grad_elec``.

    Everything else is inherited from ``tdrhf_grad.Gradients``.
    """
    @lib.with_doc(grad_elec.__doc__)
    def grad_elec(self, xy, singlet=None, atmlst=None):
        # `singlet` is accepted in the signature but is not forwarded to the
        # module-level grad_elec.
        return grad_elec(self, xy, atmlst, self.max_memory, self.verbose)
grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): Returns: The gradient of excited states: Ei^{\\xi} = E0^{\\xi} + wi^{\\xi} + This function is based on https://github.com/pyscf/pyscf-forge/blob/master/pyscf/grad/tduks_sf.py ''' log = logger.new_logger(td_grad, verbose) time0 = logger.process_clock(), logger.perf_counter() From 37533fe31c6c8216ed106af94885023f9131e8cd Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Mon, 20 Oct 2025 11:20:03 +0800 Subject: [PATCH 15/32] in writting --- gpu4pyscf/grad/tduks_sf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index 746ee0af5..2aa9aab63 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -47,6 +47,7 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): mo_coeff = mf.mo_coeff mo_energy = mf.mo_energy mo_occ = mf.mo_occ + occidxa = np.where(mo_occ[0]>0)[0] occidxb = np.where(mo_occ[1]>0)[0] viridxa = np.where(mo_occ[0]==0)[0] @@ -177,6 +178,8 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): wvoa -= np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 + else: + raise RuntimeError("Only spin-flip UHF/UKS is supported") vresp = mf.gen_response(hermi=1) From 91cae3b1ceabed23a548bf9d239e62f7e8baab8b Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Tue, 21 Oct 2025 08:58:45 +0800 Subject: [PATCH 16/32] in writing --- gpu4pyscf/grad/tduks_sf.py | 220 ++++++++++++++++++++++++++++++++++++- 1 file changed, 219 insertions(+), 1 deletion(-) diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index 2aa9aab63..078f3e45a 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -65,7 +65,225 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): nmoa = nocca + nvira nmob = noccb + nvirb - if td_grad.base.extype==0 
or 1: + # if td_grad.base.extype==0 or 1: + # # x_ab, a means vira, b means occb + # (x_ab, x_ba), (y_ab, y_ba) = x_y + # xpy_ab = (x_ab + y_ab).T + # xpy_ba = (x_ba + y_ba).T + # xmy_ab = (x_ab - y_ab).T + # xmy_ba = (x_ba - y_ba).T + + # dvv_a = np.einsum('ai,bi->ab', xpy_ab, xpy_ab) + np.einsum('ai,bi->ab', xmy_ab, xmy_ab) # T^{ab \alpha \beta}*2 + # dvv_b = np.einsum('ai,bi->ab', xpy_ba, xpy_ba) + np.einsum('ai,bi->ab', xmy_ba, xmy_ba) # T^{ab \beta \alpha}*2 + # doo_b =-np.einsum('ai,aj->ij', xpy_ab, xpy_ab) - np.einsum('ai,aj->ij', xmy_ab, xmy_ab) # T^{ij \alpha \beta}*2 + # doo_a =-np.einsum('ai,aj->ij', xpy_ba, xpy_ba) - np.einsum('ai,aj->ij', xmy_ba, xmy_ba) # T^{ij \beta \alpha}*2 + + # dmxpy_ab = reduce(np.dot, (orbva, xpy_ab, orbob.T)) # ua ai iv -> uv -> (X+Y)_{uv \alpha \beta} + # dmxpy_ba = reduce(np.dot, (orbvb, xpy_ba, orboa.T)) # ua ai iv -> uv -> (X+Y)_{uv \beta \alpha} + # dmxmy_ab = reduce(np.dot, (orbva, xmy_ab, orbob.T)) # ua ai iv -> uv -> (X-Y)_{uv \alpha \beta} + # dmxmy_ba = reduce(np.dot, (orbvb, xmy_ba, orboa.T)) # ua ai iv -> uv -> (X-Y)_{uv \beta \alpha} + + # dmzoo_a = reduce(np.dot, (orboa, doo_a, orboa.T)) # \sum_{\sigma ab} 2*Tab \sigma C_{au} C_{bu} + # dmzoo_b = reduce(np.dot, (orbob, doo_b, orbob.T)) # \sum_{\sigma ab} 2*Tij \sigma C_{iu} C_{iu} + # dmzoo_a+= reduce(np.dot, (orbva, dvv_a, orbva.T)) + # dmzoo_b+= reduce(np.dot, (orbvb, dvv_b, orbvb.T)) + + # ni = mf._numint + # ni.libxc.test_deriv_order(mf.xc, 3, raise_error=True) + # omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, mol.spin) + + # # used by mcfun. 
+ # rho0, vxc, fxc = ni.cache_xc_kernel(mf.mol, mf.grids, mf.xc, + # mo_coeff, mo_occ, spin=1) + + # f1vo, f1oo, vxc1, k1ao = \ + # _contract_xc_kernel(td_grad, mf.xc, ((dmxpy_ab,dmxpy_ba),(dmxmy_ab,dmxmy_ba)), + # (dmzoo_a,dmzoo_b), True, True, max_memory) + # k1ao_xpy, k1ao_xmy = k1ao + + # # f1vo, (2,2,4,nao,nao), (X+Y) and (X-Y) with fxc_sf + # # f1oo, (2,4,nao,nao), 2T with fxc_sc + # # vxc1, ao with v1^{\sigma} + # # k1ao_xpy,(2,2,4,nao,nao), (X+Y)(X+Y) and (X-Y)(X-Y) with gxc + + # if abs(hyb) > 1e-10: + # dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, + # dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) + # vj, vk = mf.get_jk(mol, dm, hermi=0) + # vk *= hyb + # if abs(omega) > 1e-10: + # vk += mf.get_k(mol, dm, hermi=0, omega=omega) * (alpha-hyb) + # vj = vj.reshape(2,3,nao,nao) + # vk = vk.reshape(2,3,nao,nao) + + # veff0doo = vj[0,0]+vj[1,0] - vk[:,0]+ f1oo[:,0] + # veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] + # +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) + # veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] + # +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) + + # wvoa = reduce(np.dot, (orbva.T, veff0doo[0], orboa)) *2 + # wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 + + # veff = - vk[:,1] + f1vo[0,:,0] + # veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + # veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + + # wvoa += np.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 + # wvob += np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 + + # wvoa -= np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 + # wvob -= np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 + + # veff = -vk[:,2] + f1vo[1,:,0] + # veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + # veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], 
mo_coeff[1])) + + # wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 + # wvob += np.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 + + # wvoa -= np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 + # wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 + + # else: + # dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, + # dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) + # vj = mf.get_j(mol, dm, hermi=0).reshape(2,3,nao,nao) + + # veff0doo = vj[0,0]+vj[1,0] + f1oo[:,0] + # veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] + # +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) + # veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] + # +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) + + # wvoa = reduce(np.dot, (orbva.T, veff0doo[0], orboa)) *2 + # wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 + + # veff = f1vo[0,:,0] + # veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + # veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + + # wvoa += np.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 + # wvob += np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 + + # wvoa -= np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 + # wvob -= np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 + + # veff = f1vo[1,:,0] + # veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + # veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + + # wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 + # wvob += np.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 + + # wvoa -= np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 + # wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 + if td_grad.base.extype == 0: # spin-flip-up + # x_ab, a means 
vira, b means occb + (x_ab, x_ba), (y_ab, y_ba) = x_y + xpy_ab = (x_ab + y_ab).T + xmy_ab = (x_ab - y_ab).T + + dvv_a = np.einsum('ai,bi->ab', xpy_ab, xpy_ab) + np.einsum('ai,bi->ab', xmy_ab, xmy_ab) # T^{ab \alpha \beta}*2 + doo_b =-np.einsum('ai,aj->ij', xpy_ab, xpy_ab) - np.einsum('ai,aj->ij', xmy_ab, xmy_ab) # T^{ij \alpha \beta}*2 + + dmxpy_ab = reduce(np.dot, (orbva, xpy_ab, orbob.T)) # ua ai iv -> uv -> (X+Y)_{uv \alpha \beta} + dmxmy_ab = reduce(np.dot, (orbva, xmy_ab, orbob.T)) # ua ai iv -> uv -> (X-Y)_{uv \alpha \beta} + + dmzoo_b = reduce(np.dot, (orbob, doo_b, orbob.T)) # \sum_{\sigma ab} 2*Tij \sigma C_{iu} C_{iu} + dmzoo_a+= reduce(np.dot, (orbva, dvv_a, orbva.T)) + + ni = mf._numint + ni.libxc.test_deriv_order(mf.xc, 3, raise_error=True) + omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, mol.spin) + + # used by mcfun. + rho0, vxc, fxc = ni.cache_xc_kernel(mf.mol, mf.grids, mf.xc, + mo_coeff, mo_occ, spin=1) + + f1vo, f1oo, vxc1, k1ao = \ + _contract_xc_kernel(td_grad, mf.xc, ((dmxpy_ab,dmxpy_ba),(dmxmy_ab,dmxmy_ba)), + (dmzoo_a,dmzoo_b), True, True, max_memory) + k1ao_xpy, k1ao_xmy = k1ao + + # f1vo, (2,2,4,nao,nao), (X+Y) and (X-Y) with fxc_sf + # f1oo, (2,4,nao,nao), 2T with fxc_sc + # vxc1, ao with v1^{\sigma} + # k1ao_xpy,(2,2,4,nao,nao), (X+Y)(X+Y) and (X-Y)(X-Y) with gxc + + if abs(hyb) > 1e-10: + dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, + dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) + vj, vk = mf.get_jk(mol, dm, hermi=0) + vk *= hyb + if abs(omega) > 1e-10: + vk += mf.get_k(mol, dm, hermi=0, omega=omega) * (alpha-hyb) + vj = vj.reshape(2,3,nao,nao) + vk = vk.reshape(2,3,nao,nao) + + veff0doo = vj[0,0]+vj[1,0] - vk[:,0]+ f1oo[:,0] + veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] + +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) + veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] + +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - 
k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) + + wvoa = reduce(np.dot, (orbva.T, veff0doo[0], orboa)) *2 + wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 + + veff = - vk[:,1] + f1vo[0,:,0] + veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + + wvoa += np.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 + wvob += np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 + + wvoa -= np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 + wvob -= np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 + + veff = -vk[:,2] + f1vo[1,:,0] + veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + + wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 + wvob += np.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 + + wvoa -= np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 + wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 + + else: + dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, + dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) + vj = mf.get_j(mol, dm, hermi=0).reshape(2,3,nao,nao) + + veff0doo = vj[0,0]+vj[1,0] + f1oo[:,0] + veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] + +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) + veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] + +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) + + wvoa = reduce(np.dot, (orbva.T, veff0doo[0], orboa)) *2 + wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 + + veff = f1vo[0,:,0] + veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + + wvoa += np.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 + wvob += 
np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 + + wvoa -= np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 + wvob -= np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 + + veff = f1vo[1,:,0] + veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + + wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 + wvob += np.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 + + wvoa -= np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 + wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 + elif td_grad.base.extype == 1: # spin-flip-down # x_ab, a means vira, b means occb (x_ab, x_ba), (y_ab, y_ba) = x_y xpy_ab = (x_ab + y_ab).T From 324bf37f8f9dad345a63c8ee3d12909ce7a88669 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Tue, 21 Oct 2025 17:23:39 +0800 Subject: [PATCH 17/32] in writting --- gpu4pyscf/grad/__init__.py | 1 + gpu4pyscf/grad/tduks_sf.py | 909 ++++++++++++++++++------------------- 2 files changed, 455 insertions(+), 455 deletions(-) diff --git a/gpu4pyscf/grad/__init__.py b/gpu4pyscf/grad/__init__.py index 86f087fef..2a2bafbbd 100644 --- a/gpu4pyscf/grad/__init__.py +++ b/gpu4pyscf/grad/__init__.py @@ -8,3 +8,4 @@ from . import tduhf from . import tduks from . import tdrks_ris +from . import tduks_sf diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index 078f3e45a..8b9dd5ffd 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -12,17 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from functools import reduce +from functools import reduce, partial import numpy as np import cupy as cp from pyscf import lib from pyscf.lib import logger from pyscf.scf import ucphf from pyscf.dft import numint -from pyscf.dft import numint2c from pyscf.grad import rks as rks_grad from pyscf.grad import tdrhf as tdrhf_grad -from pyscf.sftda.numint2c_sftd import cache_xc_kernel_sf +from gpu4pyscf.tdscf._uhf_resp_sf import cache_xc_kernel_sf +from gpu4pyscf.grad import tdrks def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): @@ -48,10 +48,10 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): mo_energy = mf.mo_energy mo_occ = mf.mo_occ - occidxa = np.where(mo_occ[0]>0)[0] - occidxb = np.where(mo_occ[1]>0)[0] - viridxa = np.where(mo_occ[0]==0)[0] - viridxb = np.where(mo_occ[1]==0)[0] + occidxa = cp.where(mo_occ[0]>0)[0] + occidxb = cp.where(mo_occ[1]>0)[0] + viridxa = cp.where(mo_occ[0]==0)[0] + viridxb = cp.where(mo_occ[1]==0)[0] nocca = len(occidxa) noccb = len(occidxb) nvira = len(viridxa) @@ -65,133 +65,20 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): nmoa = nocca + nvira nmob = noccb + nvirb - # if td_grad.base.extype==0 or 1: - # # x_ab, a means vira, b means occb - # (x_ab, x_ba), (y_ab, y_ba) = x_y - # xpy_ab = (x_ab + y_ab).T - # xpy_ba = (x_ba + y_ba).T - # xmy_ab = (x_ab - y_ab).T - # xmy_ba = (x_ba - y_ba).T - - # dvv_a = np.einsum('ai,bi->ab', xpy_ab, xpy_ab) + np.einsum('ai,bi->ab', xmy_ab, xmy_ab) # T^{ab \alpha \beta}*2 - # dvv_b = np.einsum('ai,bi->ab', xpy_ba, xpy_ba) + np.einsum('ai,bi->ab', xmy_ba, xmy_ba) # T^{ab \beta \alpha}*2 - # doo_b =-np.einsum('ai,aj->ij', xpy_ab, xpy_ab) - np.einsum('ai,aj->ij', xmy_ab, xmy_ab) # T^{ij \alpha \beta}*2 - # doo_a =-np.einsum('ai,aj->ij', xpy_ba, xpy_ba) - np.einsum('ai,aj->ij', xmy_ba, xmy_ba) # T^{ij \beta \alpha}*2 - - # dmxpy_ab = reduce(np.dot, (orbva, xpy_ab, orbob.T)) # ua ai iv -> uv -> (X+Y)_{uv \alpha 
\beta} - # dmxpy_ba = reduce(np.dot, (orbvb, xpy_ba, orboa.T)) # ua ai iv -> uv -> (X+Y)_{uv \beta \alpha} - # dmxmy_ab = reduce(np.dot, (orbva, xmy_ab, orbob.T)) # ua ai iv -> uv -> (X-Y)_{uv \alpha \beta} - # dmxmy_ba = reduce(np.dot, (orbvb, xmy_ba, orboa.T)) # ua ai iv -> uv -> (X-Y)_{uv \beta \alpha} - - # dmzoo_a = reduce(np.dot, (orboa, doo_a, orboa.T)) # \sum_{\sigma ab} 2*Tab \sigma C_{au} C_{bu} - # dmzoo_b = reduce(np.dot, (orbob, doo_b, orbob.T)) # \sum_{\sigma ab} 2*Tij \sigma C_{iu} C_{iu} - # dmzoo_a+= reduce(np.dot, (orbva, dvv_a, orbva.T)) - # dmzoo_b+= reduce(np.dot, (orbvb, dvv_b, orbvb.T)) - - # ni = mf._numint - # ni.libxc.test_deriv_order(mf.xc, 3, raise_error=True) - # omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, mol.spin) - - # # used by mcfun. - # rho0, vxc, fxc = ni.cache_xc_kernel(mf.mol, mf.grids, mf.xc, - # mo_coeff, mo_occ, spin=1) - - # f1vo, f1oo, vxc1, k1ao = \ - # _contract_xc_kernel(td_grad, mf.xc, ((dmxpy_ab,dmxpy_ba),(dmxmy_ab,dmxmy_ba)), - # (dmzoo_a,dmzoo_b), True, True, max_memory) - # k1ao_xpy, k1ao_xmy = k1ao - - # # f1vo, (2,2,4,nao,nao), (X+Y) and (X-Y) with fxc_sf - # # f1oo, (2,4,nao,nao), 2T with fxc_sc - # # vxc1, ao with v1^{\sigma} - # # k1ao_xpy,(2,2,4,nao,nao), (X+Y)(X+Y) and (X-Y)(X-Y) with gxc - - # if abs(hyb) > 1e-10: - # dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, - # dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) - # vj, vk = mf.get_jk(mol, dm, hermi=0) - # vk *= hyb - # if abs(omega) > 1e-10: - # vk += mf.get_k(mol, dm, hermi=0, omega=omega) * (alpha-hyb) - # vj = vj.reshape(2,3,nao,nao) - # vk = vk.reshape(2,3,nao,nao) - - # veff0doo = vj[0,0]+vj[1,0] - vk[:,0]+ f1oo[:,0] - # veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] - # +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) - # veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] - # +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - 
k1ao_xmy[1,1,0]) - - # wvoa = reduce(np.dot, (orbva.T, veff0doo[0], orboa)) *2 - # wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 - - # veff = - vk[:,1] + f1vo[0,:,0] - # veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - # veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - # wvoa += np.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 - # wvob += np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 - - # wvoa -= np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 - # wvob -= np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 - - # veff = -vk[:,2] + f1vo[1,:,0] - # veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - # veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - # wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 - # wvob += np.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 - - # wvoa -= np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 - # wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 - - # else: - # dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, - # dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) - # vj = mf.get_j(mol, dm, hermi=0).reshape(2,3,nao,nao) - - # veff0doo = vj[0,0]+vj[1,0] + f1oo[:,0] - # veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] - # +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) - # veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] - # +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) - - # wvoa = reduce(np.dot, (orbva.T, veff0doo[0], orboa)) *2 - # wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 - - # veff = f1vo[0,:,0] - # veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - # veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - # wvoa += np.einsum('ca,ci->ai', 
veff0mop_ba[noccb:,nocca:], xpy_ba) *2 - # wvob += np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 - - # wvoa -= np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 - # wvob -= np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 - - # veff = f1vo[1,:,0] - # veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - # veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - # wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 - # wvob += np.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 - - # wvoa -= np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 - # wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 if td_grad.base.extype == 0: # spin-flip-up # x_ab, a means vira, b means occb (x_ab, x_ba), (y_ab, y_ba) = x_y xpy_ab = (x_ab + y_ab).T xmy_ab = (x_ab - y_ab).T - dvv_a = np.einsum('ai,bi->ab', xpy_ab, xpy_ab) + np.einsum('ai,bi->ab', xmy_ab, xmy_ab) # T^{ab \alpha \beta}*2 - doo_b =-np.einsum('ai,aj->ij', xpy_ab, xpy_ab) - np.einsum('ai,aj->ij', xmy_ab, xmy_ab) # T^{ij \alpha \beta}*2 + dvv_a = cp.einsum('ai,bi->ab', xpy_ab, xpy_ab) + cp.einsum('ai,bi->ab', xmy_ab, xmy_ab) # T^{ab \alpha \beta}*2 + doo_b =-cp.einsum('ai,aj->ij', xpy_ab, xpy_ab) - cp.einsum('ai,aj->ij', xmy_ab, xmy_ab) # T^{ij \alpha \beta}*2 - dmxpy_ab = reduce(np.dot, (orbva, xpy_ab, orbob.T)) # ua ai iv -> uv -> (X+Y)_{uv \alpha \beta} - dmxmy_ab = reduce(np.dot, (orbva, xmy_ab, orbob.T)) # ua ai iv -> uv -> (X-Y)_{uv \alpha \beta} + dmxpy_ab = reduce(cp.dot, (orbva, xpy_ab, orbob.T)) # ua ai iv -> uv -> (X+Y)_{uv \alpha \beta} + dmxmy_ab = reduce(cp.dot, (orbva, xmy_ab, orbob.T)) # ua ai iv -> uv -> (X-Y)_{uv \alpha \beta} - dmzoo_b = reduce(np.dot, (orbob, doo_b, orbob.T)) # \sum_{\sigma ab} 2*Tij \sigma C_{iu} C_{iu} - dmzoo_a+= reduce(np.dot, (orbva, dvv_a, orbva.T)) + dmzoo_b = reduce(cp.dot, (orbob, doo_b, orbob.T)) # \sum_{\sigma ab} 2*Tij \sigma C_{iu} C_{iu} + dmzoo_a 
= reduce(cp.dot, (orbva, dvv_a, orbva.T)) ni = mf._numint ni.libxc.test_deriv_order(mf.xc, 3, raise_error=True) @@ -202,7 +89,7 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): mo_coeff, mo_occ, spin=1) f1vo, f1oo, vxc1, k1ao = \ - _contract_xc_kernel(td_grad, mf.xc, ((dmxpy_ab,dmxpy_ba),(dmxmy_ab,dmxmy_ba)), + _contract_xc_kernel(td_grad, mf.xc, (dmxpy_ab, dmxmy_ab), (dmzoo_a,dmzoo_b), True, True, max_memory) k1ao_xpy, k1ao_xmy = k1ao @@ -212,8 +99,8 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): # k1ao_xpy,(2,2,4,nao,nao), (X+Y)(X+Y) and (X-Y)(X-Y) with gxc if abs(hyb) > 1e-10: - dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, - dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) + dm = (dmzoo_a, dmxpy_ab.T, -dmxmy_ab.T, + dmzoo_b, dmxpy_ab, dmxmy_ab) vj, vk = mf.get_jk(mol, dm, hermi=0) vk *= hyb if abs(omega) > 1e-10: @@ -227,32 +114,24 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) - wvoa = reduce(np.dot, (orbva.T, veff0doo[0], orboa)) *2 - wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 + wvoa = reduce(cp.dot, (orbva.T, veff0doo[0], orboa)) *2 + wvob = reduce(cp.dot, (orbvb.T, veff0doo[1], orbob)) *2 veff = - vk[:,1] + f1vo[0,:,0] - veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + veff0mop_ab = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - wvoa += np.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 - wvob += np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 - - wvoa -= np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 - wvob -= np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 + wvob += cp.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], 
xpy_ab) *2 + wvoa -= cp.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 veff = -vk[:,2] + f1vo[1,:,0] - veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 - wvob += np.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 + veff0mom_ab = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - wvoa -= np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 - wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 + wvob += cp.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 + wvoa -= cp.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 else: - dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, - dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) + dm = (dmzoo_a, dmxpy_ab.T, -dmxmy_ab.T, + dmzoo_b, dmxpy_ab, dmxmy_ab) vj = mf.get_j(mol, dm, hermi=0).reshape(2,3,nao,nao) veff0doo = vj[0,0]+vj[1,0] + f1oo[:,0] @@ -261,50 +140,34 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) - wvoa = reduce(np.dot, (orbva.T, veff0doo[0], orboa)) *2 - wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 + wvoa = reduce(cp.dot, (orbva.T, veff0doo[0], orboa)) *2 + wvob = reduce(cp.dot, (orbvb.T, veff0doo[1], orbob)) *2 veff = f1vo[0,:,0] - veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - wvoa += np.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 - wvob += np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 + veff0mop_ab = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - wvoa -= np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 - wvob -= 
np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 + wvob += cp.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 + wvoa -= cp.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 veff = f1vo[1,:,0] - veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + veff0mom_ab = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 - wvob += np.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 - - wvoa -= np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 - wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 + wvob += cp.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 + wvoa -= cp.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 elif td_grad.base.extype == 1: # spin-flip-down # x_ab, a means vira, b means occb (x_ab, x_ba), (y_ab, y_ba) = x_y - xpy_ab = (x_ab + y_ab).T xpy_ba = (x_ba + y_ba).T - xmy_ab = (x_ab - y_ab).T xmy_ba = (x_ba - y_ba).T - dvv_a = np.einsum('ai,bi->ab', xpy_ab, xpy_ab) + np.einsum('ai,bi->ab', xmy_ab, xmy_ab) # T^{ab \alpha \beta}*2 - dvv_b = np.einsum('ai,bi->ab', xpy_ba, xpy_ba) + np.einsum('ai,bi->ab', xmy_ba, xmy_ba) # T^{ab \beta \alpha}*2 - doo_b =-np.einsum('ai,aj->ij', xpy_ab, xpy_ab) - np.einsum('ai,aj->ij', xmy_ab, xmy_ab) # T^{ij \alpha \beta}*2 - doo_a =-np.einsum('ai,aj->ij', xpy_ba, xpy_ba) - np.einsum('ai,aj->ij', xmy_ba, xmy_ba) # T^{ij \beta \alpha}*2 + dvv_b = cp.einsum('ai,bi->ab', xpy_ba, xpy_ba) + cp.einsum('ai,bi->ab', xmy_ba, xmy_ba) # T^{ab \beta \alpha}*2 + doo_a =-cp.einsum('ai,aj->ij', xpy_ba, xpy_ba) - cp.einsum('ai,aj->ij', xmy_ba, xmy_ba) # T^{ij \beta \alpha}*2 - dmxpy_ab = reduce(np.dot, (orbva, xpy_ab, orbob.T)) # ua ai iv -> uv -> (X+Y)_{uv \alpha \beta} - dmxpy_ba = reduce(np.dot, (orbvb, xpy_ba, orboa.T)) # ua ai iv -> uv -> (X+Y)_{uv \beta \alpha} - dmxmy_ab = reduce(np.dot, 
(orbva, xmy_ab, orbob.T)) # ua ai iv -> uv -> (X-Y)_{uv \alpha \beta} - dmxmy_ba = reduce(np.dot, (orbvb, xmy_ba, orboa.T)) # ua ai iv -> uv -> (X-Y)_{uv \beta \alpha} + dmxpy_ba = reduce(cp.dot, (orbvb, xpy_ba, orboa.T)) # ua ai iv -> uv -> (X+Y)_{uv \beta \alpha} + dmxmy_ba = reduce(cp.dot, (orbvb, xmy_ba, orboa.T)) # ua ai iv -> uv -> (X-Y)_{uv \beta \alpha} - dmzoo_a = reduce(np.dot, (orboa, doo_a, orboa.T)) # \sum_{\sigma ab} 2*Tab \sigma C_{au} C_{bu} - dmzoo_b = reduce(np.dot, (orbob, doo_b, orbob.T)) # \sum_{\sigma ab} 2*Tij \sigma C_{iu} C_{iu} - dmzoo_a+= reduce(np.dot, (orbva, dvv_a, orbva.T)) - dmzoo_b+= reduce(np.dot, (orbvb, dvv_b, orbvb.T)) + dmzoo_a = reduce(cp.dot, (orboa, doo_a, orboa.T)) # \sum_{\sigma ab} 2*Tab \sigma C_{au} C_{bu} + dmzoo_b+= reduce(cp.dot, (orbvb, dvv_b, orbvb.T)) ni = mf._numint ni.libxc.test_deriv_order(mf.xc, 3, raise_error=True) @@ -315,7 +178,7 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): mo_coeff, mo_occ, spin=1) f1vo, f1oo, vxc1, k1ao = \ - _contract_xc_kernel(td_grad, mf.xc, ((dmxpy_ab,dmxpy_ba),(dmxmy_ab,dmxmy_ba)), + _contract_xc_kernel(td_grad, mf.xc, (dmxpy_ba, dmxmy_ba), (dmzoo_a,dmzoo_b), True, True, max_memory) k1ao_xpy, k1ao_xmy = k1ao @@ -325,8 +188,8 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): # k1ao_xpy,(2,2,4,nao,nao), (X+Y)(X+Y) and (X-Y)(X-Y) with gxc if abs(hyb) > 1e-10: - dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, - dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) + dm = (dmzoo_a, dmxpy_ba, dmxmy_ba, + dmzoo_b, dmxpy_ba.T, -dmxmy_ba.T) vj, vk = mf.get_jk(mol, dm, hermi=0) vk *= hyb if abs(omega) > 1e-10: @@ -340,32 +203,24 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) - wvoa = reduce(np.dot, (orbva.T, veff0doo[0], 
orboa)) *2 - wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 + wvoa = reduce(cp.dot, (orbva.T, veff0doo[0], orboa)) *2 + wvob = reduce(cp.dot, (orbvb.T, veff0doo[1], orbob)) *2 veff = - vk[:,1] + f1vo[0,:,0] - veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + veff0mop_ba = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - wvoa += np.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 - wvob += np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 - - wvoa -= np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 - wvob -= np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 + wvoa += cp.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 + wvob -= cp.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 veff = -vk[:,2] + f1vo[1,:,0] - veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 - wvob += np.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 + veff0mom_ba = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - wvoa -= np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 - wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 + wvoa += cp.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 + wvob -= cp.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 else: - dm = (dmzoo_a, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, - dmzoo_b, dmxpy_ab+dmxpy_ba.T, dmxmy_ab-dmxmy_ba.T) + dm = (dmzoo_a, dmxpy_ba, dmxmy_ba, + dmzoo_b, dmxpy_ba.T, -dmxmy_ba.T) vj = mf.get_j(mol, dm, hermi=0).reshape(2,3,nao,nao) veff0doo = vj[0,0]+vj[1,0] + f1oo[:,0] @@ -374,46 +229,38 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - 
k1ao_xpy[1,1,0] +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) - wvoa = reduce(np.dot, (orbva.T, veff0doo[0], orboa)) *2 - wvob = reduce(np.dot, (orbvb.T, veff0doo[1], orbob)) *2 + wvoa = reduce(cp.dot, (orbva.T, veff0doo[0], orboa)) *2 + wvob = reduce(cp.dot, (orbvb.T, veff0doo[1], orbob)) *2 veff = f1vo[0,:,0] - veff0mop_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - veff0mop_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - wvoa += np.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 - wvob += np.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 + veff0mop_ba = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - wvoa -= np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 - wvob -= np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 + wvoa += cp.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 + wvob -= cp.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 veff = f1vo[1,:,0] - veff0mom_ba = reduce(np.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - veff0mom_ab = reduce(np.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + veff0mom_ba = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - wvoa += np.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 - wvob += np.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 - - wvoa -= np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 - wvob -= np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 + wvoa += cp.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 + wvob -= cp.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 else: raise RuntimeError("Only spin-flip UHF/UKS is supported") vresp = mf.gen_response(hermi=1) def fvind(x): - dm1 = np.empty((2,nao,nao)) + dm1 = cp.empty((2,nao,nao)) x_a = x[0,:nvira*nocca].reshape(nvira,nocca) x_b = x[0,nvira*nocca:].reshape(nvirb,noccb) - dm_a = reduce(np.dot, (orbva, x_a, orboa.T)) - dm_b = reduce(np.dot, (orbvb, x_b, 
orbob.T)) + dm_a = reduce(cp.dot, (orbva, x_a, orboa.T)) + dm_b = reduce(cp.dot, (orbvb, x_b, orbob.T)) dm1[0] = (dm_a + dm_a.T).real dm1[1] = (dm_b + dm_b.T).real v1 = vresp(dm1) - v1a = reduce(np.dot, (orbva.T, v1[0], orboa)) - v1b = reduce(np.dot, (orbvb.T, v1[1], orbob)) - return np.hstack((v1a.ravel(), v1b.ravel())) + v1a = reduce(cp.dot, (orbva.T, v1[0], orboa)) + v1b = reduce(cp.dot, (orbvb.T, v1[1], orbob)) + return cp.hstack((v1a.ravel(), v1b.ravel())) z1a, z1b = ucphf.solve(fvind, mo_energy, mo_occ, (wvoa,wvob), max_cycle=td_grad.cphf_max_cycle, @@ -421,31 +268,36 @@ def fvind(x): time1 = log.timer('Z-vector using UCPHF solver', *time0) - z1ao = np.zeros((2,nao,nao)) - z1ao[0] += reduce(np.dot, (orbva, z1a, orboa.T)) - z1ao[1] += reduce(np.dot, (orbvb, z1b, orbob.T)) + z1ao = cp.zeros((2,nao,nao)) + z1ao[0] += reduce(cp.dot, (orbva, z1a, orboa.T)) + z1ao[1] += reduce(cp.dot, (orbvb, z1b, orbob.T)) veff = vresp((z1ao+z1ao.transpose(0,2,1))*0.5) - im0a = np.zeros((nmoa,nmoa)) - im0b = np.zeros((nmob,nmob)) + im0a = cp.zeros((nmoa,nmoa)) + im0b = cp.zeros((nmob,nmob)) + if td_grad.base.extype == 0: + im0a[:nocca,:nocca] = reduce(cp.dot, (orboa.T, veff0doo[0]+veff[0], orboa)) *.5 + im0b[:noccb,:noccb] = reduce(cp.dot, (orbob.T, veff0doo[1]+veff[1], orbob)) *.5 + im0b[:noccb,:noccb] += cp.einsum('aj,ai->ij', veff0mop_ab[nocca:,:noccb], xpy_ab) *0.5 + im0b[:noccb,:noccb] += cp.einsum('aj,ai->ij', veff0mom_ab[nocca:,:noccb], xmy_ab) *0.5 + + im0a[nocca:,nocca:] = cp.einsum('bi,ai->ab', veff0mop_ab[nocca:,:noccb], xpy_ab) *0.5 + im0a[nocca:,nocca:] += cp.einsum('bi,ai->ab', veff0mom_ab[nocca:,:noccb], xmy_ab) *0.5 - im0a[:nocca,:nocca] = reduce(np.dot, (orboa.T, veff0doo[0]+veff[0], orboa)) *.5 - im0b[:noccb,:noccb] = reduce(np.dot, (orbob.T, veff0doo[1]+veff[1], orbob)) *.5 - im0a[:nocca,:nocca] += np.einsum('aj,ai->ij', veff0mop_ba[noccb:,:nocca], xpy_ba) *0.5 - im0b[:noccb,:noccb] += np.einsum('aj,ai->ij', veff0mop_ab[nocca:,:noccb], xpy_ab) *0.5 - 
im0a[:nocca,:nocca] += np.einsum('aj,ai->ij', veff0mom_ba[noccb:,:nocca], xmy_ba) *0.5 - im0b[:noccb,:noccb] += np.einsum('aj,ai->ij', veff0mom_ab[nocca:,:noccb], xmy_ab) *0.5 + im0a[nocca:,:nocca] = cp.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) + im0a[nocca:,:nocca] += cp.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) + elif td_grad.base.extype == 1: + im0a[:nocca,:nocca] = reduce(cp.dot, (orboa.T, veff0doo[0]+veff[0], orboa)) *.5 + im0b[:noccb,:noccb] = reduce(cp.dot, (orbob.T, veff0doo[1]+veff[1], orbob)) *.5 + im0a[:nocca,:nocca] += cp.einsum('aj,ai->ij', veff0mop_ba[noccb:,:nocca], xpy_ba) *0.5 + im0a[:nocca,:nocca] += cp.einsum('aj,ai->ij', veff0mom_ba[noccb:,:nocca], xmy_ba) *0.5 - im0a[nocca:,nocca:] = np.einsum('bi,ai->ab', veff0mop_ab[nocca:,:noccb], xpy_ab) *0.5 - im0b[noccb:,noccb:] = np.einsum('bi,ai->ab', veff0mop_ba[noccb:,:nocca], xpy_ba) *0.5 - im0a[nocca:,nocca:] += np.einsum('bi,ai->ab', veff0mom_ab[nocca:,:noccb], xmy_ab) *0.5 - im0b[noccb:,noccb:] += np.einsum('bi,ai->ab', veff0mom_ba[noccb:,:nocca], xmy_ba) *0.5 + im0b[noccb:,noccb:] = cp.einsum('bi,ai->ab', veff0mop_ba[noccb:,:nocca], xpy_ba) *0.5 + im0b[noccb:,noccb:] += cp.einsum('bi,ai->ab', veff0mom_ba[noccb:,:nocca], xmy_ba) *0.5 - im0a[nocca:,:nocca] = np.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) - im0b[noccb:,:noccb] = np.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) - im0a[nocca:,:nocca] += np.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) - im0b[noccb:,:noccb] += np.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) + im0b[noccb:,:noccb] = cp.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) + im0b[noccb:,:noccb] += cp.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) zeta_a = (mo_energy[0][:,None] + mo_energy[0]) * .5 zeta_b = (mo_energy[1][:,None] + mo_energy[1]) * .5 @@ -454,21 +306,23 @@ def fvind(x): zeta_a[:nocca,nocca:] = mo_energy[0][nocca:] zeta_b[:noccb,noccb:] = mo_energy[1][noccb:] - dm1a = 
np.zeros((nmoa,nmoa)) - dm1b = np.zeros((nmob,nmob)) - dm1a[:nocca,:nocca] = doo_a * .5 - dm1b[:noccb,:noccb] = doo_b * .5 - dm1a[nocca:,nocca:] = dvv_a * .5 - dm1b[noccb:,noccb:] = dvv_b * .5 + dm1a = cp.zeros((nmoa,nmoa)) + dm1b = cp.zeros((nmob,nmob)) + if td_grad.base.extype == 0: + dm1b[:noccb,:noccb] = doo_b * .5 + dm1a[nocca:,nocca:] = dvv_a * .5 + elif td_grad.base.extype == 1: + dm1a[:nocca,:nocca] = doo_a * .5 + dm1b[noccb:,noccb:] = dvv_b * .5 dm1a[nocca:,:nocca] = z1a *.5 dm1b[noccb:,:noccb] = z1b *.5 - dm1a[:nocca,:nocca] += np.eye(nocca) # for ground state - dm1b[:noccb,:noccb] += np.eye(noccb) + dm1a[:nocca,:nocca] += cp.eye(nocca) # for ground state + dm1b[:noccb,:noccb] += cp.eye(noccb) - im0a = reduce(np.dot, (mo_coeff[0], im0a+zeta_a*dm1a, mo_coeff[0].T)) - im0b = reduce(np.dot, (mo_coeff[1], im0b+zeta_b*dm1b, mo_coeff[1].T)) + im0a = reduce(cp.dot, (mo_coeff[0], im0a+zeta_a*dm1a, mo_coeff[0].T)) + im0b = reduce(cp.dot, (mo_coeff[1], im0b+zeta_b*dm1b, mo_coeff[1].T)) im0 = im0a + im0b # Initialize hcore_deriv with the underlying SCF object because some @@ -481,31 +335,52 @@ def fvind(x): dmz1doo_a = z1ao[0] + dmzoo_a dmz1doo_b = z1ao[1] + dmzoo_b - oo0a = reduce(np.dot, (orboa, orboa.T)) - oo0b = reduce(np.dot, (orbob, orbob.T)) + oo0a = reduce(cp.dot, (orboa, orboa.T)) + oo0b = reduce(cp.dot, (orbob, orbob.T)) as_dm1 = oo0a + oo0b + (dmz1doo_a + dmz1doo_b) * .5 + if td_grad.base.extype == 0: + if abs(hyb) > 1e-10: + dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ab.T, -dmxmy_ab.T, + oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ab, dmxmy_ab) + vj, vk = td_grad.get_jk(mol, dm) + vj = vj.reshape(2,4,3,nao,nao) + vk = vk.reshape(2,4,3,nao,nao) * hyb + vj[:,2:4] *= 0.0 + if abs(omega) > 1e-10: + with mol.with_range_coulomb(omega): + vk += td_grad.get_k(mol, dm).reshape(2,4,3,nao,nao) * (alpha-hyb) - if abs(hyb) > 1e-10: - dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ba+dmxpy_ab.T, dmxmy_ba-dmxmy_ab.T, - oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ab+dmxpy_ba.T, 
dmxmy_ab-dmxmy_ba.T) - vj, vk = td_grad.get_jk(mol, dm) - vj = vj.reshape(2,4,3,nao,nao) - vk = vk.reshape(2,4,3,nao,nao) * hyb - vj[:,2:4] *= 0.0 - if abs(omega) > 1e-10: - with mol.with_range_coulomb(omega): - vk += td_grad.get_k(mol, dm).reshape(2,4,3,nao,nao) * (alpha-hyb) - - veff1 = np.zeros((2,4,3,nao,nao)) - veff1[:,:2] = vj[0,:2] + vj[1,:2] - vk[:,:2] - else: - dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ba+dmxpy_ab.T, - oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ab+dmxpy_ba.T) - vj = td_grad.get_j(mol, dm).reshape(2,3,3,nao,nao) - vj[:,2] *= 0.0 - veff1 = np.zeros((2,4,3,nao,nao)) - veff1[:,:3] = vj[0] + vj[1] + veff1 = cp.zeros((2,4,3,nao,nao)) + veff1[:,:2] = vj[0,:2] + vj[1,:2] - vk[:,:2] + else: + dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ab.T, + oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ab) + vj = td_grad.get_j(mol, dm).reshape(2,3,3,nao,nao) + vj[:,2] *= 0.0 + veff1 = cp.zeros((2,4,3,nao,nao)) + veff1[:,:3] = vj[0] + vj[1] + elif td_grad.base.extype == 1: + if abs(hyb) > 1e-10: + dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ba, dmxmy_ba, + oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ba.T, -dmxmy_ba.T) + vj, vk = td_grad.get_jk(mol, dm) + vj = vj.reshape(2,4,3,nao,nao) + vk = vk.reshape(2,4,3,nao,nao) * hyb + vj[:,2:4] *= 0.0 + if abs(omega) > 1e-10: + with mol.with_range_coulomb(omega): + vk += td_grad.get_k(mol, dm).reshape(2,4,3,nao,nao) * (alpha-hyb) + + veff1 = cp.zeros((2,4,3,nao,nao)) + veff1[:,:2] = vj[0,:2] + vj[1,:2] - vk[:,:2] + else: + dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ba, + oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ba.T) + vj = td_grad.get_j(mol, dm).reshape(2,3,3,nao,nao) + vj[:,2] *= 0.0 + veff1 = cp.zeros((2,4,3,nao,nao)) + veff1[:,:3] = vj[0] + vj[1] fxcz1 = _contract_xc_kernel_z(td_grad, mf.xc, z1ao, max_memory) @@ -524,57 +399,62 @@ def fvind(x): if atmlst is None: atmlst = range(mol.natm) offsetdic = mol.offset_nr_by_atom() - de = np.zeros((len(atmlst),3)) + de = cp.zeros((len(atmlst),3)) for k, ia in enumerate(atmlst): shl0, shl1, p0, p1 = offsetdic[ia] # 
Ground state gradients h1ao = hcore_deriv(ia) - de[k] = np.einsum('xpq,pq->x', h1ao, as_dm1) - de[k] += np.einsum('xpq,pq->x', veff1a[0,:,p0:p1], oo0a[p0:p1]) - de[k] += np.einsum('xpq,pq->x', veff1b[0,:,p0:p1], oo0b[p0:p1]) - de[k] += np.einsum('xpq,qp->x', veff1a[0,:,p0:p1], oo0a[:,p0:p1]) - de[k] += np.einsum('xpq,qp->x', veff1b[0,:,p0:p1], oo0b[:,p0:p1]) + de[k] = cp.einsum('xpq,pq->x', h1ao, as_dm1) + de[k] += cp.einsum('xpq,pq->x', veff1a[0,:,p0:p1], oo0a[p0:p1]) + de[k] += cp.einsum('xpq,pq->x', veff1b[0,:,p0:p1], oo0b[p0:p1]) + de[k] += cp.einsum('xpq,qp->x', veff1a[0,:,p0:p1], oo0a[:,p0:p1]) + de[k] += cp.einsum('xpq,qp->x', veff1b[0,:,p0:p1], oo0b[:,p0:p1]) - de[k] += np.einsum('xpq,pq->x', veff1a[0,:,p0:p1], dmz1doo_a[p0:p1]) *.5 - de[k] += np.einsum('xpq,pq->x', veff1b[0,:,p0:p1], dmz1doo_b[p0:p1]) *.5 - de[k] += np.einsum('xpq,qp->x', veff1a[0,:,p0:p1], dmz1doo_a[:,p0:p1]) *.5 - de[k] += np.einsum('xpq,qp->x', veff1b[0,:,p0:p1], dmz1doo_b[:,p0:p1]) *.5 + de[k] += cp.einsum('xpq,pq->x', veff1a[0,:,p0:p1], dmz1doo_a[p0:p1]) *.5 + de[k] += cp.einsum('xpq,pq->x', veff1b[0,:,p0:p1], dmz1doo_b[p0:p1]) *.5 + de[k] += cp.einsum('xpq,qp->x', veff1a[0,:,p0:p1], dmz1doo_a[:,p0:p1]) *.5 + de[k] += cp.einsum('xpq,qp->x', veff1b[0,:,p0:p1], dmz1doo_b[:,p0:p1]) *.5 - de[k] -= np.einsum('xpq,pq->x', s1[:,p0:p1], im0[p0:p1]) - de[k] -= np.einsum('xqp,pq->x', s1[:,p0:p1], im0[:,p0:p1]) + de[k] -= cp.einsum('xpq,pq->x', s1[:,p0:p1], im0[p0:p1]) + de[k] -= cp.einsum('xqp,pq->x', s1[:,p0:p1], im0[:,p0:p1]) - de[k] += np.einsum('xij,ij->x', veff1a[1,:,p0:p1], oo0a[p0:p1]) *0.5 - de[k] += np.einsum('xij,ij->x', veff1b[1,:,p0:p1], oo0b[p0:p1]) *0.5 + de[k] += cp.einsum('xij,ij->x', veff1a[1,:,p0:p1], oo0a[p0:p1]) *0.5 + de[k] += cp.einsum('xij,ij->x', veff1b[1,:,p0:p1], oo0b[p0:p1]) *0.5 - de[k] += np.einsum('xij,ij->x', veff1b[2,:,p0:p1], dmxpy_ab[p0:p1,:]) - de[k] += np.einsum('xij,ij->x', veff1a[2,:,p0:p1], dmxpy_ba[p0:p1,:]) - de[k] += np.einsum('xji,ij->x', 
veff1b[2,:,p0:p1], dmxpy_ab[:,p0:p1]) - de[k] += np.einsum('xji,ij->x', veff1a[2,:,p0:p1], dmxpy_ba[:,p0:p1]) + if td_grad.base.extype == 0: + de[k] += cp.einsum('xij,ij->x', veff1b[2,:,p0:p1], dmxpy_ab[p0:p1,:]) + de[k] += cp.einsum('xji,ij->x', veff1b[2,:,p0:p1], dmxpy_ab[:,p0:p1]) - de[k] += np.einsum('xij,ij->x', veff1b[3,:,p0:p1], dmxmy_ab[p0:p1,:]) - de[k] += np.einsum('xij,ij->x', veff1a[3,:,p0:p1], dmxmy_ba[p0:p1,:]) - de[k] += np.einsum('xji,ij->x', veff1b[3,:,p0:p1], dmxmy_ab[:,p0:p1]) - de[k] += np.einsum('xji,ij->x', veff1a[3,:,p0:p1], dmxmy_ba[:,p0:p1]) + de[k] += cp.einsum('xij,ij->x', veff1b[3,:,p0:p1], dmxmy_ab[p0:p1,:]) + de[k] += cp.einsum('xji,ij->x', veff1b[3,:,p0:p1], dmxmy_ab[:,p0:p1]) - if abs(hyb) > 1e-10: - de[k] -= np.einsum('xij,ij->x', vk[1,2,:,p0:p1], dmxpy_ab[p0:p1,:]) - de[k] -= np.einsum('xij,ij->x', vk[0,2,:,p0:p1], dmxpy_ba[p0:p1,:]) - de[k] -= np.einsum('xji,ij->x', vk[0,2,:,p0:p1], dmxpy_ab[:,p0:p1]) - de[k] -= np.einsum('xji,ij->x', vk[1,2,:,p0:p1], dmxpy_ba[:,p0:p1]) + if abs(hyb) > 1e-10: + de[k] -= cp.einsum('xij,ij->x', vk[1,2,:,p0:p1], dmxpy_ab[p0:p1,:]) + de[k] -= cp.einsum('xji,ij->x', vk[0,2,:,p0:p1], dmxpy_ab[:,p0:p1]) + + de[k] -= cp.einsum('xij,ij->x', vk[1,3,:,p0:p1], dmxmy_ab[p0:p1,:]) + de[k] += cp.einsum('xji,ij->x', vk[0,3,:,p0:p1], dmxmy_ab[:,p0:p1]) + elif td_grad.base.extype == 1: + de[k] += cp.einsum('xij,ij->x', veff1a[2,:,p0:p1], dmxpy_ba[p0:p1,:]) + de[k] += cp.einsum('xji,ij->x', veff1a[2,:,p0:p1], dmxpy_ba[:,p0:p1]) - de[k] -= np.einsum('xij,ij->x', vk[1,3,:,p0:p1], dmxmy_ab[p0:p1,:]) - de[k] -= np.einsum('xij,ij->x', vk[0,3,:,p0:p1], dmxmy_ba[p0:p1,:]) - de[k] += np.einsum('xji,ij->x', vk[0,3,:,p0:p1], dmxmy_ab[:,p0:p1]) - de[k] += np.einsum('xji,ij->x', vk[1,3,:,p0:p1], dmxmy_ba[:,p0:p1]) + de[k] += cp.einsum('xij,ij->x', veff1a[3,:,p0:p1], dmxmy_ba[p0:p1,:]) + de[k] += cp.einsum('xji,ij->x', veff1a[3,:,p0:p1], dmxmy_ba[:,p0:p1]) + + if abs(hyb) > 1e-10: + de[k] -= cp.einsum('xij,ij->x', 
vk[0,2,:,p0:p1], dmxpy_ba[p0:p1,:]) + de[k] -= cp.einsum('xji,ij->x', vk[1,2,:,p0:p1], dmxpy_ba[:,p0:p1]) + + de[k] -= cp.einsum('xij,ij->x', vk[0,3,:,p0:p1], dmxmy_ba[p0:p1,:]) + de[k] += cp.einsum('xji,ij->x', vk[1,3,:,p0:p1], dmxmy_ba[:,p0:p1]) - # de[k] += td_grad.extra_force(ia, locals()) log.timer('TDUKS nuclear gradients', *time0) return de def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, - with_kxc=True, max_memory=2000): + with_kxc=True, max_memory=2000, extype=0): mol = td_grad.mol mf = td_grad.base._scf grids = mf.grids @@ -585,149 +465,205 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, mo_coeff = mf.mo_coeff mo_occ = mf.mo_occ nao = mo_coeff[0].shape[0] - shls_slice = (0, mol.nbas) ao_loc = mol.ao_loc_nr() - f1vo = np.zeros((2,2,4,nao,nao)) + opt = getattr(ni, "gdftopt", None) + if opt is None: + ni.build(mol, grids.coords) + opt = ni.gdftopt + _sorted_mol = opt._sorted_mol + mo_coeff = opt.sort_orbitals(mo_coeff, axis=[1]) + + f1vo = cp.zeros((2,2,4,nao,nao)) deriv = 2 if dmoo is not None: - f1oo = np.zeros((2,4,nao,nao)) + f1oo = cp.zeros((2,4,nao,nao)) + dmoo0 = opt.sort_orbitals(dmoo, axis=[0, 1]) + dmoo1 = opt.sort_orbitals(dmoo, axis=[0, 1]) else: f1oo = None if with_vxc: - v1ao = np.zeros((2,4,nao,nao)) + v1ao = cp.zeros((2,4,nao,nao)) else: v1ao = None if with_kxc: - k1ao_xpy = np.zeros((2,2,4,nao,nao)) - k1ao_xmy = np.zeros((2,2,4,nao,nao)) + k1ao_xpy = cp.zeros((2,2,4,nao,nao)) + k1ao_xmy = cp.zeros((2,2,4,nao,nao)) deriv = 3 else: k1ao_xpy = k1ao_xmy = None - # create a mc object to use mcfun. - nimc = numint2c.NumInt2C() - nimc.collinear = 'mcol' - nimc.collinear_samples=td_grad.base.collinear_samples + dmvo0 = opt.sort_orbitals(dmvo[0], axis=[0, 1]) + dmvo1 = opt.sort_orbitals(dmvo[1], axis=[0, 1]) + + # # create a mc object to use mcfun. + # nimc = numint2c.NumInt2C() + # nimc.collinear = 'mcol' + # nimc.collinear_samples=td_grad.base.collinear_samples # calculate the derivatives. 
- fxc_sf,kxc_sf = cache_xc_kernel_sf(nimc,mol,mf.grids,mf.xc,mo_coeff,mo_occ,deriv=3,spin=1)[2:] + fxc_sf,kxc_sf = cache_xc_kernel_sf(ni,mol,mf.grids,mf.xc,mo_coeff,mo_occ, + td_grad.base.collinear_samples,deriv=3)[2:] p0,p1=0,0 # the two parameters are used for counts the batch of grids. - if xctype == 'LDA': - def lda_sum_(vmat, ao, wv, mask): - aow = numint._scale_ao(ao[0], wv) - for k in range(4): - vmat[k] += numint._dot_ao_ao(mol, ao[k], aow, mask, shls_slice, ao_loc) + if xctype == "LDA": + fmat_, ao_deriv = tdrks._lda_eval_mat_, 1 + elif xctype == "GGA": + fmat_, ao_deriv = tdrks._gga_eval_mat_, 2 + elif xctype == "MGGA": + fmat_, ao_deriv = tdrks._mgga_eval_mat_, 2 - ao_deriv = 1 + if xctype == 'LDA': for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): + in ni.block_loop(_sorted_mol, grids, nao, ao_deriv): p0 = p1 p1+= weight.shape[0] s_s = fxc_sf[...,p0:p1] * weight - rho1_ab = ni.eval_rho(mol, ao[0], dmvo[0][0], mask, xctype) - rho1_ba = ni.eval_rho(mol, ao[0], dmvo[0][1], mask, xctype) - # s_s*2 because of \sigma_x \sigma_x + \sigma_y \sigma_y - lda_sum_(f1vo[0][1], ao, (rho1_ab+rho1_ba)*s_s*2, mask) - lda_sum_(f1vo[0][0], ao, (rho1_ba+rho1_ab)*s_s*2, mask) - - if with_kxc: - s_s_n = kxc_sf[:,:,0][...,p0:p1] * weight - s_s_s = kxc_sf[:,:,1][...,p0:p1] * weight - lda_sum_(k1ao_xpy[0][0], ao, s_s_n*2*rho1_ab*(rho1_ab+rho1_ba), mask) - lda_sum_(k1ao_xpy[0][1], ao, s_s_n*2*rho1_ba*(rho1_ba+rho1_ab), mask) - lda_sum_(k1ao_xpy[1][0], ao, s_s_s*2*rho1_ab*(rho1_ab+rho1_ba), mask) - lda_sum_(k1ao_xpy[1][1], ao, s_s_s*2*rho1_ba*(rho1_ba+rho1_ab), mask) - - rho1_ab = ni.eval_rho(mol, ao[0], dmvo[1][0], mask, xctype) - rho1_ba = ni.eval_rho(mol, ao[0], dmvo[1][1], mask, xctype) - - # py attention to the order of f1vo[1][1] and f1vo[1][0] - lda_sum_(f1vo[1][1], ao, (rho1_ab-rho1_ba)*s_s*2, mask) - lda_sum_(f1vo[1][0], ao, (rho1_ba-rho1_ab)*s_s*2, mask) - - if with_kxc: - # Note the "-" - lda_sum_(k1ao_xmy[0][0], ao, 
s_s_n*2*rho1_ab*(rho1_ab-rho1_ba), mask) - lda_sum_(k1ao_xmy[0][1], ao, s_s_n*2*rho1_ba*(rho1_ba-rho1_ab), mask) - lda_sum_(k1ao_xmy[1][0], ao, s_s_s*2*rho1_ab*(rho1_ab-rho1_ba), mask) - lda_sum_(k1ao_xmy[1][1], ao, s_s_s*2*rho1_ba*(rho1_ba-rho1_ab), mask) - - rho = (ni.eval_rho2(mol, ao[0], mo_coeff[0], mo_occ[0], mask, xctype), - ni.eval_rho2(mol, ao[0], mo_coeff[1], mo_occ[1], mask, xctype)) - vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:] + mo_coeff_mask_a = mo_coeff[0, mask] + mo_coeff_mask_b = mo_coeff[1, mask] + dmvo0_mask = dmvo0[mask[:, None], mask] + dmvo1_mask = dmvo1[mask[:, None], mask] + + if extype == 0: + rho1_ab = ni.eval_rho(_sorted_mol, ao[0], dmvo0_mask, mask, xctype) + # s_s*2 because of \sigma_x \sigma_x + \sigma_y \sigma_y + fmat_(_sorted_mol, f1vo[0][1], ao, rho1_ab*s_s*2, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[0][0], ao, rho1_ab*s_s*2, mask, shls_slice, ao_loc) + # lda_sum_(f1vo[0][1], ao, rho1_ab*s_s*2, mask) + # lda_sum_(f1vo[0][0], ao, rho1_ab*s_s*2, mask) + + if with_kxc: + s_s_n = kxc_sf[:,:,0][...,p0:p1] * weight + s_s_s = kxc_sf[:,:,1][...,p0:p1] * weight + fmat_(_sorted_mol, k1ao_xpy[0][0], ao, s_s_n*2*rho1_ab*rho1_ab, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, k1ao_xpy[1][0], ao, s_s_s*2*rho1_ab*rho1_ab, mask, shls_slice, ao_loc) + + rho1_ab = ni.eval_rho(_sorted_mol, ao[0], dmvo1_mask, mask, xctype) + + # py attention to the order of f1vo[1][1] and f1vo[1][0] + fmat_(_sorted_mol, f1vo[1][1], ao, rho1_ab*s_s*2, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[1][0], ao, -rho1_ab*s_s*2, mask, shls_slice, ao_loc) + + if with_kxc: + # Note the "-" + fmat_(_sorted_mol, k1ao_xmy[0][0], ao, s_s_n*2*rho1_ab*rho1_ab, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, k1ao_xmy[1][0], ao, s_s_s*2*rho1_ab*rho1_ab, mask, shls_slice, ao_loc) + elif extype == 1: + rho1_ba = ni.eval_rho(_sorted_mol, ao[0], dmvo0_mask, mask, xctype) + # s_s*2 because of \sigma_x \sigma_x + \sigma_y \sigma_y + fmat_(_sorted_mol, 
f1vo[0][1], ao, rho1_ba*s_s*2, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[0][0], ao, rho1_ba*s_s*2, mask, shls_slice, ao_loc) + + if with_kxc: + s_s_n = kxc_sf[:,:,0][...,p0:p1] * weight + s_s_s = kxc_sf[:,:,1][...,p0:p1] * weight + fmat_(_sorted_mol, k1ao_xpy[0][1], ao, s_s_n*2*rho1_ba*rho1_ba, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, k1ao_xpy[1][1], ao, s_s_s*2*rho1_ba*rho1_ba, mask, shls_slice, ao_loc) + + rho1_ba = ni.eval_rho(_sorted_mol, ao[0], dmvo1_mask, mask, xctype) + + # py attention to the order of f1vo[1][1] and f1vo[1][0] + fmat_(_sorted_mol, f1vo[1][1], ao, -rho1_ba*s_s*2, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[1][0], ao, rho1_ba*s_s*2, mask, shls_slice, ao_loc) + + if with_kxc: + # Note the "-" + fmat_(_sorted_mol, k1ao_xmy[0][1], ao, s_s_n*2*rho1_ba*rho1_ba, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, k1ao_xmy[1][1], ao, s_s_s*2*rho1_ba*rho1_ba, mask, shls_slice, ao_loc) + + rho = (ni.eval_rho2(_sorted_mol, ao[0],mo_coeff_mask_a, mo_occ[0], mask, xctype), + ni.eval_rho2(_sorted_mol, ao[0], mo_coeff_mask_b, mo_occ[1], mask, xctype)) + vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, 1, deriv=deriv)[1:] u_u, u_d, d_d = fxc[0].T * weight if dmoo is not None: - rho2a = ni.eval_rho(mol, ao[0], dmoo[0], mask, xctype, hermi=1) - rho2b = ni.eval_rho(mol, ao[0], dmoo[1], mask, xctype, hermi=1) - lda_sum_(f1oo[0], ao, u_u*rho2a+u_d*rho2b, mask) - lda_sum_(f1oo[1], ao, u_d*rho2a+d_d*rho2b, mask) + dmoo0_mask = dmoo0[mask[:, None], mask] + dmoo1_mask = dmoo1[mask[:, None], mask] + rho2a = ni.eval_rho(_sorted_mol, ao[0], dmoo0_mask, mask, xctype, hermi=1) + rho2b = ni.eval_rho(_sorted_mol, ao[0], dmoo1_mask, mask, xctype, hermi=1) + fmat_(_sorted_mol, f1oo[0], ao, u_u*rho2a+u_d*rho2b, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1oo[1], ao, u_d*rho2a+d_d*rho2b, mask, shls_slice, ao_loc) if with_vxc: vrho = vxc[0].T * weight - lda_sum_(v1ao[0], ao, vrho[0], mask) - lda_sum_(v1ao[1], ao, vrho[1], mask) + fmat_(_sorted_mol, v1ao[0], 
ao, vrho[0], mask, shls_slice, ao_loc) + fmat_(_sorted_mol, v1ao[1], ao, vrho[1], mask, shls_slice, ao_loc) elif xctype == 'GGA': - def gga_sum_(vmat, ao, wv, mask): - aow = numint._scale_ao(ao[:4], wv[:4]) - tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) - vmat[0] += tmp + tmp.T - rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv, mask, ao_loc) - - ao_deriv = 2 for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): + in ni.block_loop(_sorted_mol, grids, nao, ao_deriv, max_memory): p0 = p1 p1+= weight.shape[0] - rho1_ab = ni.eval_rho(mol, ao, dmvo[0][0], mask, xctype) - rho1_ba = ni.eval_rho(mol, ao, dmvo[0][1], mask, xctype) + mo_coeff_mask_a = mo_coeff[0, mask] + mo_coeff_mask_b = mo_coeff[1, mask] + dmvo0_mask = dmvo0[mask[:, None], mask] + dmvo1_mask = dmvo1[mask[:, None], mask] - wv_sf = uks_sf_gga_wv1((rho1_ab,rho1_ba),fxc_sf[...,p0:p1],weight) - gga_sum_(f1vo[0][1], ao, wv_sf[0]+wv_sf[1], mask) - gga_sum_(f1vo[0][0], ao, wv_sf[1]+wv_sf[0], mask) + if extype == 0: + rho1_ab = ni.eval_rho(_sorted_mol, ao, dmvo0_mask, mask, xctype) - if with_kxc: - gv_sf = uks_sf_gga_wv2_p((rho1_ab,rho1_ba),kxc_sf[...,p0:p1],weight) - gga_sum_(k1ao_xpy[0][0], ao, gv_sf[0][0], mask) - gga_sum_(k1ao_xpy[0][1], ao, gv_sf[1][0], mask) - gga_sum_(k1ao_xpy[1][0], ao, gv_sf[0][1], mask) - gga_sum_(k1ao_xpy[1][1], ao, gv_sf[1][1], mask) + wv_sf = uks_sf_gga_wv1(rho1_ab,fxc_sf[...,p0:p1],weight, extype) + fmat_(_sorted_mol, f1vo[0][1], ao, wv_sf, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[0][0], ao, wv_sf, mask, shls_slice, ao_loc) - rho1_ab = ni.eval_rho(mol, ao, dmvo[1][0], mask, xctype) - rho1_ba = ni.eval_rho(mol, ao, dmvo[1][1], mask, xctype) + if with_kxc: + gv_sf = uks_sf_gga_wv2_p(rho1_ab,kxc_sf[...,p0:p1],weight, extype) + fmat_(_sorted_mol, k1ao_xpy[0][0], ao, gv_sf[0], mask, shls_slice, ao_loc) + fmat_(_sorted_mol, k1ao_xpy[1][0], ao, gv_sf[1], mask, shls_slice, ao_loc) - wv_sf = 
uks_sf_gga_wv1((rho1_ab,rho1_ba),fxc_sf[...,p0:p1],weight) - gga_sum_(f1vo[1][1], ao, wv_sf[0]-wv_sf[1], mask) - gga_sum_(f1vo[1][0], ao, wv_sf[1]-wv_sf[0], mask) + rho1_ab = ni.eval_rho(_sorted_mol, ao, dmvo1_mask, mask, xctype) - if with_kxc: - gv_sf = uks_sf_gga_wv2_m((rho1_ab,rho1_ba),kxc_sf[...,p0:p1],weight) - gga_sum_(k1ao_xmy[0][0], ao, gv_sf[0][0], mask) - gga_sum_(k1ao_xmy[0][1], ao, gv_sf[1][0], mask) - gga_sum_(k1ao_xmy[1][0], ao, gv_sf[0][1], mask) - gga_sum_(k1ao_xmy[1][1], ao, gv_sf[1][1], mask) + wv_sf = uks_sf_gga_wv1(rho1_ab,fxc_sf[...,p0:p1],weight, extype) + fmat_(_sorted_mol, f1vo[1][1], ao, wv_sf, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[1][0], ao, -wv_sf, mask, shls_slice, ao_loc) - rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype), - ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype)) - vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:] + if with_kxc: + gv_sf = uks_sf_gga_wv2_m(rho1_ab,kxc_sf[...,p0:p1],weight, extype) + fmat_(_sorted_mol, k1ao_xmy[0][0], ao, gv_sf[0], mask, shls_slice, ao_loc) + fmat_(_sorted_mol, k1ao_xmy[1][0], ao, gv_sf[1], mask, shls_slice, ao_loc) + elif extype == 1: + rho1_ba = ni.eval_rho(_sorted_mol, ao, dmvo0_mask, mask, xctype) + + wv_sf = uks_sf_gga_wv1(rho1_ba,fxc_sf[...,p0:p1],weight, extype) + fmat_(_sorted_mol, f1vo[0][1], ao, wv_sf, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[0][0], ao, wv_sf, mask, shls_slice, ao_loc) + if with_kxc: + gv_sf = uks_sf_gga_wv2_p(rho1_ba,kxc_sf[...,p0:p1],weight, extype) + fmat_(_sorted_mol, k1ao_xpy[0][1], ao, gv_sf[0], mask, shls_slice, ao_loc) + fmat_(_sorted_mol, k1ao_xpy[1][1], ao, gv_sf[1], mask, shls_slice, ao_loc) + + rho1_ba = ni.eval_rho(_sorted_mol, ao, dmvo1_mask, mask, xctype) + + wv_sf = uks_sf_gga_wv1(rho1_ba,fxc_sf[...,p0:p1],weight, extype) + fmat_(_sorted_mol, f1vo[1][1], ao, -wv_sf, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[1][0], ao, wv_sf, mask, shls_slice, ao_loc) + + if with_kxc: + gv_sf = 
uks_sf_gga_wv2_m(rho1_ba,kxc_sf[...,p0:p1],weight, extype) + fmat_(_sorted_mol, k1ao_xmy[0][1], ao, gv_sf[0], mask, shls_slice, ao_loc) + fmat_(_sorted_mol, k1ao_xmy[1][1], ao, gv_sf[1], mask, shls_slice, ao_loc) + + rho = (ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_a, mo_occ[0], mask, xctype), + ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_b, mo_occ[1], mask, xctype)) + vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, 1, deriv=deriv)[1:] if dmoo is not None: - rho2 = (ni.eval_rho(mol, ao, dmoo[0], mask, xctype, hermi=1), - ni.eval_rho(mol, ao, dmoo[1], mask, xctype, hermi=1)) + dmoo0_mask = dmoo0[mask[:, None], mask] + dmoo1_mask = dmoo1[mask[:, None], mask] + rho2 = (ni.eval_rho(_sorted_mol, ao, dmoo0_mask, mask, xctype, hermi=1), + ni.eval_rho(_sorted_mol, ao, dmoo1_mask, mask, xctype, hermi=1)) wv = numint._uks_gga_wv1(rho, rho2, vxc, fxc, weight) - gga_sum_(f1oo[0], ao, wv[0], mask) - gga_sum_(f1oo[1], ao, wv[1], mask) + fmat_(_sorted_mol, f1oo[0], ao, wv[0], mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1oo[1], ao, wv[1], mask, shls_slice, ao_loc) if with_vxc: wv = numint._uks_gga_wv0(rho, vxc, weight) - gga_sum_(v1ao[0], ao, wv[0], mask) - gga_sum_(v1ao[1], ao, wv[1], mask) + fmat_(_sorted_mol, v1ao[0], ao, wv[0], mask, shls_slice, ao_loc) + fmat_(_sorted_mol, v1ao[1], ao, wv[1], mask, shls_slice, ao_loc) elif xctype == 'MGGA': + raise NotImplementedError('MGGA not implemented') def mgga_sum_(vmat, ao, wv, mask): aow = numint._scale_ao(ao[:4], wv[:4]) tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) @@ -749,12 +685,16 @@ def mgga_sum_(vmat, ao, wv, mask): p0 = p1 p1+= weight.shape[0] ngrid=weight.shape[-1] + if extype == 0: + pass + elif extype == 1: + pass rho1_ab_tmp = ni.eval_rho(mol, ao, dmvo[0][0], mask, xctype) rho1_ba_tmp = ni.eval_rho(mol, ao, dmvo[0][1], mask, xctype) # Padding for laplacian - rho1_ab = np.empty((5, ngrid)) - rho1_ba = np.empty((5, ngrid)) + rho1_ab = cp.empty((5, ngrid)) + rho1_ba = cp.empty((5, ngrid)) rho1_ab[:4] 
= rho1_ab_tmp[:4] rho1_ba[:4] = rho1_ba_tmp[:4] rho1_ab[4] = rho1_ab_tmp[5] @@ -774,8 +714,8 @@ def mgga_sum_(vmat, ao, wv, mask): rho1_ab_tmp = ni.eval_rho(mol, ao, dmvo[1][0], mask, xctype) rho1_ba_tmp = ni.eval_rho(mol, ao, dmvo[1][1], mask, xctype) # Padding for laplacian - rho1_ab = np.empty((5, ngrid)) - rho1_ba = np.empty((5, ngrid)) + rho1_ab = cp.empty((5, ngrid)) + rho1_ba = cp.empty((5, ngrid)) rho1_ab[:4] = rho1_ab_tmp[:4] rho1_ba[:4] = rho1_ba_tmp[:4] rho1_ab[4] = rho1_ab_tmp[5] @@ -801,7 +741,7 @@ def mgga_sum_(vmat, ao, wv, mask): ni.eval_rho(mol, ao, dmoo[1], mask, xctype, hermi=1)) wv_tmp = numint._uks_mgga_wv1(rho, rho2, vxc, fxc, weight) # # Padding for laplacian - wv = np.empty((2,5,ngrid)) + wv = cp.empty((2,5,ngrid)) wv[0][:4] = wv_tmp[0][:4] wv[0][4] = wv_tmp[0][5] wv[1][:4] = wv_tmp[1][:4] @@ -813,7 +753,7 @@ def mgga_sum_(vmat, ao, wv, mask): if with_vxc: wv_tmp = numint._uks_mgga_wv0(rho, vxc, weight) # # Padding for laplacian - wv = np.empty((2,5,ngrid)) + wv = cp.empty((2,5,ngrid)) wv[0][:4] = wv_tmp[0][:4] wv[0][4] = wv_tmp[0][5] wv[1][:4] = wv_tmp[1][:4] @@ -826,64 +766,122 @@ def mgga_sum_(vmat, ao, wv, mask): raise NotImplementedError(f'td-uks for functional {xc_code}') f1vo[:,:,1:] *= -1 - if f1oo is not None: f1oo[:,1:] *= -1 - if v1ao is not None: v1ao[:,1:] *= -1 + f1vo = opt.unsort_orbitals(f1vo, axis=[3, 4]) + if f1oo is not None: + f1oo[:,1:] *= -1 + f1oo = opt.unsort_orbitals(f1oo, axis=[2, 3]) + if v1ao is not None: + v1ao[:,1:] *= -1 + v1ao = opt.unsort_orbitals(v1ao, axis=[2, 3]) if with_kxc: k1ao_xpy[:,:,1:] *= -1 k1ao_xmy[:,:,1:] *= -1 + k1ao_xpy = opt.unsort_orbitals(k1ao_xpy, axis=[3, 4]) + k1ao_xmy = opt.unsort_orbitals(k1ao_xmy, axis=[3, 4]) + return f1vo, f1oo, v1ao, (k1ao_xpy,k1ao_xmy) -def uks_sf_gga_wv1(rho1, fxc_sf,weight): +def uks_sf_gga_wv1(rho1, fxc_sf, weight, extype): # fxc_sf with a shape (4,4,ngrid), 4 means I, \nabla_x,y,z. 
- rho1_ab,rho1_ba = rho1 - ngrid = weight.shape[-1] - wv_ab, wv_ba = np.empty((2,4,ngrid)) - wv_ab = np.einsum('yp,xyp->xp', rho1_ab,fxc_sf) - wv_ba = np.einsum('yp,xyp->xp', rho1_ba,fxc_sf) - # wv_ab[0] = wv_ab[0] *2 *.5 # *2 bacause of kernel, *0.5 for the (x + x.T)*0.5 - # wv_ba[0] = wv_ba[0] *2 *.5 - - # Don't forget (sigma_x sigma_x + sigma_y sigma_y) needs *2 for kernel term. - wv_ab[1:] *=2.0 - wv_ba[1:] *=2.0 - return wv_ab*weight, wv_ba*weight - -def uks_sf_gga_wv2_p(rho1, kxc_sf,weight): + if extype == 0: + ngrid = weight.shape[-1] + wv_ab = cp.empty((4,ngrid)) + wv_ab = cp.einsum('yp,xyp->xp', rho1,fxc_sf) + + # Don't forget (sigma_x sigma_x + sigma_y sigma_y) needs *2 for kernel term. + wv_ab[1:] *=2.0 + return wv_ab*weight + elif extype == 1: + ngrid = weight.shape[-1] + wv_ba = cp.empty((4,ngrid)) + wv_ba = cp.einsum('yp,xyp->xp', rho1,fxc_sf) + + # Don't forget (sigma_x sigma_x + sigma_y sigma_y) needs *2 for kernel term. + wv_ba[1:] *=2.0 + return wv_ba*weight + # rho1_ab,rho1_ba = rho1 + # ngrid = weight.shape[-1] + # wv_ab, wv_ba = cp.empty((2,4,ngrid)) + # wv_ab = cp.einsum('yp,xyp->xp', rho1_ab,fxc_sf) + # wv_ba = cp.einsum('yp,xyp->xp', rho1_ba,fxc_sf) + # # wv_ab[0] = wv_ab[0] *2 *.5 # *2 bacause of kernel, *0.5 for the (x + x.T)*0.5 + # # wv_ba[0] = wv_ba[0] *2 *.5 + + # # Don't forget (sigma_x sigma_x + sigma_y sigma_y) needs *2 for kernel term. + # wv_ab[1:] *=2.0 + # wv_ba[1:] *=2.0 + # return wv_ab*weight, wv_ba*weight + +def uks_sf_gga_wv2_p(rho1, kxc_sf, weight, extype): # kxc_sf with a shape (4,4,2,4,ngrid), 4 means I,\nabla_x,y,z, # 0: n, \nabla_x,y,z n; 1: s, \nabla_x,y,z s. 
- rho1_ab,rho1_ba = rho1 - ngrid = weight.shape[-1] - gv_ab, gv_ba = np.empty((2,2,4,ngrid)) - # Note *2 and *0.5 like in function uks_sf_gga_wv1 - gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab+rho1_ba, kxc_sf, optimize=True) - gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba+rho1_ab, kxc_sf, optimize=True) - - gv_ab[0,1:] *=2.0 - gv_ab[1,1:] *=2.0 - gv_ba[0,1:] *=2.0 - gv_ba[1,1:] *=2.0 - return gv_ab*weight, gv_ba*weight - -def uks_sf_gga_wv2_m(rho1, kxc_sf,weight): - rho1_ab,rho1_ba = rho1 - ngrid = weight.shape[-1] - gv_ab, gv_ba = np.empty((2,2,5,ngrid)) - # Note *2 and *0.5 like in function uks_sf_mgga_wv1 - gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab-rho1_ba, kxc_sf , optimize=True) - gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba-rho1_ab, kxc_sf , optimize=True) - - gv_ab[:,1:] *=2.0 - gv_ba[:,1:] *=2.0 - return gv_ab*weight, gv_ba*weight + if extype == 0: + ngrid = weight.shape[-1] + gv_ab = cp.empty((2,4,ngrid)) + # Note *2 and *0.5 like in function uks_sf_gga_wv1 + gv_ab = cp.einsum('xp,yp,xyvzp->vzp', rho1, rho1, kxc_sf, optimize=True) + + gv_ab[0,1:] *=2.0 + gv_ab[1,1:] *=2.0 + return gv_ab*weight + elif extype == 1: + ngrid = weight.shape[-1] + gv_ba = cp.empty((2,4,ngrid)) + # Note *2 and *0.5 like in function uks_sf_gga_wv1 + gv_ba = cp.einsum('xp,yp,xyvzp->vzp', rho1, rho1, kxc_sf, optimize=True) + + gv_ba[0,1:] *=2.0 + gv_ba[1,1:] *=2.0 + return gv_ba*weight + # rho1_ab,rho1_ba = rho1 + # ngrid = weight.shape[-1] + # gv_ab, gv_ba = cp.empty((2,2,4,ngrid)) + # # Note *2 and *0.5 like in function uks_sf_gga_wv1 + # gv_ab = cp.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab+rho1_ba, kxc_sf, optimize=True) + # gv_ba = cp.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba+rho1_ab, kxc_sf, optimize=True) + + # gv_ab[0,1:] *=2.0 + # gv_ab[1,1:] *=2.0 + # gv_ba[0,1:] *=2.0 + # gv_ba[1,1:] *=2.0 + # return gv_ab*weight, gv_ba*weight + +def uks_sf_gga_wv2_m(rho1, kxc_sf,weight, extype): + if extype == 0: + ngrid = weight.shape[-1] + 
gv_ab = cp.empty((2,5,ngrid)) + # Note *2 and *0.5 like in function uks_sf_mgga_wv1 + gv_ab = cp.einsum('xp,yp,xyvzp->vzp', rho1, rho1, kxc_sf , optimize=True) + + gv_ab[:,1:] *=2.0 + return gv_ab*weight + elif extype == 1: + ngrid = weight.shape[-1] + gv_ba = cp.empty((2,5,ngrid)) + # Note *2 and *0.5 like in function uks_sf_mgga_wv1 + gv_ba = cp.einsum('xp,yp,xyvzp->vzp', rho1, rho1, kxc_sf , optimize=True) + + gv_ba[:,1:] *=2.0 + return gv_ba*weight + # rho1_ab,rho1_ba = rho1 + # ngrid = weight.shape[-1] + # gv_ab, gv_ba = cp.empty((2,2,5,ngrid)) + # # Note *2 and *0.5 like in function uks_sf_mgga_wv1 + # gv_ab = cp.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab-rho1_ba, kxc_sf , optimize=True) + # gv_ba = cp.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba-rho1_ab, kxc_sf , optimize=True) + + # gv_ab[:,1:] *=2.0 + # gv_ba[:,1:] *=2.0 + # return gv_ab*weight, gv_ba*weight def uks_sf_mgga_wv1(rho1, fxc_sf,weight): rho1_ab,rho1_ba = rho1 # fxc_sf with a shape (5,5,ngrid), 5 means I, \nabla_x,y,z s, u # s_s, s_Ns, Ns_s, Ns_Ns, s_u, u_s, u_Ns, Ns_u, u_u ngrid = weight.shape[-1] - wv_ab, wv_ba = np.empty((2,5,ngrid)) - wv_ab = np.einsum('yp,xyp->xp', rho1_ab,fxc_sf) - wv_ba = np.einsum('yp,xyp->xp', rho1_ba,fxc_sf) + wv_ab, wv_ba = cp.empty((2,5,ngrid)) + wv_ab = cp.einsum('yp,xyp->xp', rho1_ab,fxc_sf) + wv_ba = cp.einsum('yp,xyp->xp', rho1_ba,fxc_sf) # wv_ab[0] = wv_ab[0] *2 *.5 # *2 bacause of kernel, *0.5 for the (x + x.T)*0.5 # wv_ba[0] = wv_ba[0] *2 *.5 @@ -908,10 +906,10 @@ def uks_sf_mgga_wv2_p(rho1, kxc_sf,weight): # Ns_u -> # u_u -> ngrid = weight.shape[-1] - gv_ab, gv_ba = np.empty((2,2,5,ngrid)) + gv_ab, gv_ba = cp.empty((2,2,5,ngrid)) # Note *2 and *0.5 like in function uks_sf_mgga_wv1 - gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab+rho1_ba, kxc_sf, optimize=True) - gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba+rho1_ab, kxc_sf, optimize=True) + gv_ab = cp.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab+rho1_ba, kxc_sf, optimize=True) + gv_ba = 
cp.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba+rho1_ab, kxc_sf, optimize=True) gv_ab[:,1:4] *=2.0 gv_ba[:,1:4] *=2.0 @@ -922,10 +920,10 @@ def uks_sf_mgga_wv2_p(rho1, kxc_sf,weight): def uks_sf_mgga_wv2_m(rho1, kxc_sf,weight): rho1_ab,rho1_ba = rho1 ngrid = weight.shape[-1] - gv_ab, gv_ba = np.empty((2,2,5,ngrid)) + gv_ab, gv_ba = cp.empty((2,2,5,ngrid)) # Note *2 and *0.5 like in function uks_sf_mgga_wv1 - gv_ab = np.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab-rho1_ba, kxc_sf , optimize=True) - gv_ba = np.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba-rho1_ab, kxc_sf , optimize=True) + gv_ab = cp.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab-rho1_ba, kxc_sf , optimize=True) + gv_ba = cp.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba-rho1_ab, kxc_sf , optimize=True) gv_ab[:,1:4] *=2.0 gv_ba[:,1:4] *=2.0 @@ -933,6 +931,7 @@ def uks_sf_mgga_wv2_m(rho1, kxc_sf,weight): gv_ba[:,4] *= 0.5 return gv_ab*weight, gv_ba*weight + def _contract_xc_kernel_z(td_grad, xc_code, dmvo, max_memory=2000): mol = td_grad.base._scf.mol mf = td_grad.base._scf @@ -951,7 +950,7 @@ def _contract_xc_kernel_z(td_grad, xc_code, dmvo, max_memory=2000): dmvo = [(dmvo[0]+dmvo[0].T)*.5, (dmvo[1]+dmvo[1].T)*.5] - f1vo = np.zeros((2,4,nao,nao)) + f1vo = cp.zeros((2,4,nao,nao)) deriv = 2 if xctype == 'LDA': From e60dffed2a2ea9a10fabea7f3020331eabea1000 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Wed, 22 Oct 2025 14:04:06 +0800 Subject: [PATCH 18/32] debug --- gpu4pyscf/grad/tduks_sf.py | 275 +++++++++++++++++++++----------- gpu4pyscf/tdscf/_uhf_resp_sf.py | 26 +-- gpu4pyscf/tdscf/uhf.py | 7 + 3 files changed, 202 insertions(+), 106 deletions(-) diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index 8b9dd5ffd..cc53f0f3d 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -16,12 +16,14 @@ import numpy as np import cupy as cp from pyscf import lib -from pyscf.lib import logger -from pyscf.scf import ucphf +from gpu4pyscf.lib import logger +from gpu4pyscf.scf 
import ucphf from pyscf.dft import numint -from pyscf.grad import rks as rks_grad -from pyscf.grad import tdrhf as tdrhf_grad -from gpu4pyscf.tdscf._uhf_resp_sf import cache_xc_kernel_sf +from gpu4pyscf.df import int3c2e +from gpu4pyscf.lib.cupy_helper import contract +from gpu4pyscf.grad import rks as rks_grad +from gpu4pyscf.grad import tdrhf as tdrhf_grad +from gpu4pyscf.tdscf._uhf_resp_sf import cache_xc_kernel_sf, mcfun_eval_xc_adapter_sf from gpu4pyscf.grad import tdrks @@ -47,6 +49,12 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): mo_coeff = mf.mo_coeff mo_energy = mf.mo_energy mo_occ = mf.mo_occ + if not isinstance(mo_occ, cp.ndarray): + mo_occ = cp.asarray(mo_occ) + if not isinstance(mo_energy, cp.ndarray): + mo_energy = cp.asarray(mo_energy) + if not isinstance(mo_coeff, cp.ndarray): + mo_coeff = cp.asarray(mo_coeff) occidxa = cp.where(mo_occ[0]>0)[0] occidxb = cp.where(mo_occ[1]>0)[0] @@ -67,9 +75,11 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): if td_grad.base.extype == 0: # spin-flip-up # x_ab, a means vira, b means occb - (x_ab, x_ba), (y_ab, y_ba) = x_y - xpy_ab = (x_ab + y_ab).T - xmy_ab = (x_ab - y_ab).T + x_ab, y_ab = x_y + if not isinstance(x_ab, cp.ndarray): + x_ab = cp.asarray(x_ab) + xpy_ab = x_ab.T + xmy_ab = x_ab.T dvv_a = cp.einsum('ai,bi->ab', xpy_ab, xpy_ab) + cp.einsum('ai,bi->ab', xmy_ab, xmy_ab) # T^{ab \alpha \beta}*2 doo_b =-cp.einsum('ai,aj->ij', xpy_ab, xpy_ab) - cp.einsum('ai,aj->ij', xmy_ab, xmy_ab) # T^{ij \alpha \beta}*2 @@ -156,9 +166,11 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): wvoa -= cp.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 elif td_grad.base.extype == 1: # spin-flip-down # x_ab, a means vira, b means occb - (x_ab, x_ba), (y_ab, y_ba) = x_y - xpy_ba = (x_ba + y_ba).T - xmy_ba = (x_ba - y_ba).T + x_ba, y_ba = x_y + if not isinstance(x_ba, cp.ndarray): + x_ba = cp.asarray(x_ba) + xpy_ba = 
x_ba.T + xmy_ba = x_ba.T dvv_b = cp.einsum('ai,bi->ab', xpy_ba, xpy_ba) + cp.einsum('ai,bi->ab', xmy_ba, xmy_ba) # T^{ab \beta \alpha}*2 doo_a =-cp.einsum('ai,aj->ij', xpy_ba, xpy_ba) - cp.einsum('ai,aj->ij', xmy_ba, xmy_ba) # T^{ij \beta \alpha}*2 @@ -167,7 +179,7 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): dmxmy_ba = reduce(cp.dot, (orbvb, xmy_ba, orboa.T)) # ua ai iv -> uv -> (X-Y)_{uv \beta \alpha} dmzoo_a = reduce(cp.dot, (orboa, doo_a, orboa.T)) # \sum_{\sigma ab} 2*Tab \sigma C_{au} C_{bu} - dmzoo_b+= reduce(cp.dot, (orbvb, dvv_b, orbvb.T)) + dmzoo_b = reduce(cp.dot, (orbvb, dvv_b, orbvb.T)) ni = mf._numint ni.libxc.test_deriv_order(mf.xc, 3, raise_error=True) @@ -191,6 +203,10 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): dm = (dmzoo_a, dmxpy_ba, dmxmy_ba, dmzoo_b, dmxpy_ba.T, -dmxmy_ba.T) vj, vk = mf.get_jk(mol, dm, hermi=0) + if not isinstance(vj, cp.ndarray): + vj = cp.asarray(vj) + if not isinstance(vk, cp.ndarray): + vk = cp.asarray(vk) vk *= hyb if abs(omega) > 1e-10: vk += mf.get_k(mol, dm, hermi=0, omega=omega) * (alpha-hyb) @@ -325,59 +341,81 @@ def fvind(x): im0b = reduce(cp.dot, (mo_coeff[1], im0b+zeta_b*dm1b, mo_coeff[1].T)) im0 = im0a + im0b - # Initialize hcore_deriv with the underlying SCF object because some - # extensions (e.g. QM/MM, solvent) modifies the SCF object only. 
- mf_grad = mf.nuc_grad_method() - hcore_deriv = mf_grad.hcore_generator(mol) - - # -mol.intor('int1e_ipovlp', comp=3) - s1 = mf_grad.get_ovlp(mol) - dmz1doo_a = z1ao[0] + dmzoo_a dmz1doo_b = z1ao[1] + dmzoo_b oo0a = reduce(cp.dot, (orboa, orboa.T)) oo0b = reduce(cp.dot, (orbob, orbob.T)) - as_dm1 = oo0a + oo0b + (dmz1doo_a + dmz1doo_b) * .5 + mf_grad = mf.nuc_grad_method().to_cpu() + h1 = cp.asarray(mf_grad.get_hcore(mol)) # without 1/r like terms + s1 = cp.asarray(mf_grad.get_ovlp(mol)) + dh_ground = contract("xij,ij->xi", h1, oo0a + oo0b) + dh_td = contract("xij,ij->xi", h1, (dmz1doo_a + dmz1doo_b) * .5) + ds = contract("xij,ij->xi", s1, (im0 + im0.T) * 0.5) + + dh1e_ground = int3c2e.get_dh1e(mol, oo0a + oo0b) # 1/r like terms + if mol.has_ecp(): + dh1e_ground += rhf_grad.get_dh1e_ecp(mol, oo0a + oo0b) # 1/r like terms + dh1e_td = int3c2e.get_dh1e(mol, (dmz1doo_a + dmz1doo_b) * 0.25 + (dmz1doo_a + dmz1doo_b).T * 0.25) # 1/r like terms + if mol.has_ecp(): + dh1e_td += rhf_grad.get_dh1e_ecp( + mol, (dmz1doo_a + dmz1doo_b) * 0.25 + (dmz1doo_a + dmz1doo_b).T * 0.25) # 1/r like terms + if td_grad.base.extype == 0: if abs(hyb) > 1e-10: - dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ab.T, -dmxmy_ab.T, - oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ab, dmxmy_ab) - vj, vk = td_grad.get_jk(mol, dm) + dm = (oo0a.get(), (dmz1doo_a+dmz1doo_a.T).get(), dmxpy_ab.T.get(), -dmxmy_ab.T.get(), + oo0b.get(), (dmz1doo_b+dmz1doo_b.T).get(), dmxpy_ab.get(), dmxmy_ab.get()) + vj, vk = mf_grad.get_jk(mol, dm) + if not isinstance(vj, cp.ndarray): + vj = cp.asarray(vj) + if not isinstance(vk, cp.ndarray): + vk = cp.asarray(vk) vj = vj.reshape(2,4,3,nao,nao) vk = vk.reshape(2,4,3,nao,nao) * hyb vj[:,2:4] *= 0.0 if abs(omega) > 1e-10: with mol.with_range_coulomb(omega): - vk += td_grad.get_k(mol, dm).reshape(2,4,3,nao,nao) * (alpha-hyb) + vk += mf_grad.get_k(mol, dm).reshape(2,4,3,nao,nao) * (alpha-hyb) + if not isinstance(vk, cp.ndarray): + vk = cp.asarray(vk) veff1 = cp.zeros((2,4,3,nao,nao)) 
veff1[:,:2] = vj[0,:2] + vj[1,:2] - vk[:,:2] else: - dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ab.T, - oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ab) - vj = td_grad.get_j(mol, dm).reshape(2,3,3,nao,nao) + dm = (oo0a.get(), (dmz1doo_a+dmz1doo_a.T).get(), dmxpy_ab.T.get(), + oo0b.get(), (dmz1doo_b+dmz1doo_b.T).get(), dmxpy_ab) + vj = mf_grad.get_j(mol, dm).reshape(2,3,3,nao,nao) + if not isinstance(vj, cp.ndarray): + vj = cp.asarray(vj) vj[:,2] *= 0.0 veff1 = cp.zeros((2,4,3,nao,nao)) veff1[:,:3] = vj[0] + vj[1] elif td_grad.base.extype == 1: if abs(hyb) > 1e-10: - dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ba, dmxmy_ba, - oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ba.T, -dmxmy_ba.T) - vj, vk = td_grad.get_jk(mol, dm) + dm = (oo0a.get(), (dmz1doo_a+dmz1doo_a.T).get(), dmxpy_ba.get(), dmxmy_ba.get(), + oo0b.get(), (dmz1doo_b+dmz1doo_b.T).get(), dmxpy_ba.T.get(), -dmxmy_ba.T.get()) + vj, vk = mf_grad.get_jk(mol, dm) + if not isinstance(vj, cp.ndarray): + vj = cp.asarray(vj) + if not isinstance(vk, cp.ndarray): + vk = cp.asarray(vk) vj = vj.reshape(2,4,3,nao,nao) vk = vk.reshape(2,4,3,nao,nao) * hyb vj[:,2:4] *= 0.0 if abs(omega) > 1e-10: with mol.with_range_coulomb(omega): - vk += td_grad.get_k(mol, dm).reshape(2,4,3,nao,nao) * (alpha-hyb) + vk += mf_grad.get_k(mol, dm).reshape(2,4,3,nao,nao) * (alpha-hyb) + if not isinstance(vk, cp.ndarray): + vk = cp.asarray(vk) veff1 = cp.zeros((2,4,3,nao,nao)) veff1[:,:2] = vj[0,:2] + vj[1,:2] - vk[:,:2] else: - dm = (oo0a, dmz1doo_a+dmz1doo_a.T, dmxpy_ba, - oo0b, dmz1doo_b+dmz1doo_b.T, dmxpy_ba.T) - vj = td_grad.get_j(mol, dm).reshape(2,3,3,nao,nao) + dm = (oo0a.get(), (dmz1doo_a+dmz1doo_a.T).get(), dmxpy_ba.get(), + oo0b.get(), (dmz1doo_b+dmz1doo_b.T).get(), dmxpy_ba.T.get()) + vj = mf_grad.get_j(mol, dm).reshape(2,3,3,nao,nao) + if not isinstance(vj, cp.ndarray): + vj = cp.asarray(vj) vj[:,2] *= 0.0 veff1 = cp.zeros((2,4,3,nao,nao)) veff1[:,:3] = vj[0] + vj[1] @@ -400,13 +438,14 @@ def fvind(x): atmlst = range(mol.natm) offsetdic = 
mol.offset_nr_by_atom() de = cp.zeros((len(atmlst),3)) - + delec = 2.0 * (dh_ground + dh_td - ds) + aoslices = mol.aoslice_by_atom() + delec = cp.asarray([cp.sum(delec[:, p0:p1], axis=1) for p0, p1 in aoslices[:, 2:]]) + de += delec for k, ia in enumerate(atmlst): shl0, shl1, p0, p1 = offsetdic[ia] # Ground state gradients - h1ao = hcore_deriv(ia) - de[k] = cp.einsum('xpq,pq->x', h1ao, as_dm1) de[k] += cp.einsum('xpq,pq->x', veff1a[0,:,p0:p1], oo0a[p0:p1]) de[k] += cp.einsum('xpq,pq->x', veff1b[0,:,p0:p1], oo0b[p0:p1]) de[k] += cp.einsum('xpq,qp->x', veff1a[0,:,p0:p1], oo0a[:,p0:p1]) @@ -417,9 +456,6 @@ def fvind(x): de[k] += cp.einsum('xpq,qp->x', veff1a[0,:,p0:p1], dmz1doo_a[:,p0:p1]) *.5 de[k] += cp.einsum('xpq,qp->x', veff1b[0,:,p0:p1], dmz1doo_b[:,p0:p1]) *.5 - de[k] -= cp.einsum('xpq,pq->x', s1[:,p0:p1], im0[p0:p1]) - de[k] -= cp.einsum('xqp,pq->x', s1[:,p0:p1], im0[:,p0:p1]) - de[k] += cp.einsum('xij,ij->x', veff1a[1,:,p0:p1], oo0a[p0:p1]) *0.5 de[k] += cp.einsum('xij,ij->x', veff1b[1,:,p0:p1], oo0b[p0:p1]) *0.5 @@ -451,7 +487,7 @@ def fvind(x): de[k] += cp.einsum('xji,ij->x', vk[1,3,:,p0:p1], dmxmy_ba[:,p0:p1]) log.timer('TDUKS nuclear gradients', *time0) - return de + return de.get() def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, with_kxc=True, max_memory=2000, extype=0): @@ -480,8 +516,8 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, if dmoo is not None: f1oo = cp.zeros((2,4,nao,nao)) - dmoo0 = opt.sort_orbitals(dmoo, axis=[0, 1]) - dmoo1 = opt.sort_orbitals(dmoo, axis=[0, 1]) + dmoo0 = opt.sort_orbitals(dmoo[0], axis=[0, 1]) + dmoo1 = opt.sort_orbitals(dmoo[1], axis=[0, 1]) else: f1oo = None if with_vxc: @@ -504,8 +540,8 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, # nimc.collinear_samples=td_grad.base.collinear_samples # calculate the derivatives. 
- fxc_sf,kxc_sf = cache_xc_kernel_sf(ni,mol,mf.grids,mf.xc,mo_coeff,mo_occ, - td_grad.base.collinear_samples,deriv=3)[2:] + # fxc_sf,kxc_sf = cache_xc_kernel_sf(ni,mol,mf.grids,mf.xc,mo_coeff,mo_occ, + # td_grad.base.collinear_samples,deriv=3)[2:] p0,p1=0,0 # the two parameters are used for counts the batch of grids. if xctype == "LDA": @@ -520,12 +556,25 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, in ni.block_loop(_sorted_mol, grids, nao, ao_deriv): p0 = p1 p1+= weight.shape[0] - s_s = fxc_sf[...,p0:p1] * weight mo_coeff_mask_a = mo_coeff[0, mask] mo_coeff_mask_b = mo_coeff[1, mask] dmvo0_mask = dmvo0[mask[:, None], mask] dmvo1_mask = dmvo1[mask[:, None], mask] + with_lapl = False + # fxc_sf, kxc_sf = cache_xc_kernel_sf(ni,mol,mf.grids,mf.xc,mo_coeff,mo_occ, + # td_grad.base.collinear_samples,deriv=3)[2:] + rhoa_slice = ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_a, + mo_occ[0], None, xctype, with_lapl) + rhob_slice = ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_b, + mo_occ[1], None, xctype, with_lapl) + rho_ab = (rhoa_slice, rhob_slice) + rho_z = cp.array([rho_ab[0]+rho_ab[1], + rho_ab[0]-rho_ab[1]]) + eval_xc_eff = mcfun_eval_xc_adapter_sf(ni, xc_code, td_grad.base.collinear_samples) + fxc_sf, kxc_sf = eval_xc_eff(xc_code, rho_z, deriv=3, xctype=xctype)[2:4] + s_s = fxc_sf * weight + if extype == 0: rho1_ab = ni.eval_rho(_sorted_mol, ao[0], dmvo0_mask, mask, xctype) @@ -536,8 +585,8 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, # lda_sum_(f1vo[0][0], ao, rho1_ab*s_s*2, mask) if with_kxc: - s_s_n = kxc_sf[:,:,0][...,p0:p1] * weight - s_s_s = kxc_sf[:,:,1][...,p0:p1] * weight + s_s_n = kxc_sf[:,:,0] * weight + s_s_s = kxc_sf[:,:,1] * weight fmat_(_sorted_mol, k1ao_xpy[0][0], ao, s_s_n*2*rho1_ab*rho1_ab, mask, shls_slice, ao_loc) fmat_(_sorted_mol, k1ao_xpy[1][0], ao, s_s_s*2*rho1_ab*rho1_ab, mask, shls_slice, ao_loc) @@ -558,8 +607,8 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, 
with_vxc=True, fmat_(_sorted_mol, f1vo[0][0], ao, rho1_ba*s_s*2, mask, shls_slice, ao_loc) if with_kxc: - s_s_n = kxc_sf[:,:,0][...,p0:p1] * weight - s_s_s = kxc_sf[:,:,1][...,p0:p1] * weight + s_s_n = kxc_sf[:,:,0] * weight + s_s_s = kxc_sf[:,:,1] * weight fmat_(_sorted_mol, k1ao_xpy[0][1], ao, s_s_n*2*rho1_ba*rho1_ba, mask, shls_slice, ao_loc) fmat_(_sorted_mol, k1ao_xpy[1][1], ao, s_s_s*2*rho1_ba*rho1_ba, mask, shls_slice, ao_loc) @@ -576,7 +625,7 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, rho = (ni.eval_rho2(_sorted_mol, ao[0],mo_coeff_mask_a, mo_occ[0], mask, xctype), ni.eval_rho2(_sorted_mol, ao[0], mo_coeff_mask_b, mo_occ[1], mask, xctype)) - vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, 1, deriv=deriv)[1:] + vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv=deriv, spin=1)[1:] u_u, u_d, d_d = fxc[0].T * weight if dmoo is not None: dmoo0_mask = dmoo0[mask[:, None], mask] @@ -601,64 +650,80 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, dmvo0_mask = dmvo0[mask[:, None], mask] dmvo1_mask = dmvo1[mask[:, None], mask] + with_lapl = False + # fxc_sf, kxc_sf = cache_xc_kernel_sf(ni,mol,mf.grids,mf.xc,mo_coeff,mo_occ, + # td_grad.base.collinear_samples,deriv=3)[2:] + rhoa_slice = ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_a, + mo_occ[0], None, xctype, with_lapl) + rhob_slice = ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_b, + mo_occ[1], None, xctype, with_lapl) + rho_ab = (rhoa_slice, rhob_slice) + rho_z = cp.array([rho_ab[0]+rho_ab[1], + rho_ab[0]-rho_ab[1]]) + eval_xc_eff = mcfun_eval_xc_adapter_sf(ni, xc_code, td_grad.base.collinear_samples) + fxc_sf, kxc_sf = eval_xc_eff(xc_code, rho_z, deriv=3, xctype=xctype)[2:4] + if extype == 0: rho1_ab = ni.eval_rho(_sorted_mol, ao, dmvo0_mask, mask, xctype) - wv_sf = uks_sf_gga_wv1(rho1_ab,fxc_sf[...,p0:p1],weight, extype) + wv_sf = uks_sf_gga_wv1(rho1_ab,fxc_sf,weight, extype) fmat_(_sorted_mol, f1vo[0][1], ao, wv_sf, mask, shls_slice, ao_loc) 
fmat_(_sorted_mol, f1vo[0][0], ao, wv_sf, mask, shls_slice, ao_loc) if with_kxc: - gv_sf = uks_sf_gga_wv2_p(rho1_ab,kxc_sf[...,p0:p1],weight, extype) + gv_sf = uks_sf_gga_wv2_p(rho1_ab,kxc_sf,weight, extype) fmat_(_sorted_mol, k1ao_xpy[0][0], ao, gv_sf[0], mask, shls_slice, ao_loc) fmat_(_sorted_mol, k1ao_xpy[1][0], ao, gv_sf[1], mask, shls_slice, ao_loc) rho1_ab = ni.eval_rho(_sorted_mol, ao, dmvo1_mask, mask, xctype) - wv_sf = uks_sf_gga_wv1(rho1_ab,fxc_sf[...,p0:p1],weight, extype) + wv_sf = uks_sf_gga_wv1(rho1_ab,fxc_sf,weight, extype) fmat_(_sorted_mol, f1vo[1][1], ao, wv_sf, mask, shls_slice, ao_loc) fmat_(_sorted_mol, f1vo[1][0], ao, -wv_sf, mask, shls_slice, ao_loc) if with_kxc: - gv_sf = uks_sf_gga_wv2_m(rho1_ab,kxc_sf[...,p0:p1],weight, extype) + gv_sf = uks_sf_gga_wv2_m(rho1_ab,kxc_sf,weight, extype) fmat_(_sorted_mol, k1ao_xmy[0][0], ao, gv_sf[0], mask, shls_slice, ao_loc) fmat_(_sorted_mol, k1ao_xmy[1][0], ao, gv_sf[1], mask, shls_slice, ao_loc) elif extype == 1: rho1_ba = ni.eval_rho(_sorted_mol, ao, dmvo0_mask, mask, xctype) - wv_sf = uks_sf_gga_wv1(rho1_ba,fxc_sf[...,p0:p1],weight, extype) + wv_sf = uks_sf_gga_wv1(rho1_ba,fxc_sf,weight, extype) fmat_(_sorted_mol, f1vo[0][1], ao, wv_sf, mask, shls_slice, ao_loc) fmat_(_sorted_mol, f1vo[0][0], ao, wv_sf, mask, shls_slice, ao_loc) if with_kxc: - gv_sf = uks_sf_gga_wv2_p(rho1_ba,kxc_sf[...,p0:p1],weight, extype) + gv_sf = uks_sf_gga_wv2_p(rho1_ba,kxc_sf,weight, extype) fmat_(_sorted_mol, k1ao_xpy[0][1], ao, gv_sf[0], mask, shls_slice, ao_loc) fmat_(_sorted_mol, k1ao_xpy[1][1], ao, gv_sf[1], mask, shls_slice, ao_loc) rho1_ba = ni.eval_rho(_sorted_mol, ao, dmvo1_mask, mask, xctype) - wv_sf = uks_sf_gga_wv1(rho1_ba,fxc_sf[...,p0:p1],weight, extype) + wv_sf = uks_sf_gga_wv1(rho1_ba,fxc_sf,weight, extype) fmat_(_sorted_mol, f1vo[1][1], ao, -wv_sf, mask, shls_slice, ao_loc) fmat_(_sorted_mol, f1vo[1][0], ao, wv_sf, mask, shls_slice, ao_loc) if with_kxc: - gv_sf = 
uks_sf_gga_wv2_m(rho1_ba,kxc_sf[...,p0:p1],weight, extype) + gv_sf = uks_sf_gga_wv2_m(rho1_ba,kxc_sf,weight, extype) fmat_(_sorted_mol, k1ao_xmy[0][1], ao, gv_sf[0], mask, shls_slice, ao_loc) fmat_(_sorted_mol, k1ao_xmy[1][1], ao, gv_sf[1], mask, shls_slice, ao_loc) - rho = (ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_a, mo_occ[0], mask, xctype), - ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_b, mo_occ[1], mask, xctype)) - vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, 1, deriv=deriv)[1:] + rho = cp.stack([ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_a, mo_occ[0], mask, xctype), + ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_b, mo_occ[1], mask, xctype)]) + vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv=deriv, spin=1)[1:] if dmoo is not None: dmoo0_mask = dmoo0[mask[:, None], mask] dmoo1_mask = dmoo1[mask[:, None], mask] - rho2 = (ni.eval_rho(_sorted_mol, ao, dmoo0_mask, mask, xctype, hermi=1), - ni.eval_rho(_sorted_mol, ao, dmoo1_mask, mask, xctype, hermi=1)) - wv = numint._uks_gga_wv1(rho, rho2, vxc, fxc, weight) + rho2 = cp.stack([ni.eval_rho(_sorted_mol, ao, dmoo0_mask, mask, xctype, hermi=1), + ni.eval_rho(_sorted_mol, ao, dmoo1_mask, mask, xctype, hermi=1)]) + # wv = numint._uks_gga_wv1(rho, rho2, vxc, fxc, weight) + tmp = contract("axg,axbyg->byg", rho2, fxc) + wv = contract("byg,g->byg", tmp, weight) fmat_(_sorted_mol, f1oo[0], ao, wv[0], mask, shls_slice, ao_loc) fmat_(_sorted_mol, f1oo[1], ao, wv[1], mask, shls_slice, ao_loc) if with_vxc: - wv = numint._uks_gga_wv0(rho, vxc, weight) + # wv = numint._uks_gga_wv0(rho, vxc, weight) + wv = vxc * weight fmat_(_sorted_mol, v1ao[0], ao, wv[0], mask, shls_slice, ao_loc) fmat_(_sorted_mol, v1ao[1], ao, wv[1], mask, shls_slice, ao_loc) @@ -943,55 +1008,70 @@ def _contract_xc_kernel_z(td_grad, xc_code, dmvo, max_memory=2000): mo_coeff = mf.mo_coeff mo_occ = mf.mo_occ nao = mo_coeff[0].shape[0] - shls_slice = (0, mol.nbas) ao_loc = mol.ao_loc_nr() + opt = getattr(ni, "gdftopt", None) + if opt is None: + ni.build(mol, 
grids.coords) + opt = ni.gdftopt + _sorted_mol = opt._sorted_mol + mo_coeff = opt.sort_orbitals(mo_coeff, axis=[1]) + dmvo = [(dmvo[0]+dmvo[0].T)*.5, (dmvo[1]+dmvo[1].T)*.5] + dmvo0 = opt.sort_orbitals(dmvo[0], axis=[0, 1]) + dmvo1 = opt.sort_orbitals(dmvo[1], axis=[0, 1]) + f1vo = cp.zeros((2,4,nao,nao)) deriv = 2 + if xctype == "LDA": + fmat_, ao_deriv = tdrks._lda_eval_mat_, 1 + elif xctype == "GGA": + fmat_, ao_deriv = tdrks._gga_eval_mat_, 2 + elif xctype == "MGGA": + fmat_, ao_deriv = tdrks._mgga_eval_mat_, 2 + if xctype == 'LDA': - def lda_sum_(vmat, ao, wv, mask): - aow = numint._scale_ao(ao[0], wv) - for k in range(4): - vmat[k] += numint._dot_ao_ao(mol, ao[k], aow, mask, shls_slice, ao_loc) - ao_deriv = 1 for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): - rho = (ni.eval_rho2(mol, ao[0], mo_coeff[0], mo_occ[0], mask, xctype), - ni.eval_rho2(mol, ao[0], mo_coeff[1], mo_occ[1], mask, xctype)) - vxc, fxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:3] + in ni.block_loop(_sorted_mol, grids, nao, ao_deriv, max_memory): + mo_coeff_mask_a = mo_coeff[0, mask] + mo_coeff_mask_b = mo_coeff[1, mask] + dmvo0_mask = dmvo0[mask[:, None], mask] + dmvo1_mask = dmvo1[mask[:, None], mask] + rho = (ni.eval_rho2(_sorted_mol, ao[0], mo_coeff_mask_a, mo_occ[0], mask, xctype), + ni.eval_rho2(_sorted_mol, ao[0], mo_coeff_mask_b, mo_occ[1], mask, xctype)) + vxc, fxc = ni.eval_xc_eff(xc_code, rho, deriv=deriv, spin=1)[1:3] u_u, u_d, d_d = fxc[0].T * weight - rho1a = ni.eval_rho(mol, ao[0], dmvo[0], mask, xctype, hermi=1) - rho1b = ni.eval_rho(mol, ao[0], dmvo[1], mask, xctype, hermi=1) + rho1a = ni.eval_rho(_sorted_mol, ao[0], dmvo0_mask, mask, xctype, hermi=1) + rho1b = ni.eval_rho(_sorted_mol, ao[0], dmvo1_mask, mask, xctype, hermi=1) - lda_sum_(f1vo[0], ao, u_u*rho1a+u_d*rho1b, mask) - lda_sum_(f1vo[1], ao, u_d*rho1a+d_d*rho1b, mask) + fmat_(_sorted_mol, f1vo[0], ao, u_u*rho1a+u_d*rho1b, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, 
f1vo[1], ao, u_d*rho1a+d_d*rho1b, mask, shls_slice, ao_loc) elif xctype == 'GGA': - def gga_sum_(vmat, ao, wv, mask): - aow = numint._scale_ao(ao[:4], wv[:4]) - tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) - vmat[0] += tmp + tmp.T - rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv, mask, ao_loc) - ao_deriv = 2 for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): - rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype), - ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype)) - vxc, fxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:3] + in ni.block_loop(_sorted_mol, grids, nao, ao_deriv, max_memory): + mo_coeff_mask_a = mo_coeff[0, mask] + mo_coeff_mask_b = mo_coeff[1, mask] + dmvo0_mask = dmvo0[mask[:, None], mask] + dmvo1_mask = dmvo1[mask[:, None], mask] + rho = cp.stack((ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_a, mo_occ[0], mask, xctype), + ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_b, mo_occ[1], mask, xctype))) + vxc, fxc = ni.eval_xc_eff(xc_code, rho, deriv=deriv, spin=1)[1:3] - rho1 = (ni.eval_rho(mol, ao, dmvo[0], mask, xctype, hermi=1), - ni.eval_rho(mol, ao, dmvo[1], mask, xctype, hermi=1)) - wv = numint._uks_gga_wv1(rho, rho1, vxc, fxc, weight) - gga_sum_(f1vo[0], ao, wv[0], mask) - gga_sum_(f1vo[1], ao, wv[1], mask) + rho1 = cp.stack((ni.eval_rho(_sorted_mol, ao, dmvo0_mask, mask, xctype, hermi=1), + ni.eval_rho(_sorted_mol, ao, dmvo1_mask, mask, xctype, hermi=1))) + tmp = contract("axg,axbyg->byg", rho1, fxc) + wv = contract("byg,g->byg", tmp, weight) + fmat_(_sorted_mol, f1vo[0], ao, wv[0], mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[1], ao, wv[1], mask, shls_slice, ao_loc) elif xctype == 'MGGA': + raise NotImplementedError(f'td-uks for functional {xc_code}') def mgga_sum_(vmat, ao, wv, mask): aow = numint._scale_ao(ao[:4], wv[:4]) tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) @@ -1028,11 +1108,12 @@ def mgga_sum_(vmat, ao, wv, mask): raise 
NotImplementedError(f'td-uks for functional {xc_code}') f1vo[:,1:] *= -1 + f1vo = opt.unsort_orbitals(f1vo, axis=[3, 4]) return f1vo class Gradients(tdrhf_grad.Gradients): @lib.with_doc(grad_elec.__doc__) - def grad_elec(self, xy, singlet=None, atmlst=None): + def grad_elec(self, xy, singlet=None, atmlst=None, verbose=None): return grad_elec(self, xy, atmlst, self.max_memory, self.verbose) Grad = Gradients diff --git a/gpu4pyscf/tdscf/_uhf_resp_sf.py b/gpu4pyscf/tdscf/_uhf_resp_sf.py index 18ede0e32..5636d60f2 100644 --- a/gpu4pyscf/tdscf/_uhf_resp_sf.py +++ b/gpu4pyscf/tdscf/_uhf_resp_sf.py @@ -71,16 +71,24 @@ def _eval_xc_sf(func, rho_tmz, deriv, collinear_samples): nvar = 1 else: nvar = rho_tmz.shape[1] - # spin-flip part - fxc_sf = 0.0 + fxc_sf = cp.zeros((nvar,nvar,ngrids)) + kxc_sf = cp.zeros((nvar,nvar,2,nvar,ngrids)) + rho = _project_spin_paxis2(rho_tmz, sgridz) - fxc = func(rho, deriv)[2] - fxc = fxc.reshape(2, nvar, 2, nvar, ngrids, weights.size) - if not isinstance(fxc, cp.ndarray): - fxc = cp.array(fxc) - fxc_sf += fxc[1,:,1].dot(weights) - - return None,None,fxc_sf + xc_orig = func(rho, deriv) + if deriv > 1: + fxc = xc_orig[2].reshape(2, nvar, 2, nvar, ngrids, weights.size) + if not isinstance(fxc, cp.ndarray): + fxc = cp.array(fxc) + fxc_sf += fxc[1,:,1].dot(weights) + + if deriv > 2: + kxc = xc_orig[3].reshape(2, nvar, 2, nvar, 2, nvar, ngrids, weights.size) + if not isinstance(kxc, cp.ndarray): + kxc = cp.array(kxc) + kxc_sf[:,:,0] += kxc[1,:,1,:,0].dot(weights) + kxc_sf[:,:,1] += kxc[1,:,1,:,1].dot(weights*sgridz) + return None,None,fxc_sf,kxc_sf def _project_spin_paxis2(rho_tm, sgridz=None): diff --git a/gpu4pyscf/tdscf/uhf.py b/gpu4pyscf/tdscf/uhf.py index d7dc3d830..0e6439b5c 100644 --- a/gpu4pyscf/tdscf/uhf.py +++ b/gpu4pyscf/tdscf/uhf.py @@ -1022,6 +1022,13 @@ def get_ab(self, mf=None, mo_energy=None, mo_coeff=None, mo_occ=None, collinear= return get_ab_sf(mf, mo_energy=mo_energy, mo_coeff=mo_coeff, mo_occ=mo_occ, collinear=collinear, 
collinear_samples=collinear_samples) + def Gradients(self): + if getattr(self._scf, 'with_df', None): + raise NotImplementedError('DFT TD-SCF gradients are not implemented') + else: + from gpu4pyscf.grad import tduks_sf + return tduks_sf.Gradients(self) + def gen_tdhf_operation(td, mf, fock_ao=None, singlet=True, wfnsym=None): '''Generate function to compute From d48bb899c5fbe7a477f59d63e92c7963187cf88d Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 23 Oct 2025 16:37:34 +0800 Subject: [PATCH 19/32] finish debuging gga-xc part --- gpu4pyscf/grad/tduks_sf.py | 210 +++++++++---------------------------- 1 file changed, 50 insertions(+), 160 deletions(-) diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index cc53f0f3d..4cbeaea37 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -18,10 +18,11 @@ from pyscf import lib from gpu4pyscf.lib import logger from gpu4pyscf.scf import ucphf -from pyscf.dft import numint +from gpu4pyscf.dft import numint from gpu4pyscf.df import int3c2e -from gpu4pyscf.lib.cupy_helper import contract +from gpu4pyscf.lib.cupy_helper import contract, add_sparse from gpu4pyscf.grad import rks as rks_grad +from gpu4pyscf.grad import rhf as rhf_grad from gpu4pyscf.grad import tdrhf as tdrhf_grad from gpu4pyscf.tdscf._uhf_resp_sf import cache_xc_kernel_sf, mcfun_eval_xc_adapter_sf from gpu4pyscf.grad import tdrks @@ -100,7 +101,7 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): f1vo, f1oo, vxc1, k1ao = \ _contract_xc_kernel(td_grad, mf.xc, (dmxpy_ab, dmxmy_ab), - (dmzoo_a,dmzoo_b), True, True, max_memory) + (dmzoo_a,dmzoo_b), True, True, max_memory, td_grad.base.extype) k1ao_xpy, k1ao_xmy = k1ao # f1vo, (2,2,4,nao,nao), (X+Y) and (X-Y) with fxc_sf @@ -191,7 +192,7 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): f1vo, f1oo, vxc1, k1ao = \ _contract_xc_kernel(td_grad, mf.xc, (dmxpy_ba, dmxmy_ba), - (dmzoo_a,dmzoo_b), True, True, 
max_memory) + (dmzoo_a,dmzoo_b), True, True, max_memory, td_grad.base.extype) k1ao_xpy, k1ao_xmy = k1ao # f1vo, (2,2,4,nao,nao), (X+Y) and (X-Y) with fxc_sf @@ -341,8 +342,8 @@ def fvind(x): im0b = reduce(cp.dot, (mo_coeff[1], im0b+zeta_b*dm1b, mo_coeff[1].T)) im0 = im0a + im0b - dmz1doo_a = z1ao[0] + dmzoo_a - dmz1doo_b = z1ao[1] + dmzoo_b + dmz1dooa = z1ao[0] + dmzoo_a + dmz1doob = z1ao[1] + dmzoo_b oo0a = reduce(cp.dot, (orboa, orboa.T)) oo0b = reduce(cp.dot, (orbob, orbob.T)) @@ -350,21 +351,21 @@ def fvind(x): h1 = cp.asarray(mf_grad.get_hcore(mol)) # without 1/r like terms s1 = cp.asarray(mf_grad.get_ovlp(mol)) dh_ground = contract("xij,ij->xi", h1, oo0a + oo0b) - dh_td = contract("xij,ij->xi", h1, (dmz1doo_a + dmz1doo_b) * .5) + dh_td = contract("xij,ij->xi", h1, (dmz1dooa + dmz1doob) * 0.25 + (dmz1dooa + dmz1doob).T * 0.25) ds = contract("xij,ij->xi", s1, (im0 + im0.T) * 0.5) dh1e_ground = int3c2e.get_dh1e(mol, oo0a + oo0b) # 1/r like terms if mol.has_ecp(): dh1e_ground += rhf_grad.get_dh1e_ecp(mol, oo0a + oo0b) # 1/r like terms - dh1e_td = int3c2e.get_dh1e(mol, (dmz1doo_a + dmz1doo_b) * 0.25 + (dmz1doo_a + dmz1doo_b).T * 0.25) # 1/r like terms + dh1e_td = int3c2e.get_dh1e(mol, (dmz1dooa + dmz1doob) * 0.25 + (dmz1dooa + dmz1doob).T * 0.25) # 1/r like terms if mol.has_ecp(): dh1e_td += rhf_grad.get_dh1e_ecp( - mol, (dmz1doo_a + dmz1doo_b) * 0.25 + (dmz1doo_a + dmz1doo_b).T * 0.25) # 1/r like terms + mol, (dmz1dooa + dmz1doob) * 0.25 + (dmz1dooa + dmz1doob).T * 0.25) # 1/r like terms if td_grad.base.extype == 0: if abs(hyb) > 1e-10: - dm = (oo0a.get(), (dmz1doo_a+dmz1doo_a.T).get(), dmxpy_ab.T.get(), -dmxmy_ab.T.get(), - oo0b.get(), (dmz1doo_b+dmz1doo_b.T).get(), dmxpy_ab.get(), dmxmy_ab.get()) + dm = (oo0a.get(), (dmz1dooa+dmz1dooa.T).get(), dmxpy_ab.T.get(), -dmxmy_ab.T.get(), + oo0b.get(), (dmz1doob+dmz1doob.T).get(), dmxpy_ab.get(), dmxmy_ab.get()) vj, vk = mf_grad.get_jk(mol, dm) if not isinstance(vj, cp.ndarray): vj = cp.asarray(vj) @@ -382,8 +383,8 @@ 
def fvind(x): veff1 = cp.zeros((2,4,3,nao,nao)) veff1[:,:2] = vj[0,:2] + vj[1,:2] - vk[:,:2] else: - dm = (oo0a.get(), (dmz1doo_a+dmz1doo_a.T).get(), dmxpy_ab.T.get(), - oo0b.get(), (dmz1doo_b+dmz1doo_b.T).get(), dmxpy_ab) + dm = (oo0a.get(), (dmz1dooa+dmz1dooa.T).get(), dmxpy_ab.T.get(), + oo0b.get(), (dmz1doob+dmz1doob.T).get(), dmxpy_ab) vj = mf_grad.get_j(mol, dm).reshape(2,3,3,nao,nao) if not isinstance(vj, cp.ndarray): vj = cp.asarray(vj) @@ -392,8 +393,8 @@ def fvind(x): veff1[:,:3] = vj[0] + vj[1] elif td_grad.base.extype == 1: if abs(hyb) > 1e-10: - dm = (oo0a.get(), (dmz1doo_a+dmz1doo_a.T).get(), dmxpy_ba.get(), dmxmy_ba.get(), - oo0b.get(), (dmz1doo_b+dmz1doo_b.T).get(), dmxpy_ba.T.get(), -dmxmy_ba.T.get()) + dm = (oo0a.get(), (dmz1dooa+dmz1dooa.T).get(), dmxpy_ba.get(), dmxmy_ba.get(), + oo0b.get(), (dmz1doob+dmz1doob.T).get(), dmxpy_ba.T.get(), -dmxmy_ba.T.get()) vj, vk = mf_grad.get_jk(mol, dm) if not isinstance(vj, cp.ndarray): vj = cp.asarray(vj) @@ -411,8 +412,8 @@ def fvind(x): veff1 = cp.zeros((2,4,3,nao,nao)) veff1[:,:2] = vj[0,:2] + vj[1,:2] - vk[:,:2] else: - dm = (oo0a.get(), (dmz1doo_a+dmz1doo_a.T).get(), dmxpy_ba.get(), - oo0b.get(), (dmz1doo_b+dmz1doo_b.T).get(), dmxpy_ba.T.get()) + dm = (oo0a.get(), (dmz1dooa+dmz1dooa.T).get(), dmxpy_ba.get(), + oo0b.get(), (dmz1doob+dmz1doob.T).get(), dmxpy_ba.T.get()) vj = mf_grad.get_j(mol, dm).reshape(2,3,3,nao,nao) if not isinstance(vj, cp.ndarray): vj = cp.asarray(vj) @@ -441,7 +442,7 @@ def fvind(x): delec = 2.0 * (dh_ground + dh_td - ds) aoslices = mol.aoslice_by_atom() delec = cp.asarray([cp.sum(delec[:, p0:p1], axis=1) for p0, p1 in aoslices[:, 2:]]) - de += delec + de += delec + dh1e_ground + dh1e_td for k, ia in enumerate(atmlst): shl0, shl1, p0, p1 = offsetdic[ia] @@ -451,10 +452,10 @@ def fvind(x): de[k] += cp.einsum('xpq,qp->x', veff1a[0,:,p0:p1], oo0a[:,p0:p1]) de[k] += cp.einsum('xpq,qp->x', veff1b[0,:,p0:p1], oo0b[:,p0:p1]) - de[k] += cp.einsum('xpq,pq->x', veff1a[0,:,p0:p1], 
dmz1doo_a[p0:p1]) *.5 - de[k] += cp.einsum('xpq,pq->x', veff1b[0,:,p0:p1], dmz1doo_b[p0:p1]) *.5 - de[k] += cp.einsum('xpq,qp->x', veff1a[0,:,p0:p1], dmz1doo_a[:,p0:p1]) *.5 - de[k] += cp.einsum('xpq,qp->x', veff1b[0,:,p0:p1], dmz1doo_b[:,p0:p1]) *.5 + de[k] += cp.einsum('xpq,pq->x', veff1a[0,:,p0:p1], dmz1dooa[p0:p1]) *.5 + de[k] += cp.einsum('xpq,pq->x', veff1b[0,:,p0:p1], dmz1doob[p0:p1]) *.5 + de[k] += cp.einsum('xpq,qp->x', veff1a[0,:,p0:p1], dmz1dooa[:,p0:p1]) *.5 + de[k] += cp.einsum('xpq,qp->x', veff1b[0,:,p0:p1], dmz1doob[:,p0:p1]) *.5 de[k] += cp.einsum('xij,ij->x', veff1a[1,:,p0:p1], oo0a[p0:p1]) *0.5 de[k] += cp.einsum('xij,ij->x', veff1b[1,:,p0:p1], oo0b[p0:p1]) *0.5 @@ -547,7 +548,7 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, if xctype == "LDA": fmat_, ao_deriv = tdrks._lda_eval_mat_, 1 elif xctype == "GGA": - fmat_, ao_deriv = tdrks._gga_eval_mat_, 2 + fmat_, ao_deriv = _gga_eval_mat_, 2 elif xctype == "MGGA": fmat_, ao_deriv = tdrks._mgga_eval_mat_, 2 @@ -664,7 +665,7 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, fxc_sf, kxc_sf = eval_xc_eff(xc_code, rho_z, deriv=3, xctype=xctype)[2:4] if extype == 0: - rho1_ab = ni.eval_rho(_sorted_mol, ao, dmvo0_mask, mask, xctype) + rho1_ab = ni.eval_rho(_sorted_mol, ao, dmvo0_mask, mask, xctype, hermi=0, with_lapl=False) wv_sf = uks_sf_gga_wv1(rho1_ab,fxc_sf,weight, extype) fmat_(_sorted_mol, f1vo[0][1], ao, wv_sf, mask, shls_slice, ao_loc) @@ -675,7 +676,7 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, fmat_(_sorted_mol, k1ao_xpy[0][0], ao, gv_sf[0], mask, shls_slice, ao_loc) fmat_(_sorted_mol, k1ao_xpy[1][0], ao, gv_sf[1], mask, shls_slice, ao_loc) - rho1_ab = ni.eval_rho(_sorted_mol, ao, dmvo1_mask, mask, xctype) + rho1_ab = ni.eval_rho(_sorted_mol, ao, dmvo1_mask, mask, xctype, hermi=0, with_lapl=False) wv_sf = uks_sf_gga_wv1(rho1_ab,fxc_sf,weight, extype) fmat_(_sorted_mol, f1vo[1][1], ao, wv_sf, mask, shls_slice, 
ao_loc) @@ -686,8 +687,7 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, fmat_(_sorted_mol, k1ao_xmy[0][0], ao, gv_sf[0], mask, shls_slice, ao_loc) fmat_(_sorted_mol, k1ao_xmy[1][0], ao, gv_sf[1], mask, shls_slice, ao_loc) elif extype == 1: - rho1_ba = ni.eval_rho(_sorted_mol, ao, dmvo0_mask, mask, xctype) - + rho1_ba = ni.eval_rho(_sorted_mol, ao, dmvo0_mask, mask, xctype, hermi=0, with_lapl=False) wv_sf = uks_sf_gga_wv1(rho1_ba,fxc_sf,weight, extype) fmat_(_sorted_mol, f1vo[0][1], ao, wv_sf, mask, shls_slice, ao_loc) fmat_(_sorted_mol, f1vo[0][0], ao, wv_sf, mask, shls_slice, ao_loc) @@ -697,7 +697,7 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, fmat_(_sorted_mol, k1ao_xpy[0][1], ao, gv_sf[0], mask, shls_slice, ao_loc) fmat_(_sorted_mol, k1ao_xpy[1][1], ao, gv_sf[1], mask, shls_slice, ao_loc) - rho1_ba = ni.eval_rho(_sorted_mol, ao, dmvo1_mask, mask, xctype) + rho1_ba = ni.eval_rho(_sorted_mol, ao, dmvo1_mask, mask, xctype, hermi=0, with_lapl=False) wv_sf = uks_sf_gga_wv1(rho1_ba,fxc_sf,weight, extype) fmat_(_sorted_mol, f1vo[1][1], ao, -wv_sf, mask, shls_slice, ao_loc) @@ -719,113 +719,18 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, # wv = numint._uks_gga_wv1(rho, rho2, vxc, fxc, weight) tmp = contract("axg,axbyg->byg", rho2, fxc) wv = contract("byg,g->byg", tmp, weight) + wv[:,0] *= 0.5 fmat_(_sorted_mol, f1oo[0], ao, wv[0], mask, shls_slice, ao_loc) fmat_(_sorted_mol, f1oo[1], ao, wv[1], mask, shls_slice, ao_loc) if with_vxc: # wv = numint._uks_gga_wv0(rho, vxc, weight) wv = vxc * weight + wv[:,0] *= 0.5 fmat_(_sorted_mol, v1ao[0], ao, wv[0], mask, shls_slice, ao_loc) fmat_(_sorted_mol, v1ao[1], ao, wv[1], mask, shls_slice, ao_loc) elif xctype == 'MGGA': raise NotImplementedError('MGGA not implemented') - def mgga_sum_(vmat, ao, wv, mask): - aow = numint._scale_ao(ao[:4], wv[:4]) - tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) - - aow = 
numint._scale_ao(ao[1], wv[4], aow) - tmp += numint._dot_ao_ao(mol, ao[1], aow, mask, shls_slice, ao_loc) - aow = numint._scale_ao(ao[2], wv[4], aow) - tmp += numint._dot_ao_ao(mol, ao[2], aow, mask, shls_slice, ao_loc) - aow = numint._scale_ao(ao[3], wv[4], aow) - tmp += numint._dot_ao_ao(mol, ao[3], aow, mask, shls_slice, ao_loc) - vmat[0] += tmp + tmp.T - - rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv[:4], mask, ao_loc) - rks_grad._tau_grad_dot_(vmat[1:], mol, ao, wv[4]*2, mask, ao_loc, True) - - ao_deriv = 2 - for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): - p0 = p1 - p1+= weight.shape[0] - ngrid=weight.shape[-1] - if extype == 0: - pass - elif extype == 1: - pass - - rho1_ab_tmp = ni.eval_rho(mol, ao, dmvo[0][0], mask, xctype) - rho1_ba_tmp = ni.eval_rho(mol, ao, dmvo[0][1], mask, xctype) - # Padding for laplacian - rho1_ab = cp.empty((5, ngrid)) - rho1_ba = cp.empty((5, ngrid)) - rho1_ab[:4] = rho1_ab_tmp[:4] - rho1_ba[:4] = rho1_ba_tmp[:4] - rho1_ab[4] = rho1_ab_tmp[5] - rho1_ba[4] = rho1_ba_tmp[5] - - wv_sf = uks_sf_mgga_wv1((rho1_ab,rho1_ba), fxc_sf[...,p0:p1],weight) - mgga_sum_(f1vo[0][1], ao, wv_sf[0]+wv_sf[1], mask) - mgga_sum_(f1vo[0][0], ao, wv_sf[1]+wv_sf[0], mask) - - if with_kxc: - gv_sf = uks_sf_mgga_wv2_p((rho1_ab,rho1_ba), kxc_sf[...,p0:p1], weight) - mgga_sum_(k1ao_xpy[0][0], ao, gv_sf[0][0], mask) - mgga_sum_(k1ao_xpy[0][1], ao, gv_sf[1][0], mask) - mgga_sum_(k1ao_xpy[1][0], ao, gv_sf[0][1], mask) - mgga_sum_(k1ao_xpy[1][1], ao, gv_sf[1][1], mask) - - rho1_ab_tmp = ni.eval_rho(mol, ao, dmvo[1][0], mask, xctype) - rho1_ba_tmp = ni.eval_rho(mol, ao, dmvo[1][1], mask, xctype) - # Padding for laplacian - rho1_ab = cp.empty((5, ngrid)) - rho1_ba = cp.empty((5, ngrid)) - rho1_ab[:4] = rho1_ab_tmp[:4] - rho1_ba[:4] = rho1_ba_tmp[:4] - rho1_ab[4] = rho1_ab_tmp[5] - rho1_ba[4] = rho1_ba_tmp[5] - - wv_sf = uks_sf_mgga_wv1((rho1_ab,rho1_ba), fxc_sf[...,p0:p1],weight) - mgga_sum_(f1vo[1][1], ao, 
wv_sf[0]-wv_sf[1], mask) - mgga_sum_(f1vo[1][0], ao, wv_sf[1]-wv_sf[0], mask) - - if with_kxc: - gv_sf = uks_sf_mgga_wv2_m((rho1_ab,rho1_ba), kxc_sf[...,p0:p1], weight) - mgga_sum_(k1ao_xmy[0][0], ao, gv_sf[0][0], mask) - mgga_sum_(k1ao_xmy[0][1], ao, gv_sf[1][0], mask) - mgga_sum_(k1ao_xmy[1][0], ao, gv_sf[0][1], mask) - mgga_sum_(k1ao_xmy[1][1], ao, gv_sf[1][1], mask) - - rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype), - ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype)) - vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:] - - if dmoo is not None: - rho2 = (ni.eval_rho(mol, ao, dmoo[0], mask, xctype, hermi=1), - ni.eval_rho(mol, ao, dmoo[1], mask, xctype, hermi=1)) - wv_tmp = numint._uks_mgga_wv1(rho, rho2, vxc, fxc, weight) - # # Padding for laplacian - wv = cp.empty((2,5,ngrid)) - wv[0][:4] = wv_tmp[0][:4] - wv[0][4] = wv_tmp[0][5] - wv[1][:4] = wv_tmp[1][:4] - wv[1][4] = wv_tmp[1][5] - - mgga_sum_(f1oo[0], ao, wv[0], mask) - mgga_sum_(f1oo[1], ao, wv[1], mask) - - if with_vxc: - wv_tmp = numint._uks_mgga_wv0(rho, vxc, weight) - # # Padding for laplacian - wv = cp.empty((2,5,ngrid)) - wv[0][:4] = wv_tmp[0][:4] - wv[0][4] = wv_tmp[0][5] - wv[1][:4] = wv_tmp[1][:4] - wv[1][4] = wv_tmp[1][5] - - mgga_sum_(v1ao[0], ao, wv[0], mask) - mgga_sum_(v1ao[1], ao, wv[1], mask) else: raise NotImplementedError(f'td-uks for functional {xc_code}') @@ -846,6 +751,17 @@ def mgga_sum_(vmat, ao, wv, mask): return f1vo, f1oo, v1ao, (k1ao_xpy,k1ao_xmy) +def _gga_eval_mat_(mol, vmat, ao, wv, mask, shls_slice, ao_loc): + # wv[0] *= 0.5 # *.5 because vmat + vmat.T at the end + aow = numint._scale_ao(ao[:4], wv[:4]) + tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + vtmp = tmp + tmp.T + add_sparse(vmat[0], vtmp, mask) + wv = cp.asarray(wv, order="C") + vtmp = rks_grad._gga_grad_sum_(ao, wv) + add_sparse(vmat[1:], vtmp, mask) + return vmat + def uks_sf_gga_wv1(rho1, fxc_sf, weight, extype): # fxc_sf with a shape (4,4,ngrid), 4 
means I, \nabla_x,y,z. if extype == 0: @@ -1030,7 +946,7 @@ def _contract_xc_kernel_z(td_grad, xc_code, dmvo, max_memory=2000): if xctype == "LDA": fmat_, ao_deriv = tdrks._lda_eval_mat_, 1 elif xctype == "GGA": - fmat_, ao_deriv = tdrks._gga_eval_mat_, 2 + fmat_, ao_deriv = _gga_eval_mat_, 2 elif xctype == "MGGA": fmat_, ao_deriv = tdrks._mgga_eval_mat_, 2 @@ -1059,48 +975,22 @@ def _contract_xc_kernel_z(td_grad, xc_code, dmvo, max_memory=2000): mo_coeff_mask_b = mo_coeff[1, mask] dmvo0_mask = dmvo0[mask[:, None], mask] dmvo1_mask = dmvo1[mask[:, None], mask] - rho = cp.stack((ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_a, mo_occ[0], mask, xctype), - ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_b, mo_occ[1], mask, xctype))) + with_lapl = False + rho = cp.stack((ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_a, mo_occ[0], mask, xctype, with_lapl), + ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_b, mo_occ[1], mask, xctype, with_lapl))) vxc, fxc = ni.eval_xc_eff(xc_code, rho, deriv=deriv, spin=1)[1:3] - rho1 = cp.stack((ni.eval_rho(_sorted_mol, ao, dmvo0_mask, mask, xctype, hermi=1), - ni.eval_rho(_sorted_mol, ao, dmvo1_mask, mask, xctype, hermi=1))) + rho1 = cp.stack(( + ni.eval_rho(_sorted_mol, ao, dmvo0_mask, mask, xctype, hermi=1, with_lapl=with_lapl), + ni.eval_rho(_sorted_mol, ao, dmvo1_mask, mask, xctype, hermi=1, with_lapl=with_lapl))) tmp = contract("axg,axbyg->byg", rho1, fxc) wv = contract("byg,g->byg", tmp, weight) + wv[:, 0] *= 0.5 fmat_(_sorted_mol, f1vo[0], ao, wv[0], mask, shls_slice, ao_loc) fmat_(_sorted_mol, f1vo[1], ao, wv[1], mask, shls_slice, ao_loc) elif xctype == 'MGGA': raise NotImplementedError(f'td-uks for functional {xc_code}') - def mgga_sum_(vmat, ao, wv, mask): - aow = numint._scale_ao(ao[:4], wv[:4]) - tmp = numint._dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) - - aow = numint._scale_ao(ao[1], wv[5], aow) - tmp += numint._dot_ao_ao(mol, ao[1], aow, mask, shls_slice, ao_loc) - aow = numint._scale_ao(ao[2], wv[5], aow) - tmp += 
numint._dot_ao_ao(mol, ao[2], aow, mask, shls_slice, ao_loc) - aow = numint._scale_ao(ao[3], wv[5], aow) - tmp += numint._dot_ao_ao(mol, ao[3], aow, mask, shls_slice, ao_loc) - vmat[0] += tmp + tmp.T - - rks_grad._gga_grad_sum_(vmat[1:], mol, ao, wv[:4], mask, ao_loc) - rks_grad._tau_grad_dot_(vmat[1:], mol, ao, wv[5]*2, mask, ao_loc, True) - - ao_deriv = 2 - for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): - rho = (ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype), - ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype)) - vxc, fxc, kxc = ni.eval_xc(xc_code, rho, 1, deriv=deriv)[1:] - - rho1 = (ni.eval_rho(mol, ao, dmvo[0], mask, xctype, hermi=1), - ni.eval_rho(mol, ao, dmvo[1], mask, xctype, hermi=1)) - wv = numint._uks_mgga_wv1(rho, rho1, vxc, fxc, weight) - mgga_sum_(f1vo[0], ao, wv[0], mask) - mgga_sum_(f1vo[1], ao, wv[1], mask) - - vxc = fxc = rho = rho1 = None elif xctype == 'HF': pass @@ -1108,7 +998,7 @@ def mgga_sum_(vmat, ao, wv, mask): raise NotImplementedError(f'td-uks for functional {xc_code}') f1vo[:,1:] *= -1 - f1vo = opt.unsort_orbitals(f1vo, axis=[3, 4]) + f1vo = opt.unsort_orbitals(f1vo, axis=[2, 3]) return f1vo class Gradients(tdrhf_grad.Gradients): From babbf1c152f830137899e63322d0c0cef20b3171 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 24 Oct 2025 11:44:29 +0800 Subject: [PATCH 20/32] finish debug --- gpu4pyscf/grad/tduks_sf.py | 174 +++++++++------------ gpu4pyscf/grad/tests/test_tduks_sf_grad.py | 47 ++++++ 2 files changed, 121 insertions(+), 100 deletions(-) create mode 100644 gpu4pyscf/grad/tests/test_tduks_sf_grad.py diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index 4cbeaea37..92de515cf 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -361,68 +361,38 @@ def fvind(x): if mol.has_ecp(): dh1e_td += rhf_grad.get_dh1e_ecp( mol, (dmz1dooa + dmz1doob) * 0.25 + (dmz1dooa + dmz1doob).T * 0.25) # 1/r like terms - + if atmlst is 
None: + atmlst = range(mol.natm) + dvhf_all = 0 + j_factor = 1.0 + k_factor = 0.0 + with_k = ni.libxc.is_hybrid_xc(mf.xc) + if with_k: + k_factor = hyb + dvhf = td_grad.get_veff(mol, cp.stack(((dmz1dooa + dmz1dooa.T) * 0.25 + oo0a, + (dmz1doob + dmz1doob.T) * 0.25 + oo0b,)), j_factor, k_factor) + dvhf_all += dvhf + dvhf = td_grad.get_veff(mol, cp.stack(((dmz1dooa + dmz1dooa.T), (dmz1doob + dmz1doob.T))) * 0.25, + j_factor, k_factor) + dvhf_all -= dvhf if td_grad.base.extype == 0: - if abs(hyb) > 1e-10: - dm = (oo0a.get(), (dmz1dooa+dmz1dooa.T).get(), dmxpy_ab.T.get(), -dmxmy_ab.T.get(), - oo0b.get(), (dmz1doob+dmz1doob.T).get(), dmxpy_ab.get(), dmxmy_ab.get()) - vj, vk = mf_grad.get_jk(mol, dm) - if not isinstance(vj, cp.ndarray): - vj = cp.asarray(vj) - if not isinstance(vk, cp.ndarray): - vk = cp.asarray(vk) - vj = vj.reshape(2,4,3,nao,nao) - vk = vk.reshape(2,4,3,nao,nao) * hyb - vj[:,2:4] *= 0.0 - if abs(omega) > 1e-10: - with mol.with_range_coulomb(omega): - vk += mf_grad.get_k(mol, dm).reshape(2,4,3,nao,nao) * (alpha-hyb) - if not isinstance(vk, cp.ndarray): - vk = cp.asarray(vk) + dvhf = td_grad.get_veff(mol, cp.stack(((dmxpy_ab + dmxpy_ab.T), (dmxpy_ab + dmxpy_ab.T))) * 0.5, + 0.0, k_factor) + dvhf_all += dvhf * 1 + dvhf = td_grad.get_veff(mol, cp.stack(((dmxmy_ab - dmxmy_ab.T), (dmxmy_ab - dmxmy_ab.T))) * 0.5, + j_factor=0.0, k_factor=k_factor, hermi=2) + dvhf_all += dvhf * 1 - veff1 = cp.zeros((2,4,3,nao,nao)) - veff1[:,:2] = vj[0,:2] + vj[1,:2] - vk[:,:2] - else: - dm = (oo0a.get(), (dmz1dooa+dmz1dooa.T).get(), dmxpy_ab.T.get(), - oo0b.get(), (dmz1doob+dmz1doob.T).get(), dmxpy_ab) - vj = mf_grad.get_j(mol, dm).reshape(2,3,3,nao,nao) - if not isinstance(vj, cp.ndarray): - vj = cp.asarray(vj) - vj[:,2] *= 0.0 - veff1 = cp.zeros((2,4,3,nao,nao)) - veff1[:,:3] = vj[0] + vj[1] elif td_grad.base.extype == 1: - if abs(hyb) > 1e-10: - dm = (oo0a.get(), (dmz1dooa+dmz1dooa.T).get(), dmxpy_ba.get(), dmxmy_ba.get(), - oo0b.get(), (dmz1doob+dmz1doob.T).get(), 
dmxpy_ba.T.get(), -dmxmy_ba.T.get()) - vj, vk = mf_grad.get_jk(mol, dm) - if not isinstance(vj, cp.ndarray): - vj = cp.asarray(vj) - if not isinstance(vk, cp.ndarray): - vk = cp.asarray(vk) - vj = vj.reshape(2,4,3,nao,nao) - vk = vk.reshape(2,4,3,nao,nao) * hyb - vj[:,2:4] *= 0.0 - if abs(omega) > 1e-10: - with mol.with_range_coulomb(omega): - vk += mf_grad.get_k(mol, dm).reshape(2,4,3,nao,nao) * (alpha-hyb) - if not isinstance(vk, cp.ndarray): - vk = cp.asarray(vk) - - veff1 = cp.zeros((2,4,3,nao,nao)) - veff1[:,:2] = vj[0,:2] + vj[1,:2] - vk[:,:2] - else: - dm = (oo0a.get(), (dmz1dooa+dmz1dooa.T).get(), dmxpy_ba.get(), - oo0b.get(), (dmz1doob+dmz1doob.T).get(), dmxpy_ba.T.get()) - vj = mf_grad.get_j(mol, dm).reshape(2,3,3,nao,nao) - if not isinstance(vj, cp.ndarray): - vj = cp.asarray(vj) - vj[:,2] *= 0.0 - veff1 = cp.zeros((2,4,3,nao,nao)) - veff1[:,:3] = vj[0] + vj[1] + dvhf = td_grad.get_veff(mol, cp.stack(((dmxpy_ba + dmxpy_ba.T), (dmxpy_ba.T + dmxpy_ba))) * 0.5, + 0.0, k_factor) + dvhf_all += dvhf * 1 + dvhf = td_grad.get_veff(mol, cp.stack(((dmxmy_ba - dmxmy_ba.T), (-dmxmy_ba.T + dmxmy_ba))) * 0.5, + j_factor=0.0, k_factor=k_factor, hermi=2) + dvhf_all += dvhf * 1 fxcz1 = _contract_xc_kernel_z(td_grad, mf.xc, z1ao, max_memory) - + veff1 = cp.zeros((2,4,3,nao,nao)) veff1[:,0] += vxc1[:,1:] veff1[:,1] += (f1oo[:,1:] + fxcz1[:,1:])*2 veff1[0,1] += (k1ao_xpy[0,0,1:] + k1ao_xpy[0,1,1:] + k1ao_xpy[1,0,1:] + k1ao_xpy[1,1,1:] @@ -442,51 +412,55 @@ def fvind(x): delec = 2.0 * (dh_ground + dh_td - ds) aoslices = mol.aoslice_by_atom() delec = cp.asarray([cp.sum(delec[:, p0:p1], axis=1) for p0, p1 in aoslices[:, 2:]]) - de += delec + dh1e_ground + dh1e_td - for k, ia in enumerate(atmlst): - shl0, shl1, p0, p1 = offsetdic[ia] - - # Ground state gradients - de[k] += cp.einsum('xpq,pq->x', veff1a[0,:,p0:p1], oo0a[p0:p1]) - de[k] += cp.einsum('xpq,pq->x', veff1b[0,:,p0:p1], oo0b[p0:p1]) - de[k] += cp.einsum('xpq,qp->x', veff1a[0,:,p0:p1], oo0a[:,p0:p1]) - de[k] += 
cp.einsum('xpq,qp->x', veff1b[0,:,p0:p1], oo0b[:,p0:p1]) - - de[k] += cp.einsum('xpq,pq->x', veff1a[0,:,p0:p1], dmz1dooa[p0:p1]) *.5 - de[k] += cp.einsum('xpq,pq->x', veff1b[0,:,p0:p1], dmz1doob[p0:p1]) *.5 - de[k] += cp.einsum('xpq,qp->x', veff1a[0,:,p0:p1], dmz1dooa[:,p0:p1]) *.5 - de[k] += cp.einsum('xpq,qp->x', veff1b[0,:,p0:p1], dmz1doob[:,p0:p1]) *.5 - - de[k] += cp.einsum('xij,ij->x', veff1a[1,:,p0:p1], oo0a[p0:p1]) *0.5 - de[k] += cp.einsum('xij,ij->x', veff1b[1,:,p0:p1], oo0b[p0:p1]) *0.5 - if td_grad.base.extype == 0: - de[k] += cp.einsum('xij,ij->x', veff1b[2,:,p0:p1], dmxpy_ab[p0:p1,:]) - de[k] += cp.einsum('xji,ij->x', veff1b[2,:,p0:p1], dmxpy_ab[:,p0:p1]) - - de[k] += cp.einsum('xij,ij->x', veff1b[3,:,p0:p1], dmxmy_ab[p0:p1,:]) - de[k] += cp.einsum('xji,ij->x', veff1b[3,:,p0:p1], dmxmy_ab[:,p0:p1]) - - if abs(hyb) > 1e-10: - de[k] -= cp.einsum('xij,ij->x', vk[1,2,:,p0:p1], dmxpy_ab[p0:p1,:]) - de[k] -= cp.einsum('xji,ij->x', vk[0,2,:,p0:p1], dmxpy_ab[:,p0:p1]) - - de[k] -= cp.einsum('xij,ij->x', vk[1,3,:,p0:p1], dmxmy_ab[p0:p1,:]) - de[k] += cp.einsum('xji,ij->x', vk[0,3,:,p0:p1], dmxmy_ab[:,p0:p1]) - elif td_grad.base.extype == 1: - de[k] += cp.einsum('xij,ij->x', veff1a[2,:,p0:p1], dmxpy_ba[p0:p1,:]) - de[k] += cp.einsum('xji,ij->x', veff1a[2,:,p0:p1], dmxpy_ba[:,p0:p1]) - - de[k] += cp.einsum('xij,ij->x', veff1a[3,:,p0:p1], dmxmy_ba[p0:p1,:]) - de[k] += cp.einsum('xji,ij->x', veff1a[3,:,p0:p1], dmxmy_ba[:,p0:p1]) - - if abs(hyb) > 1e-10: - de[k] -= cp.einsum('xij,ij->x', vk[0,2,:,p0:p1], dmxpy_ba[p0:p1,:]) - de[k] -= cp.einsum('xji,ij->x', vk[1,2,:,p0:p1], dmxpy_ba[:,p0:p1]) - - de[k] -= cp.einsum('xij,ij->x', vk[0,3,:,p0:p1], dmxmy_ba[p0:p1,:]) - de[k] += cp.einsum('xji,ij->x', vk[1,3,:,p0:p1], dmxmy_ba[:,p0:p1]) + deveff0 = cp.asarray( + [contract("xpq,pq->x", veff1a[0,:,p0:p1], oo0a[p0:p1] + dmz1dooa[p0:p1] * 0.5) + for p0, p1 in aoslices[:, 2:]]) + deveff0 += cp.asarray( + [contract("xpq,pq->x", veff1b[0,:,p0:p1], oo0b[p0:p1] + dmz1doob[p0:p1] 
* 0.5) + for p0, p1 in aoslices[:, 2:]]) + deveff0 += cp.asarray( + [contract("xpq,qp->x", veff1a[0,:,p0:p1], oo0a[:,p0:p1] + dmz1dooa[:,p0:p1] * 0.5) + for p0, p1 in aoslices[:, 2:]]) + deveff0 += cp.asarray( + [contract("xpq,qp->x", veff1b[0,:,p0:p1], oo0b[:,p0:p1] + dmz1doob[:,p0:p1] * 0.5) + for p0, p1 in aoslices[:, 2:]]) + + deveff1 = cp.asarray( + [contract("xpq,pq->x", veff1a[1,:,p0:p1], oo0a[p0:p1] * 0.5) + for p0, p1 in aoslices[:, 2:]]) + deveff1 += cp.asarray( + [contract("xpq,pq->x", veff1b[1,:,p0:p1], oo0b[p0:p1] * 0.5) + for p0, p1 in aoslices[:, 2:]]) + if td_grad.base.extype == 0: + deveff2 = cp.asarray( + [contract('xpq,pq->x', veff1b[2,:,p0:p1], dmxpy_ab[p0:p1,:]) + for p0, p1 in aoslices[:, 2:]]) + deveff2 += cp.asarray( + [contract('xqp,pq->x', veff1b[2,:,p0:p1], dmxpy_ab[:,p0:p1]) + for p0, p1 in aoslices[:, 2:]]) + deveff3 = cp.asarray( + [contract('xpq,pq->x', veff1b[3,:,p0:p1], dmxmy_ab[p0:p1,:]) + for p0, p1 in aoslices[:, 2:]]) + deveff3 += cp.asarray( + [contract('xqp,pq->x', veff1b[3,:,p0:p1], dmxmy_ab[:,p0:p1]) + for p0, p1 in aoslices[:, 2:]]) + elif td_grad.base.extype == 1: + deveff2 = cp.asarray( + [contract('xpq,pq->x', veff1a[2,:,p0:p1], dmxpy_ba[p0:p1,:]) + for p0, p1 in aoslices[:, 2:]]) + deveff2 += cp.asarray( + [contract('xqp,pq->x', veff1a[2,:,p0:p1], dmxpy_ba[:,p0:p1]) + for p0, p1 in aoslices[:, 2:]]) + deveff3 = cp.asarray( + [contract('xpq,pq->x', veff1a[3,:,p0:p1], dmxmy_ba[p0:p1,:]) + for p0, p1 in aoslices[:, 2:]]) + deveff3 += cp.asarray( + [contract('xqp,pq->x', veff1a[3,:,p0:p1], dmxmy_ba[:,p0:p1]) + for p0, p1 in aoslices[:, 2:]]) + + de += 2.0 * dvhf_all + delec + dh1e_ground + dh1e_td + deveff0 + deveff1 + deveff2 + deveff3 log.timer('TDUKS nuclear gradients', *time0) return de.get() diff --git a/gpu4pyscf/grad/tests/test_tduks_sf_grad.py b/gpu4pyscf/grad/tests/test_tduks_sf_grad.py new file mode 100644 index 000000000..6225fefbc --- /dev/null +++ b/gpu4pyscf/grad/tests/test_tduks_sf_grad.py @@ -0,0 +1,47 @@ +# 
Copyright 2021-2025 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pyscf +import numpy as np +import unittest +import pytest +from pyscf import scf, dft, tdscf +import gpu4pyscf +from gpu4pyscf import scf as gpu_scf +from packaging import version +from gpu4pyscf.lib.multi_gpu import num_devices + +atom = """ +O 0.0000000000 0.0000000000 0.0000000000 +H 0.0000000000 -0.7570000000 0.5870000000 +H 0.0000000000 0.7570000000 0.5870000000 +""" + +pyscf_25 = version.parse(pyscf.__version__) <= version.parse("2.5.0") + +bas0 = "cc-pvdz" + +def diagonalize_tda(a, nroots=5): + nocc, nvir = a.shape[:2] + nov = nocc * nvir + a = a.reshape(nov, nov) + e, xy = np.linalg.eig(np.asarray(a)) + sorted_indices = np.argsort(e) + + e_sorted = e[sorted_indices] + xy_sorted = xy[:, sorted_indices] + + e_sorted_final = e_sorted[e_sorted > 1e-3] + xy_sorted = xy_sorted[:, e_sorted > 1e-3] + return e_sorted_final[:nroots], xy_sorted[:, :nroots] \ No newline at end of file From 4a58b7f6b744163a05a0ce89e75d2249bcd9d728 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 24 Oct 2025 17:03:41 +0800 Subject: [PATCH 21/32] simplify the codes --- gpu4pyscf/grad/tduks_sf.py | 652 +++++++++++-------------------------- 1 file changed, 198 insertions(+), 454 deletions(-) diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index 92de515cf..49cdf2fd5 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ 
-74,194 +74,129 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): nmoa = nocca + nvira nmob = noccb + nvirb - if td_grad.base.extype == 0: # spin-flip-up - # x_ab, a means vira, b means occb - x_ab, y_ab = x_y - if not isinstance(x_ab, cp.ndarray): - x_ab = cp.asarray(x_ab) - xpy_ab = x_ab.T - xmy_ab = x_ab.T + x, y = x_y + if not isinstance(x, cp.ndarray): + x = cp.asarray(x) + x = x.T - dvv_a = cp.einsum('ai,bi->ab', xpy_ab, xpy_ab) + cp.einsum('ai,bi->ab', xmy_ab, xmy_ab) # T^{ab \alpha \beta}*2 - doo_b =-cp.einsum('ai,aj->ij', xpy_ab, xpy_ab) - cp.einsum('ai,aj->ij', xmy_ab, xmy_ab) # T^{ij \alpha \beta}*2 - - dmxpy_ab = reduce(cp.dot, (orbva, xpy_ab, orbob.T)) # ua ai iv -> uv -> (X+Y)_{uv \alpha \beta} - dmxmy_ab = reduce(cp.dot, (orbva, xmy_ab, orbob.T)) # ua ai iv -> uv -> (X-Y)_{uv \alpha \beta} + ni = mf._numint + ni.libxc.test_deriv_order(mf.xc, 3, raise_error=True) + omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, mol.spin) + if td_grad.base.extype == 0: # spin-flip-up + dvv_a = cp.einsum('ai,bi->ab', x, x) * 2 + doo_b =-cp.einsum('ai,aj->ij', x, x) * 2 + dmx = reduce(cp.dot, (orbva, x, orbob.T)) # ua ai iv -> uv -> (X+Y)_{uv \alpha \beta} dmzoo_b = reduce(cp.dot, (orbob, doo_b, orbob.T)) # \sum_{\sigma ab} 2*Tij \sigma C_{iu} C_{iu} dmzoo_a = reduce(cp.dot, (orbva, dvv_a, orbva.T)) - - ni = mf._numint - ni.libxc.test_deriv_order(mf.xc, 3, raise_error=True) - omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, mol.spin) - - # used by mcfun. 
- rho0, vxc, fxc = ni.cache_xc_kernel(mf.mol, mf.grids, mf.xc, - mo_coeff, mo_occ, spin=1) - - f1vo, f1oo, vxc1, k1ao = \ - _contract_xc_kernel(td_grad, mf.xc, (dmxpy_ab, dmxmy_ab), - (dmzoo_a,dmzoo_b), True, True, max_memory, td_grad.base.extype) - k1ao_xpy, k1ao_xmy = k1ao - - # f1vo, (2,2,4,nao,nao), (X+Y) and (X-Y) with fxc_sf - # f1oo, (2,4,nao,nao), 2T with fxc_sc - # vxc1, ao with v1^{\sigma} - # k1ao_xpy,(2,2,4,nao,nao), (X+Y)(X+Y) and (X-Y)(X-Y) with gxc - - if abs(hyb) > 1e-10: - dm = (dmzoo_a, dmxpy_ab.T, -dmxmy_ab.T, - dmzoo_b, dmxpy_ab, dmxmy_ab) - vj, vk = mf.get_jk(mol, dm, hermi=0) - vk *= hyb - if abs(omega) > 1e-10: - vk += mf.get_k(mol, dm, hermi=0, omega=omega) * (alpha-hyb) - vj = vj.reshape(2,3,nao,nao) - vk = vk.reshape(2,3,nao,nao) - - veff0doo = vj[0,0]+vj[1,0] - vk[:,0]+ f1oo[:,0] - veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] - +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) - veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] - +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) - - wvoa = reduce(cp.dot, (orbva.T, veff0doo[0], orboa)) *2 - wvob = reduce(cp.dot, (orbvb.T, veff0doo[1], orbob)) *2 - - veff = - vk[:,1] + f1vo[0,:,0] - veff0mop_ab = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - wvob += cp.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 - wvoa -= cp.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 - - veff = -vk[:,2] + f1vo[1,:,0] - veff0mom_ab = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - wvob += cp.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 - wvoa -= cp.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 - - else: - dm = (dmzoo_a, dmxpy_ab.T, -dmxmy_ab.T, - dmzoo_b, dmxpy_ab, dmxmy_ab) - vj = mf.get_j(mol, dm, hermi=0).reshape(2,3,nao,nao) - - veff0doo = vj[0,0]+vj[1,0] + f1oo[:,0] - veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + 
k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] - +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) - veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] - +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) - - wvoa = reduce(cp.dot, (orbva.T, veff0doo[0], orboa)) *2 - wvob = reduce(cp.dot, (orbvb.T, veff0doo[1], orbob)) *2 - - veff = f1vo[0,:,0] - veff0mop_ab = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - wvob += cp.einsum('ca,ci->ai', veff0mop_ab[nocca:,noccb:], xpy_ab) *2 - wvoa -= cp.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) *2 - - veff = f1vo[1,:,0] - veff0mom_ab = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) - - wvob += cp.einsum('ca,ci->ai', veff0mom_ab[nocca:,noccb:], xmy_ab) *2 - wvoa -= cp.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) *2 elif td_grad.base.extype == 1: # spin-flip-down - # x_ab, a means vira, b means occb - x_ba, y_ba = x_y - if not isinstance(x_ba, cp.ndarray): - x_ba = cp.asarray(x_ba) - xpy_ba = x_ba.T - xmy_ba = x_ba.T - - dvv_b = cp.einsum('ai,bi->ab', xpy_ba, xpy_ba) + cp.einsum('ai,bi->ab', xmy_ba, xmy_ba) # T^{ab \beta \alpha}*2 - doo_a =-cp.einsum('ai,aj->ij', xpy_ba, xpy_ba) - cp.einsum('ai,aj->ij', xmy_ba, xmy_ba) # T^{ij \beta \alpha}*2 - - dmxpy_ba = reduce(cp.dot, (orbvb, xpy_ba, orboa.T)) # ua ai iv -> uv -> (X+Y)_{uv \beta \alpha} - dmxmy_ba = reduce(cp.dot, (orbvb, xmy_ba, orboa.T)) # ua ai iv -> uv -> (X-Y)_{uv \beta \alpha} - + dvv_b = cp.einsum('ai,bi->ab', x, x) * 2 + doo_a =-cp.einsum('ai,aj->ij', x, x) * 2 + dmx = reduce(cp.dot, (orbvb, x, orboa.T)) # ua ai iv -> uv -> (X+Y)_{uv \beta \alpha} dmzoo_a = reduce(cp.dot, (orboa, doo_a, orboa.T)) # \sum_{\sigma ab} 2*Tab \sigma C_{au} C_{bu} dmzoo_b = reduce(cp.dot, (orbvb, dvv_b, orbvb.T)) + else: + raise RuntimeError("Only spin-flip UHF/UKS is supported") - ni = mf._numint - ni.libxc.test_deriv_order(mf.xc, 3, raise_error=True) - omega, alpha, hyb = 
ni.rsh_and_hybrid_coeff(mf.xc, mol.spin) - - # used by mcfun. - rho0, vxc, fxc = ni.cache_xc_kernel(mf.mol, mf.grids, mf.xc, - mo_coeff, mo_occ, spin=1) - - f1vo, f1oo, vxc1, k1ao = \ - _contract_xc_kernel(td_grad, mf.xc, (dmxpy_ba, dmxmy_ba), - (dmzoo_a,dmzoo_b), True, True, max_memory, td_grad.base.extype) - k1ao_xpy, k1ao_xmy = k1ao - - # f1vo, (2,2,4,nao,nao), (X+Y) and (X-Y) with fxc_sf - # f1oo, (2,4,nao,nao), 2T with fxc_sc - # vxc1, ao with v1^{\sigma} - # k1ao_xpy,(2,2,4,nao,nao), (X+Y)(X+Y) and (X-Y)(X-Y) with gxc - - if abs(hyb) > 1e-10: - dm = (dmzoo_a, dmxpy_ba, dmxmy_ba, - dmzoo_b, dmxpy_ba.T, -dmxmy_ba.T) - vj, vk = mf.get_jk(mol, dm, hermi=0) - if not isinstance(vj, cp.ndarray): - vj = cp.asarray(vj) - if not isinstance(vk, cp.ndarray): - vk = cp.asarray(vk) - vk *= hyb - if abs(omega) > 1e-10: - vk += mf.get_k(mol, dm, hermi=0, omega=omega) * (alpha-hyb) - vj = vj.reshape(2,3,nao,nao) - vk = vk.reshape(2,3,nao,nao) - - veff0doo = vj[0,0]+vj[1,0] - vk[:,0]+ f1oo[:,0] - veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] - +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) - veff0doo[1] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] - +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) - - wvoa = reduce(cp.dot, (orbva.T, veff0doo[0], orboa)) *2 - wvob = reduce(cp.dot, (orbvb.T, veff0doo[1], orbob)) *2 - + f1vo, f1oo, vxc1, k1ao = \ + _contract_xc_kernel(td_grad, mf.xc, dmx, + (dmzoo_a,dmzoo_b), True, True, max_memory, td_grad.base.extype) + # f1vo, (2,2,4,nao,nao), (X+Y) and (X-Y) with fxc_sf + # f1oo, (2,4,nao,nao), 2T with fxc_sc + # vxc1, ao with v1^{\sigma} + # k1ao, (2,2,4,nao,nao), (X+Y)(X+Y) and (X-Y)(X-Y) with gxc + + if abs(hyb) > 1e-10: + # TODO: This is not supported for density fitting. 
+ if td_grad.base.extype == 0: + dm = (dmzoo_a, dmx.T, -dmx.T, + dmzoo_b, dmx, dmx) + else: # extype == 1 + dm = (dmzoo_a, dmx, dmx, + dmzoo_b, dmx.T, -dmx.T) + + vj, vk = mf.get_jk(mol, dm, hermi=0) + if not isinstance(vj, cp.ndarray): + vj = cp.asarray(vj) + if not isinstance(vk, cp.ndarray): + vk = cp.asarray(vk) + + vk *= hyb + if abs(omega) > 1e-10: + vk += mf.get_k(mol, dm, hermi=0, omega=omega) * (alpha-hyb) + vj = vj.reshape(2,3,nao,nao) + vk = vk.reshape(2,3,nao,nao) + + veff0doo = vj[0,0]+vj[1,0] - vk[:,0]+ f1oo[:,0] + veff0doo[0] += (k1ao[0,0,0] + k1ao[0,1,0] + k1ao[1,0,0] + k1ao[1,1,0] + +k1ao[0,0,0] + k1ao[0,1,0] + k1ao[1,0,0] + k1ao[1,1,0]) + veff0doo[1] += (k1ao[0,0,0] + k1ao[0,1,0] - k1ao[1,0,0] - k1ao[1,1,0] + +k1ao[0,0,0] + k1ao[0,1,0] - k1ao[1,0,0] - k1ao[1,1,0]) + + wvoa = reduce(cp.dot, (orbva.T, veff0doo[0], orboa)) *2 + wvob = reduce(cp.dot, (orbvb.T, veff0doo[1], orbob)) *2 + + if td_grad.base.extype == 0: veff = - vk[:,1] + f1vo[0,:,0] - veff0mop_ba = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - - wvoa += cp.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 - wvob -= cp.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 + veff0mop = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + wvob += cp.einsum('ca,ci->ai', veff0mop[nocca:,noccb:], x) *2 + wvoa -= cp.einsum('il,al->ai', veff0mop[:nocca,:noccb], x) *2 veff = -vk[:,2] + f1vo[1,:,0] - veff0mom_ba = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - - wvoa += cp.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 - wvob -= cp.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 - - else: - dm = (dmzoo_a, dmxpy_ba, dmxmy_ba, - dmzoo_b, dmxpy_ba.T, -dmxmy_ba.T) - vj = mf.get_j(mol, dm, hermi=0).reshape(2,3,nao,nao) - - veff0doo = vj[0,0]+vj[1,0] + f1oo[:,0] - veff0doo[0] += (k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] + k1ao_xpy[1,0,0] + k1ao_xpy[1,1,0] - +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] + k1ao_xmy[1,0,0] + k1ao_xmy[1,1,0]) - veff0doo[1] += 
(k1ao_xpy[0,0,0] + k1ao_xpy[0,1,0] - k1ao_xpy[1,0,0] - k1ao_xpy[1,1,0] - +k1ao_xmy[0,0,0] + k1ao_xmy[0,1,0] - k1ao_xmy[1,0,0] - k1ao_xmy[1,1,0]) - - wvoa = reduce(cp.dot, (orbva.T, veff0doo[0], orboa)) *2 - wvob = reduce(cp.dot, (orbvb.T, veff0doo[1], orbob)) *2 + veff0mom = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + wvob += cp.einsum('ca,ci->ai', veff0mom[nocca:,noccb:], x) *2 + wvoa -= cp.einsum('il,al->ai', veff0mom[:nocca,:noccb], x) *2 + + else: # extype == 1 + veff = - vk[:,1] + f1vo[0,:,0] + veff0mop = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + wvoa += cp.einsum('ca,ci->ai', veff0mop[noccb:,nocca:], x) *2 + wvob -= cp.einsum('il,al->ai', veff0mop[:noccb,:nocca], x) *2 + veff = -vk[:,2] + f1vo[1,:,0] + veff0mom = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + wvoa += cp.einsum('ca,ci->ai', veff0mom[noccb:,nocca:], x) *2 + wvob -= cp.einsum('il,al->ai', veff0mom[:noccb,:nocca], x) *2 + + else: # Pure functional + if td_grad.base.extype == 0: + dm = (dmzoo_a, dmx.T, -dmx.T, + dmzoo_b, dmx, dmx) + else: # extype == 1 + dm = (dmzoo_a, dmx, dmx, + dmzoo_b, dmx.T, -dmx.T) + vj = mf.get_j(mol, dm, hermi=0).reshape(2,3,nao,nao) + + veff0doo = vj[0,0]+vj[1,0] + f1oo[:,0] + veff0doo[0] += (k1ao[0,0,0] + k1ao[0,1,0] + k1ao[1,0,0] + k1ao[1,1,0] + +k1ao[0,0,0] + k1ao[0,1,0] + k1ao[1,0,0] + k1ao[1,1,0]) + veff0doo[1] += (k1ao[0,0,0] + k1ao[0,1,0] - k1ao[1,0,0] - k1ao[1,1,0] + +k1ao[0,0,0] + k1ao[0,1,0] - k1ao[1,0,0] - k1ao[1,1,0]) + + wvoa = reduce(cp.dot, (orbva.T, veff0doo[0], orboa)) *2 + wvob = reduce(cp.dot, (orbvb.T, veff0doo[1], orbob)) *2 + + if td_grad.base.extype == 0: veff = f1vo[0,:,0] - veff0mop_ba = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) - - wvoa += cp.einsum('ca,ci->ai', veff0mop_ba[noccb:,nocca:], xpy_ba) *2 - wvob -= cp.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) *2 + veff0mop = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + wvob += cp.einsum('ca,ci->ai', veff0mop[nocca:,noccb:], x) *2 
+ wvoa -= cp.einsum('il,al->ai', veff0mop[:nocca,:noccb], x) *2 veff = f1vo[1,:,0] - veff0mom_ba = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + veff0mom = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) + wvob += cp.einsum('ca,ci->ai', veff0mom[nocca:,noccb:], x) *2 + wvoa -= cp.einsum('il,al->ai', veff0mom[:nocca,:noccb], x) *2 + + else: # extype == 1 + veff = f1vo[0,:,0] + veff0mop = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + wvoa += cp.einsum('ca,ci->ai', veff0mop[noccb:,nocca:], x) *2 + wvob -= cp.einsum('il,al->ai', veff0mop[:noccb,:nocca], x) *2 - wvoa += cp.einsum('ca,ci->ai', veff0mom_ba[noccb:,nocca:], xmy_ba) *2 - wvob -= cp.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) *2 - else: - raise RuntimeError("Only spin-flip UHF/UKS is supported") + veff = f1vo[1,:,0] + veff0mom = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) + wvoa += cp.einsum('ca,ci->ai', veff0mom[noccb:,nocca:], x) *2 + wvob -= cp.einsum('il,al->ai', veff0mom[:noccb,:nocca], x) *2 vresp = mf.gen_response(hermi=1) @@ -293,28 +228,26 @@ def fvind(x): im0a = cp.zeros((nmoa,nmoa)) im0b = cp.zeros((nmob,nmob)) + im0a[:nocca,:nocca] = reduce(cp.dot, (orboa.T, veff0doo[0]+veff[0], orboa)) *.5 + im0b[:noccb,:noccb] = reduce(cp.dot, (orbob.T, veff0doo[1]+veff[1], orbob)) *.5 if td_grad.base.extype == 0: - im0a[:nocca,:nocca] = reduce(cp.dot, (orboa.T, veff0doo[0]+veff[0], orboa)) *.5 - im0b[:noccb,:noccb] = reduce(cp.dot, (orbob.T, veff0doo[1]+veff[1], orbob)) *.5 - im0b[:noccb,:noccb] += cp.einsum('aj,ai->ij', veff0mop_ab[nocca:,:noccb], xpy_ab) *0.5 - im0b[:noccb,:noccb] += cp.einsum('aj,ai->ij', veff0mom_ab[nocca:,:noccb], xmy_ab) *0.5 + im0b[:noccb,:noccb] += cp.einsum('aj,ai->ij', veff0mop[nocca:,:noccb], x) *0.5 + im0b[:noccb,:noccb] += cp.einsum('aj,ai->ij', veff0mom[nocca:,:noccb], x) *0.5 - im0a[nocca:,nocca:] = cp.einsum('bi,ai->ab', veff0mop_ab[nocca:,:noccb], xpy_ab) *0.5 - im0a[nocca:,nocca:] += cp.einsum('bi,ai->ab', 
veff0mom_ab[nocca:,:noccb], xmy_ab) *0.5 + im0a[nocca:,nocca:] = cp.einsum('bi,ai->ab', veff0mop[nocca:,:noccb], x) *0.5 + im0a[nocca:,nocca:] += cp.einsum('bi,ai->ab', veff0mom[nocca:,:noccb], x) *0.5 - im0a[nocca:,:nocca] = cp.einsum('il,al->ai', veff0mop_ab[:nocca,:noccb], xpy_ab) - im0a[nocca:,:nocca] += cp.einsum('il,al->ai', veff0mom_ab[:nocca,:noccb], xmy_ab) + im0a[nocca:,:nocca] = cp.einsum('il,al->ai', veff0mop[:nocca,:noccb], x) + im0a[nocca:,:nocca] += cp.einsum('il,al->ai', veff0mom[:nocca,:noccb], x) elif td_grad.base.extype == 1: - im0a[:nocca,:nocca] = reduce(cp.dot, (orboa.T, veff0doo[0]+veff[0], orboa)) *.5 - im0b[:noccb,:noccb] = reduce(cp.dot, (orbob.T, veff0doo[1]+veff[1], orbob)) *.5 - im0a[:nocca,:nocca] += cp.einsum('aj,ai->ij', veff0mop_ba[noccb:,:nocca], xpy_ba) *0.5 - im0a[:nocca,:nocca] += cp.einsum('aj,ai->ij', veff0mom_ba[noccb:,:nocca], xmy_ba) *0.5 + im0a[:nocca,:nocca] += cp.einsum('aj,ai->ij', veff0mop[noccb:,:nocca], x) *0.5 + im0a[:nocca,:nocca] += cp.einsum('aj,ai->ij', veff0mom[noccb:,:nocca], x) *0.5 - im0b[noccb:,noccb:] = cp.einsum('bi,ai->ab', veff0mop_ba[noccb:,:nocca], xpy_ba) *0.5 - im0b[noccb:,noccb:] += cp.einsum('bi,ai->ab', veff0mom_ba[noccb:,:nocca], xmy_ba) *0.5 + im0b[noccb:,noccb:] = cp.einsum('bi,ai->ab', veff0mop[noccb:,:nocca], x) *0.5 + im0b[noccb:,noccb:] += cp.einsum('bi,ai->ab', veff0mom[noccb:,:nocca], x) *0.5 - im0b[noccb:,:noccb] = cp.einsum('il,al->ai', veff0mop_ba[:noccb,:nocca], xpy_ba) - im0b[noccb:,:noccb] += cp.einsum('il,al->ai', veff0mom_ba[:noccb,:nocca], xmy_ba) + im0b[noccb:,:noccb] = cp.einsum('il,al->ai', veff0mop[:noccb,:nocca], x) + im0b[noccb:,:noccb] += cp.einsum('il,al->ai', veff0mom[:noccb,:nocca], x) zeta_a = (mo_energy[0][:,None] + mo_energy[0]) * .5 zeta_b = (mo_energy[1][:,None] + mo_energy[1]) * .5 @@ -376,18 +309,18 @@ def fvind(x): j_factor, k_factor) dvhf_all -= dvhf if td_grad.base.extype == 0: - dvhf = td_grad.get_veff(mol, cp.stack(((dmxpy_ab + dmxpy_ab.T), (dmxpy_ab + 
dmxpy_ab.T))) * 0.5, + dvhf = td_grad.get_veff(mol, cp.stack(((dmx + dmx.T), (dmx + dmx.T))) * 0.5, 0.0, k_factor) dvhf_all += dvhf * 1 - dvhf = td_grad.get_veff(mol, cp.stack(((dmxmy_ab - dmxmy_ab.T), (dmxmy_ab - dmxmy_ab.T))) * 0.5, + dvhf = td_grad.get_veff(mol, cp.stack(((dmx - dmx.T), (dmx - dmx.T))) * 0.5, j_factor=0.0, k_factor=k_factor, hermi=2) dvhf_all += dvhf * 1 elif td_grad.base.extype == 1: - dvhf = td_grad.get_veff(mol, cp.stack(((dmxpy_ba + dmxpy_ba.T), (dmxpy_ba.T + dmxpy_ba))) * 0.5, + dvhf = td_grad.get_veff(mol, cp.stack(((dmx + dmx.T), (dmx.T + dmx))) * 0.5, 0.0, k_factor) dvhf_all += dvhf * 1 - dvhf = td_grad.get_veff(mol, cp.stack(((dmxmy_ba - dmxmy_ba.T), (-dmxmy_ba.T + dmxmy_ba))) * 0.5, + dvhf = td_grad.get_veff(mol, cp.stack(((dmx - dmx.T), (-dmx.T + dmx))) * 0.5, j_factor=0.0, k_factor=k_factor, hermi=2) dvhf_all += dvhf * 1 @@ -395,10 +328,10 @@ def fvind(x): veff1 = cp.zeros((2,4,3,nao,nao)) veff1[:,0] += vxc1[:,1:] veff1[:,1] += (f1oo[:,1:] + fxcz1[:,1:])*2 - veff1[0,1] += (k1ao_xpy[0,0,1:] + k1ao_xpy[0,1,1:] + k1ao_xpy[1,0,1:] + k1ao_xpy[1,1,1:] - +k1ao_xmy[0,0,1:] + k1ao_xmy[0,1,1:] + k1ao_xmy[1,0,1:] + k1ao_xmy[1,1,1:])*2 - veff1[1,1] += (k1ao_xpy[0,0,1:] + k1ao_xpy[0,1,1:] - k1ao_xpy[1,0,1:] - k1ao_xpy[1,1,1:] - +k1ao_xmy[0,0,1:] + k1ao_xmy[0,1,1:] - k1ao_xmy[1,0,1:] - k1ao_xmy[1,1,1:])*2 + veff1[0,1] += (k1ao[0,0,1:] + k1ao[0,1,1:] + k1ao[1,0,1:] + k1ao[1,1,1:] + +k1ao[0,0,1:] + k1ao[0,1,1:] + k1ao[1,0,1:] + k1ao[1,1,1:])*2 + veff1[1,1] += (k1ao[0,0,1:] + k1ao[0,1,1:] - k1ao[1,0,1:] - k1ao[1,1,1:] + +k1ao[0,0,1:] + k1ao[0,1,1:] - k1ao[1,0,1:] - k1ao[1,1,1:])*2 veff1[:,2] += f1vo[0,:,1:] veff1[:,3] += f1vo[1,:,1:] @@ -435,33 +368,35 @@ def fvind(x): if td_grad.base.extype == 0: deveff2 = cp.asarray( - [contract('xpq,pq->x', veff1b[2,:,p0:p1], dmxpy_ab[p0:p1,:]) + [contract('xpq,pq->x', veff1b[2,:,p0:p1], dmx[p0:p1,:]) for p0, p1 in aoslices[:, 2:]]) deveff2 += cp.asarray( - [contract('xqp,pq->x', veff1b[2,:,p0:p1], 
dmxpy_ab[:,p0:p1]) + [contract('xqp,pq->x', veff1b[2,:,p0:p1], dmx[:,p0:p1]) for p0, p1 in aoslices[:, 2:]]) deveff3 = cp.asarray( - [contract('xpq,pq->x', veff1b[3,:,p0:p1], dmxmy_ab[p0:p1,:]) + [contract('xpq,pq->x', veff1b[3,:,p0:p1], dmx[p0:p1,:]) for p0, p1 in aoslices[:, 2:]]) deveff3 += cp.asarray( - [contract('xqp,pq->x', veff1b[3,:,p0:p1], dmxmy_ab[:,p0:p1]) + [contract('xqp,pq->x', veff1b[3,:,p0:p1], dmx[:,p0:p1]) for p0, p1 in aoslices[:, 2:]]) elif td_grad.base.extype == 1: deveff2 = cp.asarray( - [contract('xpq,pq->x', veff1a[2,:,p0:p1], dmxpy_ba[p0:p1,:]) + [contract('xpq,pq->x', veff1a[2,:,p0:p1], dmx[p0:p1,:]) for p0, p1 in aoslices[:, 2:]]) deveff2 += cp.asarray( - [contract('xqp,pq->x', veff1a[2,:,p0:p1], dmxpy_ba[:,p0:p1]) + [contract('xqp,pq->x', veff1a[2,:,p0:p1], dmx[:,p0:p1]) for p0, p1 in aoslices[:, 2:]]) deveff3 = cp.asarray( - [contract('xpq,pq->x', veff1a[3,:,p0:p1], dmxmy_ba[p0:p1,:]) + [contract('xpq,pq->x', veff1a[3,:,p0:p1], dmx[p0:p1,:]) for p0, p1 in aoslices[:, 2:]]) deveff3 += cp.asarray( - [contract('xqp,pq->x', veff1a[3,:,p0:p1], dmxmy_ba[:,p0:p1]) + [contract('xqp,pq->x', veff1a[3,:,p0:p1], dmx[:,p0:p1]) for p0, p1 in aoslices[:, 2:]]) de += 2.0 * dvhf_all + delec + dh1e_ground + dh1e_td + deveff0 + deveff1 + deveff2 + deveff3 log.timer('TDUKS nuclear gradients', *time0) + print("de") + print(de) return de.get() def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, @@ -500,31 +435,21 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, else: v1ao = None if with_kxc: - k1ao_xpy = cp.zeros((2,2,4,nao,nao)) - k1ao_xmy = cp.zeros((2,2,4,nao,nao)) + k1ao = cp.zeros((2,2,4,nao,nao)) deriv = 3 else: - k1ao_xpy = k1ao_xmy = None - - dmvo0 = opt.sort_orbitals(dmvo[0], axis=[0, 1]) - dmvo1 = opt.sort_orbitals(dmvo[1], axis=[0, 1]) + k1ao = None - # # create a mc object to use mcfun. 
- # nimc = numint2c.NumInt2C() - # nimc.collinear = 'mcol' - # nimc.collinear_samples=td_grad.base.collinear_samples + dmvo0 = opt.sort_orbitals(dmvo, axis=[0, 1]) - # calculate the derivatives. - # fxc_sf,kxc_sf = cache_xc_kernel_sf(ni,mol,mf.grids,mf.xc,mo_coeff,mo_occ, - # td_grad.base.collinear_samples,deriv=3)[2:] - p0,p1=0,0 # the two parameters are used for counts the batch of grids. + p0, p1 = 0, 0 if xctype == "LDA": fmat_, ao_deriv = tdrks._lda_eval_mat_, 1 elif xctype == "GGA": fmat_, ao_deriv = _gga_eval_mat_, 2 elif xctype == "MGGA": - fmat_, ao_deriv = tdrks._mgga_eval_mat_, 2 + raise NotImplementedError("MGGA is not supported") if xctype == 'LDA': for ao, mask, weight, coords \ @@ -535,10 +460,8 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, mo_coeff_mask_a = mo_coeff[0, mask] mo_coeff_mask_b = mo_coeff[1, mask] dmvo0_mask = dmvo0[mask[:, None], mask] - dmvo1_mask = dmvo1[mask[:, None], mask] with_lapl = False - # fxc_sf, kxc_sf = cache_xc_kernel_sf(ni,mol,mf.grids,mf.xc,mo_coeff,mo_occ, - # td_grad.base.collinear_samples,deriv=3)[2:] + rhoa_slice = ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_a, mo_occ[0], None, xctype, with_lapl) rhob_slice = ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_b, @@ -550,53 +473,31 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, fxc_sf, kxc_sf = eval_xc_eff(xc_code, rho_z, deriv=3, xctype=xctype)[2:4] s_s = fxc_sf * weight - + rho1 = ni.eval_rho(_sorted_mol, ao[0], dmvo0_mask, mask, xctype) + f_val = rho1 * s_s * 2 # s_s*2 because of \sigma_x \sigma_x + \sigma_y \sigma_y + + fmat_(_sorted_mol, f1vo[0][1], ao, f_val, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[0][0], ao, f_val, mask, shls_slice, ao_loc) + + k_idx = -1 if extype == 0: - rho1_ab = ni.eval_rho(_sorted_mol, ao[0], dmvo0_mask, mask, xctype) - # s_s*2 because of \sigma_x \sigma_x + \sigma_y \sigma_y - fmat_(_sorted_mol, f1vo[0][1], ao, rho1_ab*s_s*2, mask, shls_slice, ao_loc) - fmat_(_sorted_mol, 
f1vo[0][0], ao, rho1_ab*s_s*2, mask, shls_slice, ao_loc) - # lda_sum_(f1vo[0][1], ao, rho1_ab*s_s*2, mask) - # lda_sum_(f1vo[0][0], ao, rho1_ab*s_s*2, mask) - - if with_kxc: - s_s_n = kxc_sf[:,:,0] * weight - s_s_s = kxc_sf[:,:,1] * weight - fmat_(_sorted_mol, k1ao_xpy[0][0], ao, s_s_n*2*rho1_ab*rho1_ab, mask, shls_slice, ao_loc) - fmat_(_sorted_mol, k1ao_xpy[1][0], ao, s_s_s*2*rho1_ab*rho1_ab, mask, shls_slice, ao_loc) - - rho1_ab = ni.eval_rho(_sorted_mol, ao[0], dmvo1_mask, mask, xctype) - # py attention to the order of f1vo[1][1] and f1vo[1][0] - fmat_(_sorted_mol, f1vo[1][1], ao, rho1_ab*s_s*2, mask, shls_slice, ao_loc) - fmat_(_sorted_mol, f1vo[1][0], ao, -rho1_ab*s_s*2, mask, shls_slice, ao_loc) - - if with_kxc: - # Note the "-" - fmat_(_sorted_mol, k1ao_xmy[0][0], ao, s_s_n*2*rho1_ab*rho1_ab, mask, shls_slice, ao_loc) - fmat_(_sorted_mol, k1ao_xmy[1][0], ao, s_s_s*2*rho1_ab*rho1_ab, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[1][1], ao, f_val, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[1][0], ao, -f_val, mask, shls_slice, ao_loc) + k_idx = 0 elif extype == 1: - rho1_ba = ni.eval_rho(_sorted_mol, ao[0], dmvo0_mask, mask, xctype) - # s_s*2 because of \sigma_x \sigma_x + \sigma_y \sigma_y - fmat_(_sorted_mol, f1vo[0][1], ao, rho1_ba*s_s*2, mask, shls_slice, ao_loc) - fmat_(_sorted_mol, f1vo[0][0], ao, rho1_ba*s_s*2, mask, shls_slice, ao_loc) - - if with_kxc: - s_s_n = kxc_sf[:,:,0] * weight - s_s_s = kxc_sf[:,:,1] * weight - fmat_(_sorted_mol, k1ao_xpy[0][1], ao, s_s_n*2*rho1_ba*rho1_ba, mask, shls_slice, ao_loc) - fmat_(_sorted_mol, k1ao_xpy[1][1], ao, s_s_s*2*rho1_ba*rho1_ba, mask, shls_slice, ao_loc) - - rho1_ba = ni.eval_rho(_sorted_mol, ao[0], dmvo1_mask, mask, xctype) - # py attention to the order of f1vo[1][1] and f1vo[1][0] - fmat_(_sorted_mol, f1vo[1][1], ao, -rho1_ba*s_s*2, mask, shls_slice, ao_loc) - fmat_(_sorted_mol, f1vo[1][0], ao, rho1_ba*s_s*2, mask, shls_slice, ao_loc) - - if with_kxc: - # Note the "-" - fmat_(_sorted_mol, 
k1ao_xmy[0][1], ao, s_s_n*2*rho1_ba*rho1_ba, mask, shls_slice, ao_loc) - fmat_(_sorted_mol, k1ao_xmy[1][1], ao, s_s_s*2*rho1_ba*rho1_ba, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[1][1], ao, -f_val, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[1][0], ao, f_val, mask, shls_slice, ao_loc) + k_idx = 1 + + if with_kxc: + s_s_n = kxc_sf[:,:,0] * weight + s_s_s = kxc_sf[:,:,1] * weight + k_val_n = s_s_n * 2 * rho1 * rho1 + k_val_s = s_s_s * 2 * rho1 * rho1 + fmat_(_sorted_mol, k1ao[0][k_idx], ao, k_val_n, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, k1ao[1][k_idx], ao, k_val_s, mask, shls_slice, ao_loc) rho = (ni.eval_rho2(_sorted_mol, ao[0],mo_coeff_mask_a, mo_occ[0], mask, xctype), ni.eval_rho2(_sorted_mol, ao[0], mo_coeff_mask_b, mo_occ[1], mask, xctype)) @@ -623,11 +524,8 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, mo_coeff_mask_a = mo_coeff[0, mask] mo_coeff_mask_b = mo_coeff[1, mask] dmvo0_mask = dmvo0[mask[:, None], mask] - dmvo1_mask = dmvo1[mask[:, None], mask] with_lapl = False - # fxc_sf, kxc_sf = cache_xc_kernel_sf(ni,mol,mf.grids,mf.xc,mo_coeff,mo_occ, - # td_grad.base.collinear_samples,deriv=3)[2:] rhoa_slice = ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_a, mo_occ[0], None, xctype, with_lapl) rhob_slice = ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_b, @@ -638,49 +536,26 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, eval_xc_eff = mcfun_eval_xc_adapter_sf(ni, xc_code, td_grad.base.collinear_samples) fxc_sf, kxc_sf = eval_xc_eff(xc_code, rho_z, deriv=3, xctype=xctype)[2:4] - if extype == 0: - rho1_ab = ni.eval_rho(_sorted_mol, ao, dmvo0_mask, mask, xctype, hermi=0, with_lapl=False) - - wv_sf = uks_sf_gga_wv1(rho1_ab,fxc_sf,weight, extype) - fmat_(_sorted_mol, f1vo[0][1], ao, wv_sf, mask, shls_slice, ao_loc) - fmat_(_sorted_mol, f1vo[0][0], ao, wv_sf, mask, shls_slice, ao_loc) + rho1 = ni.eval_rho(_sorted_mol, ao, dmvo0_mask, mask, xctype, hermi=0, with_lapl=False) + wv_sf = 
uks_sf_gga_wv1(rho1,fxc_sf,weight) - if with_kxc: - gv_sf = uks_sf_gga_wv2_p(rho1_ab,kxc_sf,weight, extype) - fmat_(_sorted_mol, k1ao_xpy[0][0], ao, gv_sf[0], mask, shls_slice, ao_loc) - fmat_(_sorted_mol, k1ao_xpy[1][0], ao, gv_sf[1], mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[0][1], ao, wv_sf, mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[0][0], ao, wv_sf, mask, shls_slice, ao_loc) - rho1_ab = ni.eval_rho(_sorted_mol, ao, dmvo1_mask, mask, xctype, hermi=0, with_lapl=False) - - wv_sf = uks_sf_gga_wv1(rho1_ab,fxc_sf,weight, extype) + k_idx = -1 + if extype == 0: fmat_(_sorted_mol, f1vo[1][1], ao, wv_sf, mask, shls_slice, ao_loc) fmat_(_sorted_mol, f1vo[1][0], ao, -wv_sf, mask, shls_slice, ao_loc) - - if with_kxc: - gv_sf = uks_sf_gga_wv2_m(rho1_ab,kxc_sf,weight, extype) - fmat_(_sorted_mol, k1ao_xmy[0][0], ao, gv_sf[0], mask, shls_slice, ao_loc) - fmat_(_sorted_mol, k1ao_xmy[1][0], ao, gv_sf[1], mask, shls_slice, ao_loc) + k_idx = 0 elif extype == 1: - rho1_ba = ni.eval_rho(_sorted_mol, ao, dmvo0_mask, mask, xctype, hermi=0, with_lapl=False) - wv_sf = uks_sf_gga_wv1(rho1_ba,fxc_sf,weight, extype) - fmat_(_sorted_mol, f1vo[0][1], ao, wv_sf, mask, shls_slice, ao_loc) - fmat_(_sorted_mol, f1vo[0][0], ao, wv_sf, mask, shls_slice, ao_loc) - - if with_kxc: - gv_sf = uks_sf_gga_wv2_p(rho1_ba,kxc_sf,weight, extype) - fmat_(_sorted_mol, k1ao_xpy[0][1], ao, gv_sf[0], mask, shls_slice, ao_loc) - fmat_(_sorted_mol, k1ao_xpy[1][1], ao, gv_sf[1], mask, shls_slice, ao_loc) - - rho1_ba = ni.eval_rho(_sorted_mol, ao, dmvo1_mask, mask, xctype, hermi=0, with_lapl=False) - - wv_sf = uks_sf_gga_wv1(rho1_ba,fxc_sf,weight, extype) fmat_(_sorted_mol, f1vo[1][1], ao, -wv_sf, mask, shls_slice, ao_loc) fmat_(_sorted_mol, f1vo[1][0], ao, wv_sf, mask, shls_slice, ao_loc) + k_idx = 1 - if with_kxc: - gv_sf = uks_sf_gga_wv2_m(rho1_ba,kxc_sf,weight, extype) - fmat_(_sorted_mol, k1ao_xmy[0][1], ao, gv_sf[0], mask, shls_slice, ao_loc) - fmat_(_sorted_mol, k1ao_xmy[1][1], ao, 
gv_sf[1], mask, shls_slice, ao_loc) + if with_kxc: + gv_sf = uks_sf_gga_wv2_p(rho1, kxc_sf, weight) + fmat_(_sorted_mol, k1ao[0][k_idx], ao, gv_sf[0], mask, shls_slice, ao_loc) + fmat_(_sorted_mol, k1ao[1][k_idx], ao, gv_sf[1], mask, shls_slice, ao_loc) rho = cp.stack([ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_a, mo_occ[0], mask, xctype), ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_b, mo_occ[1], mask, xctype)]) @@ -690,14 +565,12 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, dmoo1_mask = dmoo1[mask[:, None], mask] rho2 = cp.stack([ni.eval_rho(_sorted_mol, ao, dmoo0_mask, mask, xctype, hermi=1), ni.eval_rho(_sorted_mol, ao, dmoo1_mask, mask, xctype, hermi=1)]) - # wv = numint._uks_gga_wv1(rho, rho2, vxc, fxc, weight) tmp = contract("axg,axbyg->byg", rho2, fxc) wv = contract("byg,g->byg", tmp, weight) wv[:,0] *= 0.5 fmat_(_sorted_mol, f1oo[0], ao, wv[0], mask, shls_slice, ao_loc) fmat_(_sorted_mol, f1oo[1], ao, wv[1], mask, shls_slice, ao_loc) if with_vxc: - # wv = numint._uks_gga_wv0(rho, vxc, weight) wv = vxc * weight wv[:,0] *= 0.5 fmat_(_sorted_mol, v1ao[0], ao, wv[0], mask, shls_slice, ao_loc) @@ -718,12 +591,10 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, v1ao[:,1:] *= -1 v1ao = opt.unsort_orbitals(v1ao, axis=[2, 3]) if with_kxc: - k1ao_xpy[:,:,1:] *= -1 - k1ao_xmy[:,:,1:] *= -1 - k1ao_xpy = opt.unsort_orbitals(k1ao_xpy, axis=[3, 4]) - k1ao_xmy = opt.unsort_orbitals(k1ao_xmy, axis=[3, 4]) + k1ao[:,:,1:] *= -1 + k1ao = opt.unsort_orbitals(k1ao, axis=[3, 4]) - return f1vo, f1oo, v1ao, (k1ao_xpy,k1ao_xmy) + return f1vo, f1oo, v1ao, k1ao def _gga_eval_mat_(mol, vmat, ao, wv, mask, shls_slice, ao_loc): # wv[0] *= 0.5 # *.5 because vmat + vmat.T at the end @@ -736,155 +607,28 @@ def _gga_eval_mat_(mol, vmat, ao, wv, mask, shls_slice, ao_loc): add_sparse(vmat[1:], vtmp, mask) return vmat -def uks_sf_gga_wv1(rho1, fxc_sf, weight, extype): +def uks_sf_gga_wv1(rho1, fxc_sf, weight): # fxc_sf with a shape 
(4,4,ngrid), 4 means I, \nabla_x,y,z. - if extype == 0: - ngrid = weight.shape[-1] - wv_ab = cp.empty((4,ngrid)) - wv_ab = cp.einsum('yp,xyp->xp', rho1,fxc_sf) - - # Don't forget (sigma_x sigma_x + sigma_y sigma_y) needs *2 for kernel term. - wv_ab[1:] *=2.0 - return wv_ab*weight - elif extype == 1: - ngrid = weight.shape[-1] - wv_ba = cp.empty((4,ngrid)) - wv_ba = cp.einsum('yp,xyp->xp', rho1,fxc_sf) - - # Don't forget (sigma_x sigma_x + sigma_y sigma_y) needs *2 for kernel term. - wv_ba[1:] *=2.0 - return wv_ba*weight - # rho1_ab,rho1_ba = rho1 - # ngrid = weight.shape[-1] - # wv_ab, wv_ba = cp.empty((2,4,ngrid)) - # wv_ab = cp.einsum('yp,xyp->xp', rho1_ab,fxc_sf) - # wv_ba = cp.einsum('yp,xyp->xp', rho1_ba,fxc_sf) - # # wv_ab[0] = wv_ab[0] *2 *.5 # *2 bacause of kernel, *0.5 for the (x + x.T)*0.5 - # # wv_ba[0] = wv_ba[0] *2 *.5 - - # # Don't forget (sigma_x sigma_x + sigma_y sigma_y) needs *2 for kernel term. - # wv_ab[1:] *=2.0 - # wv_ba[1:] *=2.0 - # return wv_ab*weight, wv_ba*weight - -def uks_sf_gga_wv2_p(rho1, kxc_sf, weight, extype): - # kxc_sf with a shape (4,4,2,4,ngrid), 4 means I,\nabla_x,y,z, - # 0: n, \nabla_x,y,z n; 1: s, \nabla_x,y,z s. 
- if extype == 0: - ngrid = weight.shape[-1] - gv_ab = cp.empty((2,4,ngrid)) - # Note *2 and *0.5 like in function uks_sf_gga_wv1 - gv_ab = cp.einsum('xp,yp,xyvzp->vzp', rho1, rho1, kxc_sf, optimize=True) - - gv_ab[0,1:] *=2.0 - gv_ab[1,1:] *=2.0 - return gv_ab*weight - elif extype == 1: - ngrid = weight.shape[-1] - gv_ba = cp.empty((2,4,ngrid)) - # Note *2 and *0.5 like in function uks_sf_gga_wv1 - gv_ba = cp.einsum('xp,yp,xyvzp->vzp', rho1, rho1, kxc_sf, optimize=True) - - gv_ba[0,1:] *=2.0 - gv_ba[1,1:] *=2.0 - return gv_ba*weight - # rho1_ab,rho1_ba = rho1 - # ngrid = weight.shape[-1] - # gv_ab, gv_ba = cp.empty((2,2,4,ngrid)) - # # Note *2 and *0.5 like in function uks_sf_gga_wv1 - # gv_ab = cp.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab+rho1_ba, kxc_sf, optimize=True) - # gv_ba = cp.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba+rho1_ab, kxc_sf, optimize=True) - - # gv_ab[0,1:] *=2.0 - # gv_ab[1,1:] *=2.0 - # gv_ba[0,1:] *=2.0 - # gv_ba[1,1:] *=2.0 - # return gv_ab*weight, gv_ba*weight - -def uks_sf_gga_wv2_m(rho1, kxc_sf,weight, extype): - if extype == 0: - ngrid = weight.shape[-1] - gv_ab = cp.empty((2,5,ngrid)) - # Note *2 and *0.5 like in function uks_sf_mgga_wv1 - gv_ab = cp.einsum('xp,yp,xyvzp->vzp', rho1, rho1, kxc_sf , optimize=True) - - gv_ab[:,1:] *=2.0 - return gv_ab*weight - elif extype == 1: - ngrid = weight.shape[-1] - gv_ba = cp.empty((2,5,ngrid)) - # Note *2 and *0.5 like in function uks_sf_mgga_wv1 - gv_ba = cp.einsum('xp,yp,xyvzp->vzp', rho1, rho1, kxc_sf , optimize=True) - - gv_ba[:,1:] *=2.0 - return gv_ba*weight - # rho1_ab,rho1_ba = rho1 - # ngrid = weight.shape[-1] - # gv_ab, gv_ba = cp.empty((2,2,5,ngrid)) - # # Note *2 and *0.5 like in function uks_sf_mgga_wv1 - # gv_ab = cp.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab-rho1_ba, kxc_sf , optimize=True) - # gv_ba = cp.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba-rho1_ab, kxc_sf , optimize=True) - - # gv_ab[:,1:] *=2.0 - # gv_ba[:,1:] *=2.0 - # return gv_ab*weight, gv_ba*weight - -def 
uks_sf_mgga_wv1(rho1, fxc_sf,weight): - rho1_ab,rho1_ba = rho1 - # fxc_sf with a shape (5,5,ngrid), 5 means I, \nabla_x,y,z s, u - # s_s, s_Ns, Ns_s, Ns_Ns, s_u, u_s, u_Ns, Ns_u, u_u ngrid = weight.shape[-1] - wv_ab, wv_ba = cp.empty((2,5,ngrid)) - wv_ab = cp.einsum('yp,xyp->xp', rho1_ab,fxc_sf) - wv_ba = cp.einsum('yp,xyp->xp', rho1_ba,fxc_sf) - # wv_ab[0] = wv_ab[0] *2 *.5 # *2 bacause of kernel, *0.5 for the (x + x.T)*0.5 - # wv_ba[0] = wv_ba[0] *2 *.5 + wv = cp.empty((4,ngrid)) + wv = cp.einsum('yp,xyp->xp', rho1, fxc_sf) # Don't forget (sigma_x sigma_x + sigma_y sigma_y) needs *2 for kernel term. - wv_ab[1:4] *=2.0 - wv_ba[1:4] *=2.0 - # *0.5 below is for tau->ao - wv_ab[4] *= 0.5 - wv_ba[4] *= 0.5 - return wv_ab*weight, wv_ba*weight - -def uks_sf_mgga_wv2_p(rho1, kxc_sf,weight): - rho1_ab,rho1_ba = rho1 - # kxc_sf with a shape (5,5,2,5,ngrid), 5 means s \nabla_x,y,z s, u - # s_s -> 0: n, \nabla_x,y,z n, tau ; 1: s, \nabla_x,y,z s, u - # s_Ns -> - # Ns_s -> - # Ns_Ns -> - # s_u -> - # u_s -> - # u_Ns -> - # Ns_u -> - # u_u -> - ngrid = weight.shape[-1] - gv_ab, gv_ba = cp.empty((2,2,5,ngrid)) - # Note *2 and *0.5 like in function uks_sf_mgga_wv1 - gv_ab = cp.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab+rho1_ba, kxc_sf, optimize=True) - gv_ba = cp.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba+rho1_ab, kxc_sf, optimize=True) - - gv_ab[:,1:4] *=2.0 - gv_ba[:,1:4] *=2.0 - gv_ab[:,4] *= 0.5 - gv_ba[:,4] *= 0.5 - return gv_ab*weight, gv_ba*weight - -def uks_sf_mgga_wv2_m(rho1, kxc_sf,weight): - rho1_ab,rho1_ba = rho1 + wv[1:] *=2.0 + return wv*weight + + +def uks_sf_gga_wv2_p(rho1, kxc_sf, weight): + # kxc_sf with a shape (4,4,2,4,ngrid), 4 means I,\nabla_x,y,z, + # 0: n, \nabla_x,y,z n; 1: s, \nabla_x,y,z s. 
ngrid = weight.shape[-1] - gv_ab, gv_ba = cp.empty((2,2,5,ngrid)) - # Note *2 and *0.5 like in function uks_sf_mgga_wv1 - gv_ab = cp.einsum('xp,yp,xyvzp->vzp', rho1_ab, rho1_ab-rho1_ba, kxc_sf , optimize=True) - gv_ba = cp.einsum('xp,yp,xyvzp->vzp', rho1_ba, rho1_ba-rho1_ab, kxc_sf , optimize=True) - - gv_ab[:,1:4] *=2.0 - gv_ba[:,1:4] *=2.0 - gv_ab[:,4] *= 0.5 - gv_ba[:,4] *= 0.5 - return gv_ab*weight, gv_ba*weight + gv = cp.empty((2,4,ngrid)) + # Note *2 and *0.5 like in function uks_sf_gga_wv1 + gv = cp.einsum('xp,yp,xyvzp->vzp', rho1, rho1, kxc_sf, optimize=True) + + gv[0,1:] *=2.0 + gv[1,1:] *=2.0 + return gv*weight def _contract_xc_kernel_z(td_grad, xc_code, dmvo, max_memory=2000): From be30fa1407a138e934545e84ba4d15d077a200e3 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Tue, 28 Oct 2025 12:55:24 +0800 Subject: [PATCH 22/32] finish sf-tddft-gradient for LDA and GGA --- gpu4pyscf/grad/tduks_sf.py | 90 ++++++++++++++------ gpu4pyscf/grad/tests/test_tduks_sf_grad.py | 96 +++++++++++++++++++--- 2 files changed, 147 insertions(+), 39 deletions(-) diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index 49cdf2fd5..278d39c88 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -28,9 +28,10 @@ from gpu4pyscf.grad import tdrks +# TODO: meta-GGA should be supported. def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): - ''' Spin flip TDDFT gradient in UKS framework. Note: This function supports - both TDA or TDDFT results. + ''' Spin flip TDA gradient in UKS framework. Note: This function supports + both TDA or TDA results. 
Parameters ---------- @@ -123,7 +124,11 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): vk *= hyb if abs(omega) > 1e-10: - vk += mf.get_k(mol, dm, hermi=0, omega=omega) * (alpha-hyb) + vk_omega = mf.get_k(mol, dm, hermi=0, omega=omega) * (alpha-hyb) + if not isinstance(vk_omega, cp.ndarray): + vk_omega = cp.asarray(vk_omega) + vk += vk_omega + vk_omega = None vj = vj.reshape(2,3,nao,nao) vk = vk.reshape(2,3,nao,nao) @@ -166,6 +171,8 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): dm = (dmzoo_a, dmx, dmx, dmzoo_b, dmx.T, -dmx.T) vj = mf.get_j(mol, dm, hermi=0).reshape(2,3,nao,nao) + if not isinstance(vj, cp.ndarray): + vj = cp.asarray(vj) veff0doo = vj[0,0]+vj[1,0] + f1oo[:,0] veff0doo[0] += (k1ao[0,0,0] + k1ao[0,1,0] + k1ao[1,0,0] + k1ao[1,1,0] @@ -324,6 +331,31 @@ def fvind(x): j_factor=0.0, k_factor=k_factor, hermi=2) dvhf_all += dvhf * 1 + if with_k and omega != 0: + j_factor = 0.0 + k_factor = alpha-hyb # =beta + dvhf = td_grad.get_veff(mol, cp.stack(((dmz1dooa + dmz1dooa.T) * 0.25 + oo0a, + (dmz1doob + dmz1doob.T) * 0.25 + oo0b,)), j_factor, k_factor, omega=omega) + dvhf_all += dvhf + dvhf = td_grad.get_veff(mol, cp.stack(((dmz1dooa + dmz1dooa.T), (dmz1doob + dmz1doob.T))) * 0.25, + j_factor, k_factor, omega=omega) + dvhf_all -= dvhf + if td_grad.base.extype == 0: + dvhf = td_grad.get_veff(mol, cp.stack(((dmx + dmx.T), (dmx + dmx.T))) * 0.5, + 0.0, k_factor, omega=omega) + dvhf_all += dvhf * 1 + dvhf = td_grad.get_veff(mol, cp.stack(((dmx - dmx.T), (dmx - dmx.T))) * 0.5, + j_factor=0.0, k_factor=k_factor, hermi=2, omega=omega) + dvhf_all += dvhf * 1 + + elif td_grad.base.extype == 1: + dvhf = td_grad.get_veff(mol, cp.stack(((dmx + dmx.T), (dmx.T + dmx))) * 0.5, + 0.0, k_factor, omega=omega) + dvhf_all += dvhf * 1 + dvhf = td_grad.get_veff(mol, cp.stack(((dmx - dmx.T), (-dmx.T + dmx))) * 0.5, + j_factor=0.0, k_factor=k_factor, hermi=2, omega=omega) + dvhf_all += dvhf * 1 + fxcz1 = 
_contract_xc_kernel_z(td_grad, mf.xc, z1ao, max_memory) veff1 = cp.zeros((2,4,3,nao,nao)) veff1[:,0] += vxc1[:,1:] @@ -395,8 +427,6 @@ def fvind(x): de += 2.0 * dvhf_all + delec + dh1e_ground + dh1e_td + deveff0 + deveff1 + deveff2 + deveff3 log.timer('TDUKS nuclear gradients', *time0) - print("de") - print(de) return de.get() def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, @@ -450,7 +480,7 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, fmat_, ao_deriv = _gga_eval_mat_, 2 elif xctype == "MGGA": raise NotImplementedError("MGGA is not supported") - + # TODO: LDA, GGA and meta-GGA can be gathered together if xctype == 'LDA': for ao, mask, weight, coords \ in ni.block_loop(_sorted_mol, grids, nao, ao_deriv): @@ -462,9 +492,9 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, dmvo0_mask = dmvo0[mask[:, None], mask] with_lapl = False - rhoa_slice = ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_a, + rhoa_slice = ni.eval_rho2(_sorted_mol, ao[0], mo_coeff_mask_a, mo_occ[0], None, xctype, with_lapl) - rhob_slice = ni.eval_rho2(_sorted_mol, ao, mo_coeff_mask_b, + rhob_slice = ni.eval_rho2(_sorted_mol, ao[0], mo_coeff_mask_b, mo_occ[1], None, xctype, with_lapl) rho_ab = (rhoa_slice, rhob_slice) rho_z = cp.array([rho_ab[0]+rho_ab[1], @@ -475,7 +505,7 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, rho1 = ni.eval_rho(_sorted_mol, ao[0], dmvo0_mask, mask, xctype) f_val = rho1 * s_s * 2 # s_s*2 because of \sigma_x \sigma_x + \sigma_y \sigma_y - + f_val = f_val[0] fmat_(_sorted_mol, f1vo[0][1], ao, f_val, mask, shls_slice, ao_loc) fmat_(_sorted_mol, f1vo[0][0], ao, f_val, mask, shls_slice, ao_loc) @@ -496,28 +526,33 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, s_s_s = kxc_sf[:,:,1] * weight k_val_n = s_s_n * 2 * rho1 * rho1 k_val_s = s_s_s * 2 * rho1 * rho1 + k_val_n = k_val_n[0,0] + k_val_s = k_val_s[0,0] fmat_(_sorted_mol, k1ao[0][k_idx], 
ao, k_val_n, mask, shls_slice, ao_loc) fmat_(_sorted_mol, k1ao[1][k_idx], ao, k_val_s, mask, shls_slice, ao_loc) - rho = (ni.eval_rho2(_sorted_mol, ao[0],mo_coeff_mask_a, mo_occ[0], mask, xctype), - ni.eval_rho2(_sorted_mol, ao[0], mo_coeff_mask_b, mo_occ[1], mask, xctype)) + rho = cp.array((ni.eval_rho2(_sorted_mol, ao[0], mo_coeff_mask_a, mo_occ[0], mask, xctype), + ni.eval_rho2(_sorted_mol, ao[0], mo_coeff_mask_b, mo_occ[1], mask, xctype))) vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv=deriv, spin=1)[1:] - u_u, u_d, d_d = fxc[0].T * weight if dmoo is not None: dmoo0_mask = dmoo0[mask[:, None], mask] dmoo1_mask = dmoo1[mask[:, None], mask] - rho2a = ni.eval_rho(_sorted_mol, ao[0], dmoo0_mask, mask, xctype, hermi=1) - rho2b = ni.eval_rho(_sorted_mol, ao[0], dmoo1_mask, mask, xctype, hermi=1) - fmat_(_sorted_mol, f1oo[0], ao, u_u*rho2a+u_d*rho2b, mask, shls_slice, ao_loc) - fmat_(_sorted_mol, f1oo[1], ao, u_d*rho2a+d_d*rho2b, mask, shls_slice, ao_loc) + rho2 = cp.array((ni.eval_rho(_sorted_mol, ao[0], dmoo0_mask, mask, xctype, hermi=1), + ni.eval_rho(_sorted_mol, ao[0], dmoo1_mask, mask, xctype, hermi=1))) + rho2 = rho2[:, cp.newaxis].copy() + tmp = contract("axg,axbyg->byg", rho2, fxc) + wv = contract("byg,g->byg", tmp, weight) + tmp = None + fmat_(_sorted_mol, f1oo[0], ao, wv[0], mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1oo[1], ao, wv[1], mask, shls_slice, ao_loc) if with_vxc: - vrho = vxc[0].T * weight + vrho = vxc * weight fmat_(_sorted_mol, v1ao[0], ao, vrho[0], mask, shls_slice, ao_loc) fmat_(_sorted_mol, v1ao[1], ao, vrho[1], mask, shls_slice, ao_loc) elif xctype == 'GGA': for ao, mask, weight, coords \ - in ni.block_loop(_sorted_mol, grids, nao, ao_deriv, max_memory): + in ni.block_loop(_sorted_mol, grids, nao, ao_deriv): p0 = p1 p1+= weight.shape[0] @@ -607,6 +642,7 @@ def _gga_eval_mat_(mol, vmat, ao, wv, mask, shls_slice, ao_loc): add_sparse(vmat[1:], vtmp, mask) return vmat + def uks_sf_gga_wv1(rho1, fxc_sf, weight): # fxc_sf with a shape 
(4,4,ngrid), 4 means I, \nabla_x,y,z. ngrid = weight.shape[-1] @@ -676,15 +712,17 @@ def _contract_xc_kernel_z(td_grad, xc_code, dmvo, max_memory=2000): mo_coeff_mask_b = mo_coeff[1, mask] dmvo0_mask = dmvo0[mask[:, None], mask] dmvo1_mask = dmvo1[mask[:, None], mask] - rho = (ni.eval_rho2(_sorted_mol, ao[0], mo_coeff_mask_a, mo_occ[0], mask, xctype), - ni.eval_rho2(_sorted_mol, ao[0], mo_coeff_mask_b, mo_occ[1], mask, xctype)) + rho = cp.array((ni.eval_rho2(_sorted_mol, ao[0], mo_coeff_mask_a, mo_occ[0], mask, xctype), + ni.eval_rho2(_sorted_mol, ao[0], mo_coeff_mask_b, mo_occ[1], mask, xctype))) vxc, fxc = ni.eval_xc_eff(xc_code, rho, deriv=deriv, spin=1)[1:3] - u_u, u_d, d_d = fxc[0].T * weight - rho1a = ni.eval_rho(_sorted_mol, ao[0], dmvo0_mask, mask, xctype, hermi=1) - rho1b = ni.eval_rho(_sorted_mol, ao[0], dmvo1_mask, mask, xctype, hermi=1) - - fmat_(_sorted_mol, f1vo[0], ao, u_u*rho1a+u_d*rho1b, mask, shls_slice, ao_loc) - fmat_(_sorted_mol, f1vo[1], ao, u_d*rho1a+d_d*rho1b, mask, shls_slice, ao_loc) + rho2 = cp.array((ni.eval_rho(_sorted_mol, ao[0], dmvo0_mask, mask, xctype, hermi=1), + ni.eval_rho(_sorted_mol, ao[0], dmvo1_mask, mask, xctype, hermi=1))) + rho2 = rho2[:, cp.newaxis].copy() + tmp = contract("axg,axbyg->byg", rho2, fxc) + wv = contract("byg,g->byg", tmp, weight) + tmp = None + fmat_(_sorted_mol, f1vo[0], ao, wv[0], mask, shls_slice, ao_loc) + fmat_(_sorted_mol, f1vo[1], ao, wv[1], mask, shls_slice, ao_loc) elif xctype == 'GGA': for ao, mask, weight, coords \ diff --git a/gpu4pyscf/grad/tests/test_tduks_sf_grad.py b/gpu4pyscf/grad/tests/test_tduks_sf_grad.py index 6225fefbc..2475d121e 100644 --- a/gpu4pyscf/grad/tests/test_tduks_sf_grad.py +++ b/gpu4pyscf/grad/tests/test_tduks_sf_grad.py @@ -32,16 +32,86 @@ bas0 = "cc-pvdz" -def diagonalize_tda(a, nroots=5): - nocc, nvir = a.shape[:2] - nov = nocc * nvir - a = a.reshape(nov, nov) - e, xy = np.linalg.eig(np.asarray(a)) - sorted_indices = np.argsort(e) - - e_sorted = e[sorted_indices] - 
xy_sorted = xy[:, sorted_indices] - - e_sorted_final = e_sorted[e_sorted > 1e-3] - xy_sorted = xy_sorted[:, e_sorted > 1e-3] - return e_sorted_final[:nroots], xy_sorted[:, :nroots] \ No newline at end of file +def setUpModule(): + global mol + mol = pyscf.M( + atom=atom, + basis=bas0, + max_memory=32000, + charge=1, + spin=1, + output="/dev/null", + verbose=1, + ) + + +def tearDownModule(): + global mol + mol.stdout.close() + del mol + + +def benchmark_with_cpu(mol, xc, nstates=3, lindep=1.0e-12, tda=False, extype=0): + mf = dft.UKS(mol, xc=xc).to_gpu().run() + tdsf = mf.SFTDA() + tdsf.extype = extype + tdsf.collinear = 'mcol' + tdsf.nstates=5 + tdsf.collinear_samples=10 + output = tdsf.kernel() + + g = tdsf.Gradients() + g.kernel() + + return g.de + + +def _check_grad(mol, xc, tol=1e-5, lindep=1.0e-12, disp=None, tda=True, method="cpu", extype=0): + if not tda: + raise NotImplementedError("spin-flip TDDFT gradients is not implemented") + if method == "cpu": + grad_gpu = benchmark_with_cpu(mol, xc, nstates=5, lindep=lindep, tda=tda, extype=extype) + else: + raise NotImplementedError("Only compared with CPU") + + return grad_gpu + + +class KnownValues(unittest.TestCase): + @unittest.skipIf(num_devices > 1, '') + def test_grad_b3lyp_tda_spinflip_up_cpu(self): + grad_gpu = _check_grad(mol, xc="b3lyp", tol=5e-10, method="cpu") + # ref from pyscf-forge + ref = np.array([[ 8.79547051e-16, 8.63728537e-14, 1.87755267e-01], + [-4.31890391e-16, 2.15026042e-01, -9.38746716e-02], + [-4.50003252e-16, -2.15026042e-01, -9.38746716e-02]]) + assert abs(grad_gpu - ref).max() < 1e-5 + + def test_grad_b3lyp_tda_spinflip_down_cpu(self): + grad_gpu = _check_grad(mol, xc="b3lyp", tol=5e-10, method="cpu", extype=1) + # ref from pyscf-forge + ref = np.array([[-3.01640558e-16, 1.52982216e-13, 5.10689029e-02], + [ 1.36165869e-16, 4.52872857e-02, -2.55387304e-02], + [-3.08111636e-17, -4.52872857e-02, -2.55387304e-02],]) + assert abs(grad_gpu - ref).max() < 1e-5 + + def 
test_grad_svwn_tda_spinflip_down_cpu(self): + grad_gpu = _check_grad(mol, xc="svwn", tol=5e-10, method="cpu", extype=1) + # ref from pyscf-forge + ref = np.array([[-8.15030724e-16, -6.13885762e-14, 6.41681368e-02], + [ 1.12931062e-16, 5.34632826e-02, -3.20887796e-02], + [ 7.97399496e-17, -5.34632826e-02, -3.20887796e-02],]) + assert abs(grad_gpu - ref).max() < 1e-5 + + def test_grad_camb3lyp_tda_spinflip_down_cpu(self): + grad_gpu = _check_grad(mol, xc="camb3lyp", tol=5e-10, method="cpu", extype=1) + # ref from pyscf-forge + ref = np.array([[-7.43754261e-18, -1.56347842e-13, 4.99263503e-02], + [-1.84572351e-17, 4.52908126e-02, -2.49673842e-02], + [ 2.40683934e-17, -4.52908126e-02, -2.49673842e-02],]) + assert abs(grad_gpu - ref).max() < 1e-5 + + +if __name__ == "__main__": + print("Full Tests for spin-flip TD-UKS Gradient") + unittest.main() From 655bbc9d2f552c4d96e6bb4c4a97f472f74cd875 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Tue, 28 Oct 2025 13:52:32 +0800 Subject: [PATCH 23/32] fix some typos --- gpu4pyscf/grad/tduks_sf.py | 28 +++++++++------------- gpu4pyscf/grad/tests/test_tduks_sf_grad.py | 2 +- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index 278d39c88..c486eaa2a 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -29,7 +29,7 @@ # TODO: meta-GGA should be supported. -def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): +def grad_elec(td_grad, x_y, atmlst=None, verbose=logger.INFO): ''' Spin flip TDA gradient in UKS framework. Note: This function supports both TDA or TDA results. 
@@ -101,7 +101,7 @@ def grad_elec(td_grad, x_y, atmlst=None, max_memory=2000, verbose=logger.INFO): f1vo, f1oo, vxc1, k1ao = \ _contract_xc_kernel(td_grad, mf.xc, dmx, - (dmzoo_a,dmzoo_b), True, True, max_memory, td_grad.base.extype) + (dmzoo_a,dmzoo_b), True, True, td_grad.base.extype) # f1vo, (2,2,4,nao,nao), (X+Y) and (X-Y) with fxc_sf # f1oo, (2,4,nao,nao), 2T with fxc_sc # vxc1, ao with v1^{\sigma} @@ -356,7 +356,7 @@ def fvind(x): j_factor=0.0, k_factor=k_factor, hermi=2, omega=omega) dvhf_all += dvhf * 1 - fxcz1 = _contract_xc_kernel_z(td_grad, mf.xc, z1ao, max_memory) + fxcz1 = _contract_xc_kernel_z(td_grad, mf.xc, z1ao) veff1 = cp.zeros((2,4,3,nao,nao)) veff1[:,0] += vxc1[:,1:] veff1[:,1] += (f1oo[:,1:] + fxcz1[:,1:])*2 @@ -372,15 +372,15 @@ def fvind(x): if atmlst is None: atmlst = range(mol.natm) - offsetdic = mol.offset_nr_by_atom() + de = cp.zeros((len(atmlst),3)) delec = 2.0 * (dh_ground + dh_td - ds) aoslices = mol.aoslice_by_atom() delec = cp.asarray([cp.sum(delec[:, p0:p1], axis=1) for p0, p1 in aoslices[:, 2:]]) deveff0 = cp.asarray( - [contract("xpq,pq->x", veff1a[0,:,p0:p1], oo0a[p0:p1] + dmz1dooa[p0:p1] * 0.5) - for p0, p1 in aoslices[:, 2:]]) + [contract("xpq,pq->x", veff1a[0,:,p0:p1], oo0a[p0:p1] + dmz1dooa[p0:p1] * 0.5) + for p0, p1 in aoslices[:, 2:]]) deveff0 += cp.asarray( [contract("xpq,pq->x", veff1b[0,:,p0:p1], oo0b[p0:p1] + dmz1doob[p0:p1] * 0.5) for p0, p1 in aoslices[:, 2:]]) @@ -430,7 +430,7 @@ def fvind(x): return de.get() def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, - with_kxc=True, max_memory=2000, extype=0): + with_kxc=True, extype=0): mol = td_grad.mol mf = td_grad.base._scf grids = mf.grids @@ -472,8 +472,6 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, dmvo0 = opt.sort_orbitals(dmvo, axis=[0, 1]) - p0, p1 = 0, 0 - if xctype == "LDA": fmat_, ao_deriv = tdrks._lda_eval_mat_, 1 elif xctype == "GGA": @@ -484,8 +482,6 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, 
dmoo=None, with_vxc=True, if xctype == 'LDA': for ao, mask, weight, coords \ in ni.block_loop(_sorted_mol, grids, nao, ao_deriv): - p0 = p1 - p1+= weight.shape[0] mo_coeff_mask_a = mo_coeff[0, mask] mo_coeff_mask_b = mo_coeff[1, mask] @@ -553,8 +549,6 @@ def _contract_xc_kernel(td_grad, xc_code, dmvo, dmoo=None, with_vxc=True, elif xctype == 'GGA': for ao, mask, weight, coords \ in ni.block_loop(_sorted_mol, grids, nao, ao_deriv): - p0 = p1 - p1+= weight.shape[0] mo_coeff_mask_a = mo_coeff[0, mask] mo_coeff_mask_b = mo_coeff[1, mask] @@ -667,7 +661,7 @@ def uks_sf_gga_wv2_p(rho1, kxc_sf, weight): return gv*weight -def _contract_xc_kernel_z(td_grad, xc_code, dmvo, max_memory=2000): +def _contract_xc_kernel_z(td_grad, xc_code, dmvo): mol = td_grad.base._scf.mol mf = td_grad.base._scf grids = mf.grids @@ -707,7 +701,7 @@ def _contract_xc_kernel_z(td_grad, xc_code, dmvo, max_memory=2000): if xctype == 'LDA': for ao, mask, weight, coords \ - in ni.block_loop(_sorted_mol, grids, nao, ao_deriv, max_memory): + in ni.block_loop(_sorted_mol, grids, nao, ao_deriv): mo_coeff_mask_a = mo_coeff[0, mask] mo_coeff_mask_b = mo_coeff[1, mask] dmvo0_mask = dmvo0[mask[:, None], mask] @@ -726,7 +720,7 @@ def _contract_xc_kernel_z(td_grad, xc_code, dmvo, max_memory=2000): elif xctype == 'GGA': for ao, mask, weight, coords \ - in ni.block_loop(_sorted_mol, grids, nao, ao_deriv, max_memory): + in ni.block_loop(_sorted_mol, grids, nao, ao_deriv): mo_coeff_mask_a = mo_coeff[0, mask] mo_coeff_mask_b = mo_coeff[1, mask] dmvo0_mask = dmvo0[mask[:, None], mask] @@ -760,7 +754,7 @@ def _contract_xc_kernel_z(td_grad, xc_code, dmvo, max_memory=2000): class Gradients(tdrhf_grad.Gradients): @lib.with_doc(grad_elec.__doc__) def grad_elec(self, xy, singlet=None, atmlst=None, verbose=None): - return grad_elec(self, xy, atmlst, self.max_memory, self.verbose) + return grad_elec(self, xy, atmlst, self.verbose) Grad = Gradients diff --git a/gpu4pyscf/grad/tests/test_tduks_sf_grad.py 
b/gpu4pyscf/grad/tests/test_tduks_sf_grad.py index 2475d121e..9494c866d 100644 --- a/gpu4pyscf/grad/tests/test_tduks_sf_grad.py +++ b/gpu4pyscf/grad/tests/test_tduks_sf_grad.py @@ -58,7 +58,7 @@ def benchmark_with_cpu(mol, xc, nstates=3, lindep=1.0e-12, tda=False, extype=0): tdsf.collinear = 'mcol' tdsf.nstates=5 tdsf.collinear_samples=10 - output = tdsf.kernel() + tdsf.kernel() g = tdsf.Gradients() g.kernel() From 4d83459a0aefba8a614f0c0ea4f37dd26bb17f1b Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 30 Oct 2025 14:42:33 +0800 Subject: [PATCH 24/32] fix some typos --- gpu4pyscf/tdscf/_uhf_resp_sf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu4pyscf/tdscf/_uhf_resp_sf.py b/gpu4pyscf/tdscf/_uhf_resp_sf.py index 5636d60f2..c22c858fe 100644 --- a/gpu4pyscf/tdscf/_uhf_resp_sf.py +++ b/gpu4pyscf/tdscf/_uhf_resp_sf.py @@ -28,7 +28,7 @@ from concurrent.futures import ThreadPoolExecutor -MAX_GRIDS_PER_TASK = 65536 +MAX_GRIDS_PER_TASK = 8192 # Approximately (2,4,2,4,200,8192) ~ 800MB def _prange(start, end, step): '''Partitions range into segments: i0:i1, i1:i2, i2:i3, ...''' From 5b9eba95235e07a60a2be79a3529222c85da36c3 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 30 Oct 2025 14:47:23 +0800 Subject: [PATCH 25/32] remove some typos --- gpu4pyscf/tdscf/uhf.py | 230 ----------------------------------------- 1 file changed, 230 deletions(-) diff --git a/gpu4pyscf/tdscf/uhf.py b/gpu4pyscf/tdscf/uhf.py index 25e7126cf..e937d7479 100644 --- a/gpu4pyscf/tdscf/uhf.py +++ b/gpu4pyscf/tdscf/uhf.py @@ -19,13 +19,11 @@ from pyscf.tdscf import uhf as tdhf_cpu from pyscf import ao2mo from pyscf.data import nist -from pyscf.data import nist from gpu4pyscf.tdscf._lr_eig import eigh as lr_eigh, eig as lr_eig, real_eig from gpu4pyscf import scf from gpu4pyscf.lib import logger from gpu4pyscf.lib.cupy_helper import contract, tag_array from gpu4pyscf.tdscf._uhf_resp_sf import gen_uhf_response_sf, cache_xc_kernel_sf -from 
gpu4pyscf.tdscf._uhf_resp_sf import gen_uhf_response_sf, cache_xc_kernel_sf from gpu4pyscf.gto.int3c1e import int1e_grids from gpu4pyscf.tdscf import rhf as tdhf_gpu from gpu4pyscf.dft import KohnShamDFT @@ -622,223 +620,6 @@ def add_hf_(a, b, hyb=1): return a, b - -def get_ab_sf(mf, mo_energy=None, mo_coeff=None, mo_occ=None, collinear='col', collinear_samples=200): - r''' - From pyscf-forge - A and B matrices for TDDFT response function. - - A[i,a,j,b] = \delta_{ab}\delta_{ij}(E_a - E_i) + (ia||bj) - B[i,a,j,b] = (ia||jb) - - Spin symmetry is not considered in the returned A, B lists. - List A has two items: (A_baba, A_abab). - List B has two items: (B_baab, B_abba). - ''' - if mo_energy is None: mo_energy = mf.mo_energy - if mo_coeff is None: mo_coeff = mf.mo_coeff - if mo_occ is None: mo_occ = mf.mo_occ - if not isinstance(mo_coeff, cp.ndarray): - mo_coeff = cp.asarray(mo_coeff) - if not isinstance(mo_energy, cp.ndarray): - mo_energy = cp.asarray(mo_energy) - if not isinstance(mo_occ, cp.ndarray): - mo_occ = cp.asarray(mo_occ) - - mol = mf.mol - nao = mol.nao_nr() - occidx_a = cp.where(mo_occ[0]==1)[0] - viridx_a = cp.where(mo_occ[0]==0)[0] - occidx_b = cp.where(mo_occ[1]==1)[0] - viridx_b = cp.where(mo_occ[1]==0)[0] - orbo_a = mo_coeff[0][:,occidx_a] - orbv_a = mo_coeff[0][:,viridx_a] - orbo_b = mo_coeff[1][:,occidx_b] - orbv_b = mo_coeff[1][:,viridx_b] - nocc_a = orbo_a.shape[1] - nvir_a = orbv_a.shape[1] - nocc_b = orbo_b.shape[1] - nvir_b = orbv_b.shape[1] - - e_ia_b2a = (mo_energy[0][viridx_a,None] - mo_energy[1][occidx_b]).T - e_ia_a2b = (mo_energy[1][viridx_b,None] - mo_energy[0][occidx_a]).T - - a_b2a = cp.diag(e_ia_b2a.ravel()).reshape(nocc_b,nvir_a,nocc_b,nvir_a) - a_a2b = cp.diag(e_ia_a2b.ravel()).reshape(nocc_a,nvir_b,nocc_a,nvir_b) - b_b2a = cp.zeros((nocc_b,nvir_a,nocc_a,nvir_b)) - b_a2b = cp.zeros((nocc_a,nvir_b,nocc_b,nvir_a)) - a = (a_b2a, a_a2b) - b = (b_b2a, b_a2b) - - def add_hf_(a, b, hyb=1): - # In spin flip TDA/ TDDFT, hartree potential 
is zero. - # A : iabj ---> ijba; B : iajb ---> ibja - eri_a_b2a = ao2mo.general(mol, [orbo_b.get() ,orbo_b.get() ,orbv_a.get() ,orbv_a.get()], compact=False) - eri_a_a2b = ao2mo.general(mol, [orbo_a.get() ,orbo_a.get() ,orbv_b.get() ,orbv_b.get()], compact=False) - eri_b_b2a = ao2mo.general(mol, [orbo_b.get() ,orbv_b.get() ,orbo_a.get() ,orbv_a.get()], compact=False) - eri_b_a2b = ao2mo.general(mol, [orbo_a.get() ,orbv_a.get() ,orbo_b.get() ,orbv_b.get()], compact=False) - - eri_a_b2a = eri_a_b2a.reshape(nocc_b,nocc_b,nvir_a,nvir_a) - eri_a_a2b = eri_a_a2b.reshape(nocc_a,nocc_a,nvir_b,nvir_b) - eri_b_b2a = eri_b_b2a.reshape(nocc_b,nvir_b,nocc_a,nvir_a) - eri_b_a2b = eri_b_a2b.reshape(nocc_a,nvir_a,nocc_b,nvir_b) - - a_b2a, a_a2b = a - b_b2a, b_a2b = b - - a_b2a-= cp.einsum('ijba->iajb', eri_a_b2a) * hyb - a_a2b-= cp.einsum('ijba->iajb', eri_a_a2b) * hyb - b_b2a-= cp.einsum('ibja->iajb', eri_b_b2a) * hyb - b_a2b-= cp.einsum('ibja->iajb', eri_b_a2b) * hyb - - if isinstance(mf, scf.hf.KohnShamDFT): - from pyscf.dft import xc_deriv - from pyscf.dft import numint2c - ni0 = mf._numint - ni = numint2c.NumInt2C() - ni.collinear = 'mcol' - ni.collinear_samples = collinear_samples - ni.libxc.test_deriv_order(mf.xc, 2, raise_error=True) - if mf.nlc or ni.libxc.is_nlc(mf.xc): - logger.warn(mf, 'NLC functional found in DFT object. Its second ' - 'deriviative is not available. Its contribution is ' - 'not included in the response function.') - omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, mol.spin) - - if collinear == 'mcol': - add_hf_(a, b, hyb) - xctype = ni._xc_type(mf.xc) - mem_now = lib.current_memory()[0] - max_memory = max(2000, mf.max_memory*.8-mem_now) - # it should be optimized, which is the disadvantage of mc approach. - fxc = cache_xc_kernel_sf(ni0, mol, mf.grids, mf.xc, mo_coeff, mo_occ, collinear_samples)[2] - p0,p1=0,0 # the two parameters are used for counts the batch of grids. 
- opt = getattr(ni0, 'gdftopt', None) - if opt is None: - ni0.build(mol, mf.grids.coords) - opt = ni0.gdftopt - _sorted_mol = opt._sorted_mol - orbo_a = opt.sort_orbitals(orbo_a, axis=[0]) - orbv_a = opt.sort_orbitals(orbv_a, axis=[0]) - orbo_b = opt.sort_orbitals(orbo_b, axis=[0]) - orbv_b = opt.sort_orbitals(orbv_b, axis=[0]) - if xctype == 'LDA': - ao_deriv = 0 - for ao, mask, weight, coords \ - in ni0.block_loop(_sorted_mol, mf.grids, nao, ao_deriv, max_memory): - p0 = p1 - p1+= weight.shape[0] - wfxc= fxc[0,0][...,p0:p1] * weight - orbo_a_mask = orbo_a[mask] - orbv_a_mask = orbv_a[mask] - orbo_b_mask = orbo_b[mask] - orbv_b_mask = orbv_b[mask] - - rho_o_a = contract('pr,pi->ri', ao, orbo_a_mask) - rho_v_a = contract('pr,pi->ri', ao, orbv_a_mask) - rho_o_b = contract('pr,pi->ri', ao, orbo_b_mask) - rho_v_b = contract('pr,pi->ri', ao, orbv_b_mask) - rho_ov_b2a = contract('ri,ra->ria', rho_o_b, rho_v_a) - rho_ov_a2b = contract('ri,ra->ria', rho_o_a, rho_v_b) - - w_ov = contract('ria,r->ria', rho_ov_b2a, wfxc*2.0) - iajb = contract('ria,rjb->iajb', rho_ov_b2a, w_ov) - a_b2a += iajb - iajb = contract('ria,rjb->iajb', rho_ov_a2b, w_ov) - b_a2b += iajb - - w_ov = contract('ria,r->ria', rho_ov_a2b, wfxc*2.0) - iajb = contract('ria,rjb->iajb', rho_ov_a2b, w_ov) - a_a2b += iajb - iajb = contract('ria,rjb->iajb', rho_ov_b2a, w_ov) - b_b2a += iajb - - elif xctype == 'GGA': - ao_deriv = 1 - for ao, mask, weight, coords \ - in ni0.block_loop(_sorted_mol, mf.grids, nao, ao_deriv, max_memory): - p0 = p1 - p1+= weight.shape[0] - wfxc= fxc[...,p0:p1] * weight - orbo_a_mask = orbo_a[mask] - orbv_a_mask = orbv_a[mask] - orbo_b_mask = orbo_b[mask] - orbv_b_mask = orbv_b[mask] - - rho_o_a = contract('xpr,pi->xri', ao, orbo_a_mask) - rho_v_a = contract('xpr,pi->xri', ao, orbv_a_mask) - rho_o_b = contract('xpr,pi->xri', ao, orbo_b_mask) - rho_v_b = contract('xpr,pi->xri', ao, orbv_b_mask) - rho_ov_b2a = contract('xri,ra->xria', rho_o_b, rho_v_a[0]) - rho_ov_a2b = 
contract('xri,ra->xria', rho_o_a, rho_v_b[0]) - rho_ov_b2a[1:4] += contract('ri,xra->xria', rho_o_b[0], rho_v_a[1:4]) - rho_ov_a2b[1:4] += contract('ri,xra->xria', rho_o_a[0], rho_v_b[1:4]) - - w_ov = contract('xyr,xria->yria', wfxc*2.0, rho_ov_b2a) - iajb = contract('xria,xrjb->iajb', w_ov, rho_ov_b2a) - a_b2a += iajb - iajb = contract('xria,xrjb->iajb', w_ov, rho_ov_a2b) - b_b2a += iajb - - w_ov = contract('xyr,xria->yria', wfxc*2.0, rho_ov_a2b) - iajb = contract('xria,xrjb->iajb', w_ov, rho_ov_a2b) - a_a2b += iajb - iajb = contract('xria,xrjb->iajb', w_ov, rho_ov_b2a) - b_a2b += iajb - - elif xctype == 'HF': - pass - - elif xctype == 'NLC': - raise NotImplementedError('NLC') - - elif xctype == 'MGGA': - ao_deriv = 1 - for ao, mask, weight, coords \ - in ni0.block_loop(_sorted_mol, mf.grids, nao, ao_deriv, max_memory): - p0 = p1 - p1+= weight.shape[0] - wfxc = fxc[...,p0:p1] * weight - orbo_a_mask = orbo_a[mask] - orbv_a_mask = orbv_a[mask] - orbo_b_mask = orbo_b[mask] - orbv_b_mask = orbv_b[mask] - - rho_oa = contract('xpr,pi->xri', ao, orbo_a_mask) - rho_ob = contract('xpr,pi->xri', ao, orbo_b_mask) - rho_va = contract('xpr,pi->xri', ao, orbv_a_mask) - rho_vb = contract('xpr,pi->xri', ao, orbv_b_mask) - rho_ov_b2a = contract('xri,ra->xria', rho_ob, rho_va[0]) - rho_ov_a2b = contract('xri,ra->xria', rho_oa, rho_vb[0]) - rho_ov_b2a[1:4] += contract('ri,xra->xria', rho_ob[0], rho_va[1:4]) - rho_ov_a2b[1:4] += contract('ri,xra->xria', rho_oa[0], rho_vb[1:4]) - tau_ov_b2a = contract('xri,xra->ria', rho_ob[1:4], rho_va[1:4]) * .5 - tau_ov_a2b = contract('xri,xra->ria', rho_oa[1:4], rho_vb[1:4]) * .5 - rho_ov_b2a = cp.vstack([rho_ov_b2a, tau_ov_b2a[cp.newaxis]]) - rho_ov_a2b = cp.vstack([rho_ov_a2b, tau_ov_a2b[cp.newaxis]]) - - w_ov = contract('xyr,xria->yria', wfxc*2.0, rho_ov_b2a) - iajb = contract('xria,xrjb->iajb', w_ov, rho_ov_b2a) - a_b2a += iajb - iajb = contract('xria,xrjb->iajb', w_ov, rho_ov_a2b) - b_b2a += iajb - - w_ov = contract('xyr,xria->yria', 
wfxc*2.0, rho_ov_a2b) - iajb = contract('xria,xrjb->iajb', w_ov, rho_ov_a2b) - a_a2b += iajb - iajb = contract('xria,xrjb->iajb', w_ov, rho_ov_b2a) - b_a2b += iajb - elif collinear == 'col': - add_hf_(a, b, hyb) - elif collinear == 'ncol': - raise NotImplementedError('Locally collinear approach is not implemented') - else: - add_hf_(a, b) - a = (a[0].get(), a[1].get()) # flip-up flip-down - b = (b[0].get(), b[1].get()) - return a, b - - REAL_EIG_THRESHOLD = tdhf_cpu.REAL_EIG_THRESHOLD def gen_tda_operation(td, mf, fock_ao=None, wfnsym=None): @@ -1182,17 +963,6 @@ def _finalize(self): logger.note(self, 'Spin-flip-down Excited State energies (eV)\n%s', self.e * nist.HARTREE2EV) return self - def _finalize(self): - '''Hook for dumping results and clearing up the object.''' - if not all(self.converged): - logger.note(self, 'TD-SCF states %s not converged.', - [i for i, x in enumerate(self.converged) if not x]) - if self.extype == 0: - logger.note(self, 'Spin-flip-up Excited State energies (eV)\n%s', self.e * nist.HARTREE2EV) - elif self.extype == 1: - logger.note(self, 'Spin-flip-down Excited State energies (eV)\n%s', self.e * nist.HARTREE2EV) - return self - def kernel(self, x0=None, nstates=None): '''Spin-flip TDA diagonalization solver ''' From 9b387ff7a5ae6ac06c5d67908956a4ec206bbc9e Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 30 Oct 2025 16:17:41 +0800 Subject: [PATCH 26/32] fix some typos --- gpu4pyscf/grad/tduks_sf.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index c486eaa2a..05038da27 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -32,16 +32,11 @@ def grad_elec(td_grad, x_y, atmlst=None, verbose=logger.INFO): ''' Spin flip TDA gradient in UKS framework. Note: This function supports both TDA or TDA results. - - Parameters - ---------- - Args: - td_grad : sftda.TDA_SF object. 
- - Returns: - The gradient of excited states: Ei^{\\xi} = E0^{\\xi} + wi^{\\xi} + This function is based on https://github.com/pyscf/pyscf-forge/blob/master/pyscf/grad/tduks_sf.py ''' + if getattr(td_grad.base._scf, 'with_df', None) is not None: + raise NotImplementedError('Density fitting TDA-SF gradient is not supported yet.') log = logger.new_logger(td_grad, verbose) time0 = logger.process_clock(), logger.perf_counter() @@ -755,8 +750,3 @@ class Gradients(tdrhf_grad.Gradients): @lib.with_doc(grad_elec.__doc__) def grad_elec(self, xy, singlet=None, atmlst=None, verbose=None): return grad_elec(self, xy, atmlst, self.verbose) - -Grad = Gradients - -from pyscf import sftda -sftda.uks_sf.TDA_SF.Gradients = sftda.uks_sf.TDDFT_SF.Gradients = lib.class_as_method(Gradients) \ No newline at end of file From c32f7d45ba3b9ce913ed6a37402f03e847f9a09a Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 31 Oct 2025 10:30:25 +0800 Subject: [PATCH 27/32] add more output --- gpu4pyscf/grad/tests/test_tduks_sf_grad.py | 26 ++++++++++++++++------ 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/gpu4pyscf/grad/tests/test_tduks_sf_grad.py b/gpu4pyscf/grad/tests/test_tduks_sf_grad.py index 9494c866d..72dfb83ae 100644 --- a/gpu4pyscf/grad/tests/test_tduks_sf_grad.py +++ b/gpu4pyscf/grad/tests/test_tduks_sf_grad.py @@ -59,53 +59,65 @@ def benchmark_with_cpu(mol, xc, nstates=3, lindep=1.0e-12, tda=False, extype=0): tdsf.nstates=5 tdsf.collinear_samples=10 tdsf.kernel() + print("xc", xc) + print("extype", extype) + print("etot", mf.e_tot) + print("excitation energy", tdsf.e) g = tdsf.Gradients() g.kernel() - return g.de + return mf.e_tot, tdsf.e, g.de def _check_grad(mol, xc, tol=1e-5, lindep=1.0e-12, disp=None, tda=True, method="cpu", extype=0): if not tda: raise NotImplementedError("spin-flip TDDFT gradients is not implemented") if method == "cpu": - grad_gpu = benchmark_with_cpu(mol, xc, nstates=5, lindep=lindep, tda=tda, extype=extype) + etot, e, grad_gpu = 
benchmark_with_cpu(mol, xc, nstates=5, lindep=lindep, tda=tda, extype=extype) else: raise NotImplementedError("Only compared with CPU") - return grad_gpu + return etot, e, grad_gpu class KnownValues(unittest.TestCase): @unittest.skipIf(num_devices > 1, '') def test_grad_b3lyp_tda_spinflip_up_cpu(self): - grad_gpu = _check_grad(mol, xc="b3lyp", tol=5e-10, method="cpu") + etot, e, grad_gpu = _check_grad(mol, xc="b3lyp", tol=5e-10, method="cpu") # ref from pyscf-forge + assert abs(etot - -75.9674347270528) < 1e-8 + assert abs(e - np.array([0.46618494, 0.53438998, 0.60047275, 0.65786033, 0.92091718])).max() < 1e-5 ref = np.array([[ 8.79547051e-16, 8.63728537e-14, 1.87755267e-01], [-4.31890391e-16, 2.15026042e-01, -9.38746716e-02], [-4.50003252e-16, -2.15026042e-01, -9.38746716e-02]]) assert abs(grad_gpu - ref).max() < 1e-5 def test_grad_b3lyp_tda_spinflip_down_cpu(self): - grad_gpu = _check_grad(mol, xc="b3lyp", tol=5e-10, method="cpu", extype=1) + etot, e, grad_gpu = _check_grad(mol, xc="b3lyp", tol=5e-10, method="cpu", extype=1) # ref from pyscf-forge + assert abs(etot - -75.96743472705282) < 1e-8 + assert abs(e - np.array([0.0034149, 0.08157731, 0.23027453, 0.50644857, 0.51065628])).max() < 1e-5 ref = np.array([[-3.01640558e-16, 1.52982216e-13, 5.10689029e-02], [ 1.36165869e-16, 4.52872857e-02, -2.55387304e-02], [-3.08111636e-17, -4.52872857e-02, -2.55387304e-02],]) assert abs(grad_gpu - ref).max() < 1e-5 def test_grad_svwn_tda_spinflip_down_cpu(self): - grad_gpu = _check_grad(mol, xc="svwn", tol=5e-10, method="cpu", extype=1) + etot, e, grad_gpu = _check_grad(mol, xc="svwn", tol=5e-10, method="cpu", extype=1) # ref from pyscf-forge + assert abs(etot - -75.39033965461661) < 1e-8 + assert abs(e - np.array([0.00210504, 0.07530215, 0.22255285, 0.50300732, 0.50382963])).max() < 1e-5 ref = np.array([[-8.15030724e-16, -6.13885762e-14, 6.41681368e-02], [ 1.12931062e-16, 5.34632826e-02, -3.20887796e-02], [ 7.97399496e-17, -5.34632826e-02, -3.20887796e-02],]) assert 
abs(grad_gpu - ref).max() < 1e-5 def test_grad_camb3lyp_tda_spinflip_down_cpu(self): - grad_gpu = _check_grad(mol, xc="camb3lyp", tol=5e-10, method="cpu", extype=1) + etot, e, grad_gpu = _check_grad(mol, xc="camb3lyp", tol=5e-10, method="cpu", extype=1) # ref from pyscf-forge + assert abs(etot - -75.93920847775132) < 1e-8 + assert abs(e - np.array([0.00335301, 0.07772481, 0.2267033, 0.50960632, 0.5133939])).max() < 1e-5 ref = np.array([[-7.43754261e-18, -1.56347842e-13, 4.99263503e-02], [-1.84572351e-17, 4.52908126e-02, -2.49673842e-02], [ 2.40683934e-17, -4.52908126e-02, -2.49673842e-02],]) From a309222d2d5c6690e8e5a537f730b502321173fb Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 31 Oct 2025 14:26:54 +0800 Subject: [PATCH 28/32] add closed-shell tests --- gpu4pyscf/grad/tduks_sf.py | 3 ++ gpu4pyscf/grad/tests/test_tduks_sf_grad.py | 62 ++++++++++++++++++++-- 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index 05038da27..d278764bc 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -421,6 +421,7 @@ def fvind(x): for p0, p1 in aoslices[:, 2:]]) de += 2.0 * dvhf_all + delec + dh1e_ground + dh1e_td + deveff0 + deveff1 + deveff2 + deveff3 + print("de", de) log.timer('TDUKS nuclear gradients', *time0) return de.get() @@ -750,3 +751,5 @@ class Gradients(tdrhf_grad.Gradients): @lib.with_doc(grad_elec.__doc__) def grad_elec(self, xy, singlet=None, atmlst=None, verbose=None): return grad_elec(self, xy, atmlst, self.verbose) + +Grad = Gradients \ No newline at end of file diff --git a/gpu4pyscf/grad/tests/test_tduks_sf_grad.py b/gpu4pyscf/grad/tests/test_tduks_sf_grad.py index 72dfb83ae..aa09328e3 100644 --- a/gpu4pyscf/grad/tests/test_tduks_sf_grad.py +++ b/gpu4pyscf/grad/tests/test_tduks_sf_grad.py @@ -16,9 +16,8 @@ import numpy as np import unittest import pytest -from pyscf import scf, dft, tdscf +from pyscf import scf, dft import gpu4pyscf -from gpu4pyscf import 
scf as gpu_scf from packaging import version from gpu4pyscf.lib.multi_gpu import num_devices @@ -33,7 +32,7 @@ bas0 = "cc-pvdz" def setUpModule(): - global mol + global mol, mol1 mol = pyscf.M( atom=atom, basis=bas0, @@ -43,12 +42,23 @@ def setUpModule(): output="/dev/null", verbose=1, ) + mol1 = pyscf.M( + atom=atom, + basis=bas0, + max_memory=32000, + charge=0, + spin=0, + output="/dev/null", + verbose=1, + ) def tearDownModule(): - global mol + global mol, mol1 mol.stdout.close() del mol + mol1.stdout.close() + del mol1 def benchmark_with_cpu(mol, xc, nstates=3, lindep=1.0e-12, tda=False, extype=0): @@ -63,9 +73,13 @@ def benchmark_with_cpu(mol, xc, nstates=3, lindep=1.0e-12, tda=False, extype=0): print("extype", extype) print("etot", mf.e_tot) print("excitation energy", tdsf.e) + print("whether converged", tdsf.converged) + print("xy", tdsf.xy) g = tdsf.Gradients() g.kernel() + print("gradient") + print(g.de) return mf.e_tot, tdsf.e, g.de @@ -122,6 +136,46 @@ def test_grad_camb3lyp_tda_spinflip_down_cpu(self): [-1.84572351e-17, 4.52908126e-02, -2.49673842e-02], [ 2.40683934e-17, -4.52908126e-02, -2.49673842e-02],]) assert abs(grad_gpu - ref).max() < 1e-5 + + def test_grad_b3lyp_tda_spinflip_up_cpu_closed(self): + etot, e, grad_gpu = _check_grad(mol1, xc="b3lyp", tol=5e-10, method="cpu") + # ref from pyscf-forge + assert abs(etot - -76.42037833354925) < 1e-8 + assert abs(e - np.array([0.25433265, 0.33124974, 0.3313682, 0.40247177, 0.47307456])).max() < 1e-5 + ref = np.array([[ 1.29088518e-16, 6.98423827e-14, 1.25014262e-01], + [-1.36624149e-16, 8.37484153e-02, -6.25098673e-02], + [ 1.80012190e-16, -8.37484153e-02, -6.25098673e-02]]) + assert abs(grad_gpu - ref).max() < 1e-5 + + def test_grad_b3lyp_tda_spinflip_down_cpu_closed(self): + etot, e, grad_gpu = _check_grad(mol1, xc="b3lyp", tol=5e-10, method="cpu", extype=1) + # ref from pyscf-forge + assert abs(etot - -76.42037833354925) < 1e-8 + assert abs(e - np.array([0.2543327, 0.33124974, 0.3313685, 0.40247202, 
0.4730746])).max() < 1e-5 + ref = np.array([[-5.16805682e-16, 7.28823057e-14, 1.25014068e-01], + [ 1.94935391e-16, 8.37484121e-02, -6.25097703e-02], + [ 1.20139074e-17, -8.37484121e-02, -6.25097703e-02],]) + assert abs(grad_gpu - ref).max() < 1e-5 + + def test_grad_svwn_tda_spinflip_down_cpu_closed(self): + etot, e, grad_gpu = _check_grad(mol1, xc="svwn", tol=5e-10, method="cpu", extype=1) + # ref from pyscf-forge + assert abs(etot - -75.85470242125601) < 1e-8 + assert abs(e - np.array([0.25020513, 0.32400566, 0.32879602, 0.39954396, 0.47440403])).max() < 1e-5 + ref = np.array([[-1.04007210e-16, 2.76349222e-15, 1.40334993e-01], + [ 4.57442221e-17, 9.05506406e-02, -7.01720839e-02], + [ 1.95402062e-16, -9.05506406e-02, -7.01720839e-02],]) + assert abs(grad_gpu - ref).max() < 1e-5 + + def test_grad_camb3lyp_tda_spinflip_down_cpu_closed(self): + etot, e, grad_gpu = _check_grad(mol1, xc="camb3lyp", tol=5e-10, method="cpu", extype=1) + # ref from pyscf-forge + assert abs(etot - -76.39180300401368) < 1e-8 + assert abs(e - np.array([0.25653358, 0.33449489, 0.33602869, 0.40788379, 0.47369817])).max() < 1e-5 + ref = np.array([[ 8.58453733e-14, -2.06289065e-13, 1.21090859e-01], + [-4.13696957e-14, 8.17477776e-02, -6.05484440e-02], + [-4.39523378e-14, -8.17477776e-02, -6.05484440e-02],]) + assert abs(grad_gpu - ref).max() < 1e-5 if __name__ == "__main__": From 8452529f959824256433e8899e534bd08d96d7b9 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 31 Oct 2025 16:24:18 +0800 Subject: [PATCH 29/32] add more debug prints --- gpu4pyscf/grad/tduks_sf.py | 23 ++- gpu4pyscf/grad/tests/test_tduks_sf_grad.py | 166 ++++++++++++--------- 2 files changed, 114 insertions(+), 75 deletions(-) diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index d278764bc..b3bdaa0fa 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -93,6 +93,10 @@ def grad_elec(td_grad, x_y, atmlst=None, verbose=logger.INFO): dmzoo_b = reduce(cp.dot, (orbvb, dvv_b, 
orbvb.T)) else: raise RuntimeError("Only spin-flip UHF/UKS is supported") + print("dmzoo_a") + print(dmzoo_a) + print("dmzoo_b") + print(dmzoo_b) f1vo, f1oo, vxc1, k1ao = \ _contract_xc_kernel(td_grad, mf.xc, dmx, @@ -219,6 +223,14 @@ def fvind(x): z1a, z1b = ucphf.solve(fvind, mo_energy, mo_occ, (wvoa,wvob), max_cycle=td_grad.cphf_max_cycle, tol=td_grad.cphf_conv_tol)[0] + print("z1a") + print(z1a) + print("z1b") + print(z1b) + print("wvoa") + print(wvoa) + print("wvob") + print(wvob) time1 = log.timer('Z-vector using UCPHF solver', *time0) @@ -276,6 +288,8 @@ def fvind(x): im0a = reduce(cp.dot, (mo_coeff[0], im0a+zeta_a*dm1a, mo_coeff[0].T)) im0b = reduce(cp.dot, (mo_coeff[1], im0b+zeta_b*dm1b, mo_coeff[1].T)) im0 = im0a + im0b + print("im0") + print(im0) dmz1dooa = z1ao[0] + dmzoo_a dmz1doob = z1ao[1] + dmzoo_b @@ -421,7 +435,14 @@ def fvind(x): for p0, p1 in aoslices[:, 2:]]) de += 2.0 * dvhf_all + delec + dh1e_ground + dh1e_td + deveff0 + deveff1 + deveff2 + deveff3 - print("de", de) + print("dvhf_all", dvhf_all) + print("delec", delec) + print("dh1e_ground", dh1e_ground) + print("dh1e_td", dh1e_td) + print("deveff0", deveff0) + print("deveff1", deveff1) + print("deveff2", deveff2) + print("deveff3", deveff3) log.timer('TDUKS nuclear gradients', *time0) return de.get() diff --git a/gpu4pyscf/grad/tests/test_tduks_sf_grad.py b/gpu4pyscf/grad/tests/test_tduks_sf_grad.py index aa09328e3..432230b85 100644 --- a/gpu4pyscf/grad/tests/test_tduks_sf_grad.py +++ b/gpu4pyscf/grad/tests/test_tduks_sf_grad.py @@ -29,7 +29,7 @@ pyscf_25 = version.parse(pyscf.__version__) <= version.parse("2.5.0") -bas0 = "cc-pvdz" +bas0 = "sto-3g" def setUpModule(): global mol, mol1 @@ -96,86 +96,104 @@ def _check_grad(mol, xc, tol=1e-5, lindep=1.0e-12, disp=None, tda=True, method=" class KnownValues(unittest.TestCase): - @unittest.skipIf(num_devices > 1, '') - def test_grad_b3lyp_tda_spinflip_up_cpu(self): + def test_grad_b3lyp_tda_spinflip_up_cpu_debug(self): etot, e, grad_gpu = 
_check_grad(mol, xc="b3lyp", tol=5e-10, method="cpu") # ref from pyscf-forge - assert abs(etot - -75.9674347270528) < 1e-8 - assert abs(e - np.array([0.46618494, 0.53438998, 0.60047275, 0.65786033, 0.92091718])).max() < 1e-5 - ref = np.array([[ 8.79547051e-16, 8.63728537e-14, 1.87755267e-01], - [-4.31890391e-16, 2.15026042e-01, -9.38746716e-02], - [-4.50003252e-16, -2.15026042e-01, -9.38746716e-02]]) - assert abs(grad_gpu - ref).max() < 1e-5 - - def test_grad_b3lyp_tda_spinflip_down_cpu(self): - etot, e, grad_gpu = _check_grad(mol, xc="b3lyp", tol=5e-10, method="cpu", extype=1) - # ref from pyscf-forge - assert abs(etot - -75.96743472705282) < 1e-8 - assert abs(e - np.array([0.0034149, 0.08157731, 0.23027453, 0.50644857, 0.51065628])).max() < 1e-5 - ref = np.array([[-3.01640558e-16, 1.52982216e-13, 5.10689029e-02], - [ 1.36165869e-16, 4.52872857e-02, -2.55387304e-02], - [-3.08111636e-17, -4.52872857e-02, -2.55387304e-02],]) - assert abs(grad_gpu - ref).max() < 1e-5 - - def test_grad_svwn_tda_spinflip_down_cpu(self): - etot, e, grad_gpu = _check_grad(mol, xc="svwn", tol=5e-10, method="cpu", extype=1) - # ref from pyscf-forge - assert abs(etot - -75.39033965461661) < 1e-8 - assert abs(e - np.array([0.00210504, 0.07530215, 0.22255285, 0.50300732, 0.50382963])).max() < 1e-5 - ref = np.array([[-8.15030724e-16, -6.13885762e-14, 6.41681368e-02], - [ 1.12931062e-16, 5.34632826e-02, -3.20887796e-02], - [ 7.97399496e-17, -5.34632826e-02, -3.20887796e-02],]) + assert abs(etot - -74.9528036306103) < 1e-8 + assert abs(e - np.array([0.51445555, 0.58716341, 0.68441162, 0.74694636, 1.19188861])).max() < 1e-5 + ref = np.array([[-3.14927206e-16, -9.72282754e-15, 3.56761126e-01], + [-4.88789957e-18, 3.64308477e-01, -1.78380686e-01], + [ 1.80155075e-17, -3.64308477e-01, -1.78380686e-01]]) assert abs(grad_gpu - ref).max() < 1e-5 - def test_grad_camb3lyp_tda_spinflip_down_cpu(self): - etot, e, grad_gpu = _check_grad(mol, xc="camb3lyp", tol=5e-10, method="cpu", extype=1) - # ref from 
pyscf-forge - assert abs(etot - -75.93920847775132) < 1e-8 - assert abs(e - np.array([0.00335301, 0.07772481, 0.2267033, 0.50960632, 0.5133939])).max() < 1e-5 - ref = np.array([[-7.43754261e-18, -1.56347842e-13, 4.99263503e-02], - [-1.84572351e-17, 4.52908126e-02, -2.49673842e-02], - [ 2.40683934e-17, -4.52908126e-02, -2.49673842e-02],]) - assert abs(grad_gpu - ref).max() < 1e-5 - - def test_grad_b3lyp_tda_spinflip_up_cpu_closed(self): - etot, e, grad_gpu = _check_grad(mol1, xc="b3lyp", tol=5e-10, method="cpu") - # ref from pyscf-forge - assert abs(etot - -76.42037833354925) < 1e-8 - assert abs(e - np.array([0.25433265, 0.33124974, 0.3313682, 0.40247177, 0.47307456])).max() < 1e-5 - ref = np.array([[ 1.29088518e-16, 6.98423827e-14, 1.25014262e-01], - [-1.36624149e-16, 8.37484153e-02, -6.25098673e-02], - [ 1.80012190e-16, -8.37484153e-02, -6.25098673e-02]]) - assert abs(grad_gpu - ref).max() < 1e-5 - - def test_grad_b3lyp_tda_spinflip_down_cpu_closed(self): + def test_grad_b3lyp_tda_spinflip_down_cpu_closed_debug(self): etot, e, grad_gpu = _check_grad(mol1, xc="b3lyp", tol=5e-10, method="cpu", extype=1) # ref from pyscf-forge - assert abs(etot - -76.42037833354925) < 1e-8 - assert abs(e - np.array([0.2543327, 0.33124974, 0.3313685, 0.40247202, 0.4730746])).max() < 1e-5 - ref = np.array([[-5.16805682e-16, 7.28823057e-14, 1.25014068e-01], - [ 1.94935391e-16, 8.37484121e-02, -6.25097703e-02], - [ 1.20139074e-17, -8.37484121e-02, -6.25097703e-02],]) - assert abs(grad_gpu - ref).max() < 1e-5 - - def test_grad_svwn_tda_spinflip_down_cpu_closed(self): - etot, e, grad_gpu = _check_grad(mol1, xc="svwn", tol=5e-10, method="cpu", extype=1) - # ref from pyscf-forge - assert abs(etot - -75.85470242125601) < 1e-8 - assert abs(e - np.array([0.25020513, 0.32400566, 0.32879602, 0.39954396, 0.47440403])).max() < 1e-5 - ref = np.array([[-1.04007210e-16, 2.76349222e-15, 1.40334993e-01], - [ 4.57442221e-17, 9.05506406e-02, -7.01720839e-02], - [ 1.95402062e-16, -9.05506406e-02, 
-7.01720839e-02],]) - assert abs(grad_gpu - ref).max() < 1e-5 - - def test_grad_camb3lyp_tda_spinflip_down_cpu_closed(self): - etot, e, grad_gpu = _check_grad(mol1, xc="camb3lyp", tol=5e-10, method="cpu", extype=1) - # ref from pyscf-forge - assert abs(etot - -76.39180300401368) < 1e-8 - assert abs(e - np.array([0.25653358, 0.33449489, 0.33602869, 0.40788379, 0.47369817])).max() < 1e-5 - ref = np.array([[ 8.58453733e-14, -2.06289065e-13, 1.21090859e-01], - [-4.13696957e-14, 8.17477776e-02, -6.05484440e-02], - [-4.39523378e-14, -8.17477776e-02, -6.05484440e-02],]) + assert abs(etot - -75.31258731723625) < 1e-8 + assert abs(e - np.array([0.35887746, 0.44801608, 0.46809436, 0.53804348, 0.64927916])).max() < 1e-5 + ref = np.array([[-3.94619688e-16, 8.33917696e-15, 3.25662471e-01], + [-9.09954329e-17, 2.80744215e-01, -1.62837388e-01], + [ 7.13184304e-17, -2.80744215e-01, -1.62837388e-01],]) assert abs(grad_gpu - ref).max() < 1e-5 + # def test_grad_b3lyp_tda_spinflip_up_cpu(self): + # etot, e, grad_gpu = _check_grad(mol, xc="b3lyp", tol=5e-10, method="cpu") + # # ref from pyscf-forge + # assert abs(etot - -75.9674347270528) < 1e-8 + # assert abs(e - np.array([0.46618494, 0.53438998, 0.60047275, 0.65786033, 0.92091718])).max() < 1e-5 + # ref = np.array([[ 8.79547051e-16, 8.63728537e-14, 1.87755267e-01], + # [-4.31890391e-16, 2.15026042e-01, -9.38746716e-02], + # [-4.50003252e-16, -2.15026042e-01, -9.38746716e-02]]) + # assert abs(grad_gpu - ref).max() < 1e-5 + + # def test_grad_b3lyp_tda_spinflip_down_cpu(self): + # etot, e, grad_gpu = _check_grad(mol, xc="b3lyp", tol=5e-10, method="cpu", extype=1) + # # ref from pyscf-forge + # assert abs(etot - -75.96743472705282) < 1e-8 + # assert abs(e - np.array([0.0034149, 0.08157731, 0.23027453, 0.50644857, 0.51065628])).max() < 1e-5 + # ref = np.array([[-3.01640558e-16, 1.52982216e-13, 5.10689029e-02], + # [ 1.36165869e-16, 4.52872857e-02, -2.55387304e-02], + # [-3.08111636e-17, -4.52872857e-02, -2.55387304e-02],]) + # assert 
abs(grad_gpu - ref).max() < 1e-5 + + # def test_grad_svwn_tda_spinflip_down_cpu(self): + # etot, e, grad_gpu = _check_grad(mol, xc="svwn", tol=5e-10, method="cpu", extype=1) + # # ref from pyscf-forge + # assert abs(etot - -75.39033965461661) < 1e-8 + # assert abs(e - np.array([0.00210504, 0.07530215, 0.22255285, 0.50300732, 0.50382963])).max() < 1e-5 + # ref = np.array([[-8.15030724e-16, -6.13885762e-14, 6.41681368e-02], + # [ 1.12931062e-16, 5.34632826e-02, -3.20887796e-02], + # [ 7.97399496e-17, -5.34632826e-02, -3.20887796e-02],]) + # assert abs(grad_gpu - ref).max() < 1e-5 + + # def test_grad_camb3lyp_tda_spinflip_down_cpu(self): + # etot, e, grad_gpu = _check_grad(mol, xc="camb3lyp", tol=5e-10, method="cpu", extype=1) + # # ref from pyscf-forge + # assert abs(etot - -75.93920847775132) < 1e-8 + # assert abs(e - np.array([0.00335301, 0.07772481, 0.2267033, 0.50960632, 0.5133939])).max() < 1e-5 + # ref = np.array([[-7.43754261e-18, -1.56347842e-13, 4.99263503e-02], + # [-1.84572351e-17, 4.52908126e-02, -2.49673842e-02], + # [ 2.40683934e-17, -4.52908126e-02, -2.49673842e-02],]) + # assert abs(grad_gpu - ref).max() < 1e-5 + + # def test_grad_b3lyp_tda_spinflip_up_cpu_closed(self): + # etot, e, grad_gpu = _check_grad(mol1, xc="b3lyp", tol=5e-10, method="cpu") + # # ref from pyscf-forge + # assert abs(etot - -76.42037833354925) < 1e-8 + # assert abs(e - np.array([0.25433265, 0.33124974, 0.3313682, 0.40247177, 0.47307456])).max() < 1e-5 + # ref = np.array([[ 1.29088518e-16, 6.98423827e-14, 1.25014262e-01], + # [-1.36624149e-16, 8.37484153e-02, -6.25098673e-02], + # [ 1.80012190e-16, -8.37484153e-02, -6.25098673e-02]]) + # assert abs(grad_gpu - ref).max() < 1e-5 + + # def test_grad_b3lyp_tda_spinflip_down_cpu_closed(self): + # etot, e, grad_gpu = _check_grad(mol1, xc="b3lyp", tol=5e-10, method="cpu", extype=1) + # # ref from pyscf-forge + # assert abs(etot - -76.42037833354925) < 1e-8 + # assert abs(e - np.array([0.2543327, 0.33124974, 0.3313685, 0.40247202, 
0.4730746])).max() < 1e-5 + # ref = np.array([[-5.16805682e-16, 7.28823057e-14, 1.25014068e-01], + # [ 1.94935391e-16, 8.37484121e-02, -6.25097703e-02], + # [ 1.20139074e-17, -8.37484121e-02, -6.25097703e-02],]) + # assert abs(grad_gpu - ref).max() < 1e-5 + + # def test_grad_svwn_tda_spinflip_down_cpu_closed(self): + # etot, e, grad_gpu = _check_grad(mol1, xc="svwn", tol=5e-10, method="cpu", extype=1) + # # ref from pyscf-forge + # assert abs(etot - -75.85470242125601) < 1e-8 + # assert abs(e - np.array([0.25020513, 0.32400566, 0.32879602, 0.39954396, 0.47440403])).max() < 1e-5 + # ref = np.array([[-1.04007210e-16, 2.76349222e-15, 1.40334993e-01], + # [ 4.57442221e-17, 9.05506406e-02, -7.01720839e-02], + # [ 1.95402062e-16, -9.05506406e-02, -7.01720839e-02],]) + # assert abs(grad_gpu - ref).max() < 1e-5 + + # def test_grad_camb3lyp_tda_spinflip_down_cpu_closed(self): + # etot, e, grad_gpu = _check_grad(mol1, xc="camb3lyp", tol=5e-10, method="cpu", extype=1) + # # ref from pyscf-forge + # assert abs(etot - -76.39180300401368) < 1e-8 + # assert abs(e - np.array([0.25653358, 0.33449489, 0.33602869, 0.40788379, 0.47369817])).max() < 1e-5 + # ref = np.array([[ 8.58453733e-14, -2.06289065e-13, 1.21090859e-01], + # [-4.13696957e-14, 8.17477776e-02, -6.05484440e-02], + # [-4.39523378e-14, -8.17477776e-02, -6.05484440e-02],]) + # assert abs(grad_gpu - ref).max() < 1e-5 if __name__ == "__main__": From aa52ad8475122149ae7e9949b2bc0cc275e83d95 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Mon, 3 Nov 2025 09:22:06 +0800 Subject: [PATCH 30/32] add more debug prints --- gpu4pyscf/grad/tduks_sf.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index b3bdaa0fa..17ece3d37 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -101,6 +101,15 @@ def grad_elec(td_grad, x_y, atmlst=None, verbose=logger.INFO): f1vo, f1oo, vxc1, k1ao = \ _contract_xc_kernel(td_grad, mf.xc, 
dmx, (dmzoo_a,dmzoo_b), True, True, td_grad.base.extype) + print("f1vo") + print(f1vo) + print("f1oo") + print(f1oo) + print("vxc1") + print(vxc1) + print("k1ao") + print(k1ao) + # f1vo, (2,2,4,nao,nao), (X+Y) and (X-Y) with fxc_sf # f1oo, (2,4,nao,nao), 2T with fxc_sc # vxc1, ao with v1^{\sigma} @@ -120,6 +129,10 @@ def grad_elec(td_grad, x_y, atmlst=None, verbose=logger.INFO): vj = cp.asarray(vj) if not isinstance(vk, cp.ndarray): vk = cp.asarray(vk) + print("vj") + print(vj) + print("vk") + print(vk) vk *= hyb if abs(omega) > 1e-10: @@ -136,6 +149,8 @@ def grad_elec(td_grad, x_y, atmlst=None, verbose=logger.INFO): +k1ao[0,0,0] + k1ao[0,1,0] + k1ao[1,0,0] + k1ao[1,1,0]) veff0doo[1] += (k1ao[0,0,0] + k1ao[0,1,0] - k1ao[1,0,0] - k1ao[1,1,0] +k1ao[0,0,0] + k1ao[0,1,0] - k1ao[1,0,0] - k1ao[1,1,0]) + print("veff0doo") + print(veff0doo) wvoa = reduce(cp.dot, (orbva.T, veff0doo[0], orboa)) *2 wvob = reduce(cp.dot, (orbvb.T, veff0doo[1], orbob)) *2 @@ -145,22 +160,30 @@ def grad_elec(td_grad, x_y, atmlst=None, verbose=logger.INFO): veff0mop = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) wvob += cp.einsum('ca,ci->ai', veff0mop[nocca:,noccb:], x) *2 wvoa -= cp.einsum('il,al->ai', veff0mop[:nocca,:noccb], x) *2 + print("veff0mop") + print(veff0mop) veff = -vk[:,2] + f1vo[1,:,0] veff0mom = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) wvob += cp.einsum('ca,ci->ai', veff0mom[nocca:,noccb:], x) *2 wvoa -= cp.einsum('il,al->ai', veff0mom[:nocca,:noccb], x) *2 + print("veff0mom") + print(veff0mom) else: # extype == 1 veff = - vk[:,1] + f1vo[0,:,0] veff0mop = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) wvoa += cp.einsum('ca,ci->ai', veff0mop[noccb:,nocca:], x) *2 wvob -= cp.einsum('il,al->ai', veff0mop[:noccb,:nocca], x) *2 + print("veff0mop") + print(veff0mop) veff = -vk[:,2] + f1vo[1,:,0] veff0mom = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) wvoa += cp.einsum('ca,ci->ai', veff0mom[noccb:,nocca:], x) *2 wvob -= cp.einsum('il,al->ai', 
veff0mom[:noccb,:nocca], x) *2 + print("veff0mom") + print(veff0mom) else: # Pure functional if td_grad.base.extype == 0: @@ -223,6 +246,11 @@ def fvind(x): z1a, z1b = ucphf.solve(fvind, mo_energy, mo_occ, (wvoa,wvob), max_cycle=td_grad.cphf_max_cycle, tol=td_grad.cphf_conv_tol)[0] + print("cphf_max_cycle") + print(td_grad.cphf_max_cycle) + print("cphf_conv_tol") + print(td_grad.cphf_conv_tol) + print("z1a") print(z1a) print("z1b") From 295d00debba93941d27cc58f636db7e5b6aa95a9 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Tue, 4 Nov 2025 09:16:14 +0800 Subject: [PATCH 31/32] remove debug prints --- gpu4pyscf/grad/tduks_sf.py | 51 +------------------------------------- 1 file changed, 1 insertion(+), 50 deletions(-) diff --git a/gpu4pyscf/grad/tduks_sf.py b/gpu4pyscf/grad/tduks_sf.py index 17ece3d37..0b95749ca 100644 --- a/gpu4pyscf/grad/tduks_sf.py +++ b/gpu4pyscf/grad/tduks_sf.py @@ -24,7 +24,7 @@ from gpu4pyscf.grad import rks as rks_grad from gpu4pyscf.grad import rhf as rhf_grad from gpu4pyscf.grad import tdrhf as tdrhf_grad -from gpu4pyscf.tdscf._uhf_resp_sf import cache_xc_kernel_sf, mcfun_eval_xc_adapter_sf +from gpu4pyscf.tdscf._uhf_resp_sf import mcfun_eval_xc_adapter_sf from gpu4pyscf.grad import tdrks @@ -93,22 +93,10 @@ def grad_elec(td_grad, x_y, atmlst=None, verbose=logger.INFO): dmzoo_b = reduce(cp.dot, (orbvb, dvv_b, orbvb.T)) else: raise RuntimeError("Only spin-flip UHF/UKS is supported") - print("dmzoo_a") - print(dmzoo_a) - print("dmzoo_b") - print(dmzoo_b) f1vo, f1oo, vxc1, k1ao = \ _contract_xc_kernel(td_grad, mf.xc, dmx, (dmzoo_a,dmzoo_b), True, True, td_grad.base.extype) - print("f1vo") - print(f1vo) - print("f1oo") - print(f1oo) - print("vxc1") - print(vxc1) - print("k1ao") - print(k1ao) # f1vo, (2,2,4,nao,nao), (X+Y) and (X-Y) with fxc_sf # f1oo, (2,4,nao,nao), 2T with fxc_sc @@ -129,10 +117,6 @@ def grad_elec(td_grad, x_y, atmlst=None, verbose=logger.INFO): vj = cp.asarray(vj) if not isinstance(vk, cp.ndarray): vk = cp.asarray(vk) - 
print("vj") - print(vj) - print("vk") - print(vk) vk *= hyb if abs(omega) > 1e-10: @@ -149,8 +133,6 @@ def grad_elec(td_grad, x_y, atmlst=None, verbose=logger.INFO): +k1ao[0,0,0] + k1ao[0,1,0] + k1ao[1,0,0] + k1ao[1,1,0]) veff0doo[1] += (k1ao[0,0,0] + k1ao[0,1,0] - k1ao[1,0,0] - k1ao[1,1,0] +k1ao[0,0,0] + k1ao[0,1,0] - k1ao[1,0,0] - k1ao[1,1,0]) - print("veff0doo") - print(veff0doo) wvoa = reduce(cp.dot, (orbva.T, veff0doo[0], orboa)) *2 wvob = reduce(cp.dot, (orbvb.T, veff0doo[1], orbob)) *2 @@ -160,30 +142,22 @@ def grad_elec(td_grad, x_y, atmlst=None, verbose=logger.INFO): veff0mop = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) wvob += cp.einsum('ca,ci->ai', veff0mop[nocca:,noccb:], x) *2 wvoa -= cp.einsum('il,al->ai', veff0mop[:nocca,:noccb], x) *2 - print("veff0mop") - print(veff0mop) veff = -vk[:,2] + f1vo[1,:,0] veff0mom = reduce(cp.dot, (mo_coeff[0].T, veff[1], mo_coeff[1])) wvob += cp.einsum('ca,ci->ai', veff0mom[nocca:,noccb:], x) *2 wvoa -= cp.einsum('il,al->ai', veff0mom[:nocca,:noccb], x) *2 - print("veff0mom") - print(veff0mom) else: # extype == 1 veff = - vk[:,1] + f1vo[0,:,0] veff0mop = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) wvoa += cp.einsum('ca,ci->ai', veff0mop[noccb:,nocca:], x) *2 wvob -= cp.einsum('il,al->ai', veff0mop[:noccb,:nocca], x) *2 - print("veff0mop") - print(veff0mop) veff = -vk[:,2] + f1vo[1,:,0] veff0mom = reduce(cp.dot, (mo_coeff[1].T, veff[0], mo_coeff[0])) wvoa += cp.einsum('ca,ci->ai', veff0mom[noccb:,nocca:], x) *2 wvob -= cp.einsum('il,al->ai', veff0mom[:noccb,:nocca], x) *2 - print("veff0mom") - print(veff0mom) else: # Pure functional if td_grad.base.extype == 0: @@ -246,19 +220,6 @@ def fvind(x): z1a, z1b = ucphf.solve(fvind, mo_energy, mo_occ, (wvoa,wvob), max_cycle=td_grad.cphf_max_cycle, tol=td_grad.cphf_conv_tol)[0] - print("cphf_max_cycle") - print(td_grad.cphf_max_cycle) - print("cphf_conv_tol") - print(td_grad.cphf_conv_tol) - - print("z1a") - print(z1a) - print("z1b") - print(z1b) - 
print("wvoa") - print(wvoa) - print("wvob") - print(wvob) time1 = log.timer('Z-vector using UCPHF solver', *time0) @@ -316,8 +277,6 @@ def fvind(x): im0a = reduce(cp.dot, (mo_coeff[0], im0a+zeta_a*dm1a, mo_coeff[0].T)) im0b = reduce(cp.dot, (mo_coeff[1], im0b+zeta_b*dm1b, mo_coeff[1].T)) im0 = im0a + im0b - print("im0") - print(im0) dmz1dooa = z1ao[0] + dmzoo_a dmz1doob = z1ao[1] + dmzoo_b @@ -463,14 +422,6 @@ def fvind(x): for p0, p1 in aoslices[:, 2:]]) de += 2.0 * dvhf_all + delec + dh1e_ground + dh1e_td + deveff0 + deveff1 + deveff2 + deveff3 - print("dvhf_all", dvhf_all) - print("delec", delec) - print("dh1e_ground", dh1e_ground) - print("dh1e_td", dh1e_td) - print("deveff0", deveff0) - print("deveff1", deveff1) - print("deveff2", deveff2) - print("deveff3", deveff3) log.timer('TDUKS nuclear gradients', *time0) return de.get() From 9501f77668de65824133aad3ce46325515e8072b Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Tue, 4 Nov 2025 13:48:36 +0800 Subject: [PATCH 32/32] change gpu4pyscf-libxc to 0.7.0 : --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 70660747d..a6fc7223e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ cutensor-cu12==2.2.0 -gpu4pyscf-libxc-cuda12x==0.5.0 +gpu4pyscf-libxc-cuda12x==0.7.0 cupy-cuda12x==13.4.1 pyscf==2.8.0 basis-set-exchange==0.11 diff --git a/setup.py b/setup.py index 111e4f3c2..ca06bade9 100755 --- a/setup.py +++ b/setup.py @@ -138,6 +138,6 @@ def initialize_with_default_plat_name(self): 'pyscf-dispersion', f'cupy-cuda{CUDA_VERSION}>=13.0,!=13.4.0', # Due to expm in cupyx.scipy.linalg and cutensor 2.0 'geometric', - f'gpu4pyscf-libxc-cuda{CUDA_VERSION}==0.5', + f'gpu4pyscf-libxc-cuda{CUDA_VERSION}==0.7.0', ] )