From 509550603b3a73783fda647eac62271d88640895 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 30 Oct 2025 16:08:58 +0800 Subject: [PATCH 01/21] begin to write --- gpu4pyscf/dft/tests/test_gks.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 gpu4pyscf/dft/tests/test_gks.py diff --git a/gpu4pyscf/dft/tests/test_gks.py b/gpu4pyscf/dft/tests/test_gks.py new file mode 100644 index 000000000..6a5e12d83 --- /dev/null +++ b/gpu4pyscf/dft/tests/test_gks.py @@ -0,0 +1,28 @@ +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pickle +import numpy as np +import unittest +import pyscf +from gpu4pyscf.dft import rks + +atom = ''' +O 0.0000000000 -0.0000000000 0.1174000000 +H -0.7570000000 -0.0000000000 -0.4696000000 +H 0.7570000000 0.0000000000 -0.4696000000 +''' +bas='def2-tzvpp' +grids_level = 5 +nlcgrids_level = 2 \ No newline at end of file From 692fa966d66f7bb4a269a19547bfaa786ab0dd50 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 31 Oct 2025 07:14:55 +0800 Subject: [PATCH 02/21] in writing --- gpu4pyscf/dft/gks.py | 120 ++++++- gpu4pyscf/dft/numint2c.py | 721 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 838 insertions(+), 3 deletions(-) create mode 100644 gpu4pyscf/dft/numint2c.py diff --git a/gpu4pyscf/dft/gks.py b/gpu4pyscf/dft/gks.py index 8d83e3f92..36b3f1ae6 100644 --- a/gpu4pyscf/dft/gks.py +++ b/gpu4pyscf/dft/gks.py @@ -12,19 +12,133 @@ # See the License for the specific language governing permissions and # limitations under the License. +import cupy as cp +from pyscf import lib from pyscf.dft import gks from gpu4pyscf.dft import numint from gpu4pyscf.dft import rks from gpu4pyscf.scf.ghf import GHF +from gpu4pyscf.lib import logger +from gpu4pyscf.lib.cupy_helper import tag_array + + +def get_veff(ks, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): + '''Coulomb + XC functional + + .. note:: + This function will change the ks object. + + Args: + ks : an instance of :class:`RKS` + XC functional are controlled by ks.xc attribute. Attribute + ks.grids might be initialized. + dm : ndarray or list of ndarrays + A density matrix or a list of density matrices + + Kwargs: + dm_last : ndarray or a list of ndarrays or 0 + The density matrix baseline. If not 0, this function computes the + increment of HF potential w.r.t. the reference HF potential matrix. + vhf_last : ndarray or a list of ndarrays or 0 + The reference Vxc potential matrix. + hermi : int + Whether J, K matrix is hermitian + + | 0 : no hermitian or symmetric + | 1 : hermitian + | 2 : anti-hermitian + + Returns: + matrix Veff = J + Vxc. Veff can be a list matrices, if the input + dm is a list of density matrices. + ''' + if mol is None: mol = ks.mol + if dm is None: dm = ks.make_rdm1() + ks.initialize_grids(mol, dm) + + t0 = (logger.process_clock(), logger.perf_counter()) + + ground_state = isinstance(dm, cp.ndarray) and dm.ndim == 2 + + if hermi == 2: # because rho = 0 + n, exc, vxc = 0, 0, 0 + else: + max_memory = ks.max_memory - lib.current_memory()[0] + ni = ks._numint + n, exc, vxc = ni.get_vxc(mol, ks.grids, ks.xc, dm, + hermi=hermi, max_memory=max_memory) + logger.debug(ks, 'nelec by numeric integration = %s', n) + if ks.do_nlc(): + if ni.libxc.is_nlc(ks.xc): + xc = ks.xc + else: + assert ni.libxc.is_nlc(ks.nlc) + xc = ks.nlc + n, enlc, vnlc = ni.nr_nlc_vxc(mol, ks.nlcgrids, xc, dm, + hermi=hermi, max_memory=max_memory) + exc += enlc + vxc += vnlc + logger.debug(ks, 'nelec with nlc grids = %s', n) + t0 = logger.timer(ks, 'vxc', *t0) + + incremental_jk = (ks._eri is None and ks.direct_scf and + getattr(vhf_last, 'vj', None) is not None) + if incremental_jk: + _dm = cp.asarray(dm) - cp.asarray(dm_last) + else: + _dm = dm + if not ni.libxc.is_hybrid_xc(ks.xc): + vk = None + vj = ks.get_j(mol, _dm, hermi) + if incremental_jk: + vj += vhf_last.vj + vxc += vj + else: + omega, alpha, hyb = ni.rsh_and_hybrid_coeff(ks.xc, spin=mol.spin) + if omega == 0: + vj, vk = ks.get_jk(mol, _dm, hermi) + vk *= hyb + elif alpha == 0: # LR=0, only SR exchange + vj = ks.get_j(mol, _dm, hermi) + vk = ks.get_k(mol, _dm, hermi, omega=-omega) + vk *= hyb + elif hyb == 0: # SR=0, only LR exchange + vj = ks.get_j(mol, _dm, hermi) + vk = ks.get_k(mol, _dm, hermi, omega=omega) + vk *= alpha + else: # SR and LR exchange with different ratios + vj, vk = ks.get_jk(mol, _dm, hermi) + vk *= hyb + vklr = ks.get_k(mol, _dm, hermi, omega=omega) + vklr *= (alpha - hyb) + vk += vklr + if incremental_jk: + vj += vhf_last.vj + vk += vhf_last.vk + vxc += vj - vk + + if ground_state: + exc -= cp.einsum('ij,ji', dm, vk).real * .5 + + if ground_state: + ecoul = cp.einsum('ij,ji', dm, vj).real * .5 + else: + ecoul = None + + vxc = tag_array(vxc, ecoul=ecoul, exc=exc, vj=vj, vk=vk) + return vxc + class GKS(gks.GKS, GHF): from gpu4pyscf.lib.utils import to_cpu, to_gpu, device def __init__(self, mol, xc='LDA,VWN'): - raise NotImplementedError - + GHF.__init__(self, mol) + rks.KohnShamDFT.__init__(self, xc) + # self._numint = NumInt2C() + + get_veff = get_veff reset = rks.RKS.reset energy_elec = rks.RKS.energy_elec - get_veff = NotImplemented nuc_grad_method = NotImplemented to_hf = NotImplemented diff --git a/gpu4pyscf/dft/numint2c.py b/gpu4pyscf/dft/numint2c.py new file mode 100644 index 000000000..034d3fe03 --- /dev/null +++ b/gpu4pyscf/dft/numint2c.py @@ -0,0 +1,721 @@ +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +''' +Numerical integration functions for (2-component) GKS with real AO basis +''' + +import functools +import numpy as np +from pyscf import lib +from pyscf.dft import numint +from pyscf.dft.numint import _dot_ao_dm, _dot_ao_ao, _scale_ao, _tau_dot, BLKSIZE +from pyscf.dft import xc_deriv +from pyscf import __config__ + + +@lib.with_doc( + '''Calculate the electron density and magnetization spin density in the + framework of 2-component real basis. + ''' + numint.eval_rho.__doc__) +def eval_rho(mol, ao, dm, non0tab=None, xctype='LDA', hermi=0, + with_lapl=True, verbose=None): + nao = ao.shape[-1] + assert dm.ndim == 2 and nao * 2 == dm.shape[0] + + ngrids, nao = ao.shape[-2:] + xctype = xctype.upper() + shls_slice = (0, mol.nbas) + ao_loc = mol.ao_loc + + if xctype == 'LDA': + c0a = _dot_ao_dm(mol, ao, dm[:nao], non0tab, shls_slice, ao_loc) + c0b = _dot_ao_dm(mol, ao, dm[nao:], non0tab, shls_slice, ao_loc) + rho_m = _contract_rho_m((ao, ao), (c0a, c0b), hermi, True) + elif xctype == 'GGA': + # first 4 ~ (rho, m), second 4 ~ (0th order, dx, dy, dz) + if hermi: + rho_m = np.empty((4, 4, ngrids)) + else: + rho_m = np.empty((4, 4, ngrids), dtype=np.complex128) + c0a = _dot_ao_dm(mol, ao[0], dm[:nao], non0tab, shls_slice, ao_loc) + c0b = _dot_ao_dm(mol, ao[0], dm[nao:], non0tab, shls_slice, ao_loc) + c0 = (c0a, c0b) + rho_m[:,0] = _contract_rho_m((ao[0], ao[0]), c0, hermi, True) + for i in range(1, 4): + rho_m[:,i] = _contract_rho_m((ao[i], ao[i]), c0, hermi, False) + if hermi: + rho_m[:,1:4] *= 2 # *2 for |ao> dm < dx ao| + |dx ao> dm < ao| + else: + for i in range(1, 4): + c1a = _dot_ao_dm(mol, ao[i], dm[:nao], non0tab, shls_slice, ao_loc) + c1b = _dot_ao_dm(mol, ao[i], dm[nao:], non0tab, shls_slice, ao_loc) + rho_m[:,i] += _contract_rho_m((ao[0], ao[0]), (c1a, c1b), hermi, False) + else: # meta-GGA + if hermi: + dtype = np.double + else: + dtype = np.complex128 + if with_lapl: + rho_m = np.empty((4, 6, ngrids), dtype=dtype) + tau_idx = 5 + else: + rho_m = np.empty((4, 5, ngrids), dtype=dtype) + tau_idx = 4 + c0a = _dot_ao_dm(mol, ao[0], dm[:nao], non0tab, shls_slice, ao_loc) + c0b = _dot_ao_dm(mol, ao[0], dm[nao:], non0tab, shls_slice, ao_loc) + c0 = (c0a, c0b) + rho_m[:,0] = _contract_rho_m((ao[0], ao[0]), c0, hermi, True) + rho_m[:,tau_idx] = 0 + for i in range(1, 4): + c1a = _dot_ao_dm(mol, ao[i], dm[:nao], non0tab, shls_slice, ao_loc) + c1b = _dot_ao_dm(mol, ao[i], dm[nao:], non0tab, shls_slice, ao_loc) + rho_m[:,tau_idx] += _contract_rho_m((ao[i], ao[i]), (c1a, c1b), hermi, True) + + rho_m[:,i] = _contract_rho_m((ao[i], ao[i]), c0, hermi, False) + if hermi: + rho_m[:,i] *= 2 + else: + rho_m[:,i] += _contract_rho_m((ao[0], ao[0]), (c1a, c1b), hermi, False) + if with_lapl: + # TODO: rho_m[:,4] = \nabla^2 rho + raise NotImplementedError + # tau = 1/2 (\nabla f)^2 + rho_m[:,tau_idx] *= .5 + return rho_m + +def _gks_mcol_vxc(ni, mol, grids, xc_code, dms, relativity=0, hermi=0, + max_memory=2000, verbose=None): + xctype = ni._xc_type(xc_code) + shls_slice = (0, mol.nbas) + ao_loc = mol.ao_loc_nr() + + make_rho, nset, n2c = ni._gen_rho_evaluator(mol, dms, hermi, False) + nao = n2c // 2 + + nelec = np.zeros(nset) + excsum = np.zeros(nset) + vmat = np.zeros((nset,n2c,n2c), dtype=np.complex128) + + if xctype in ('LDA', 'GGA', 'MGGA'): + f_eval_mat = { + ('LDA' , 'n'): (_ncol_lda_vxc_mat , 0), + ('LDA' , 'm'): (_mcol_lda_vxc_mat , 0), + ('GGA' , 'm'): (_mcol_gga_vxc_mat , 1), + ('MGGA', 'm'): (_mcol_mgga_vxc_mat, 1), + } + fmat, ao_deriv = f_eval_mat[(xctype, ni.collinear[0])] + + if ni.collinear[0] == 'm': # mcol + eval_xc = ni.mcfun_eval_xc_adapter(xc_code) + else: + eval_xc = ni.eval_xc_eff + + for ao, mask, weight, coords \ + in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): + for i in range(nset): + rho = make_rho(i, ao, mask, xctype) + exc, vxc = eval_xc(xc_code, rho, deriv=1, xctype=xctype)[:2] + if xctype == 'LDA': + den = rho[0] * weight + else: + den = rho[0,0] * weight + nelec[i] += den.sum() + excsum[i] += np.dot(den, exc) + vmat[i] += fmat(mol, ao, weight, rho, vxc, mask, shls_slice, + ao_loc, hermi) + + elif xctype == 'HF': + pass + else: + raise NotImplementedError(f'numint2c.get_vxc for functional {xc_code}') + + if hermi: + vmat = vmat + vmat.conj().transpose(0,2,1) + if isinstance(dms, np.ndarray) and dms.ndim == 2: + vmat = vmat[0] + nelec = nelec[0] + excsum = excsum[0] + return nelec, excsum, vmat + +def _gks_mcol_fxc(ni, mol, grids, xc_code, dm0, dms, relativity=0, hermi=0, + rho0=None, vxc=None, fxc=None, max_memory=2000, verbose=None): + assert ni.collinear[0] == 'm' # mcol + xctype = ni._xc_type(xc_code) + if fxc is None and xctype in ('LDA', 'GGA', 'MGGA'): + fxc = ni.cache_xc_kernel1(mol, grids, xc_code, dm0, + max_memory=max_memory)[2] + + if xctype == 'MGGA': + fmat, ao_deriv = (_mcol_mgga_fxc_mat , 1) + elif xctype == 'GGA': + fmat, ao_deriv = (_mcol_gga_fxc_mat , 1) + else: + fmat, ao_deriv = (_mcol_lda_fxc_mat , 0) + + shls_slice = (0, mol.nbas) + ao_loc = mol.ao_loc_nr() + make_rho1, nset, n2c = ni._gen_rho_evaluator(mol, dms, hermi, False) + nao = n2c // 2 + vmat = np.zeros((nset,n2c,n2c), dtype=np.complex128) + + if xctype in ('LDA', 'GGA', 'MGGA'): + _rho0 = None + p1 = 0 + for ao, mask, weight, coords \ + in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): + p0, p1 = p1, p1 + weight.size + _fxc = fxc[:,:,:,:,p0:p1] + for i in range(nset): + rho1 = make_rho1(i, ao, mask, xctype) + vmat[i] += fmat(mol, ao, weight, _rho0, rho1, _fxc, + mask, shls_slice, ao_loc, hermi) + elif xctype == 'HF': + pass + else: + raise NotImplementedError(f'numint2c.get_fxc for functional {xc_code}') + + if hermi: + vmat = vmat + vmat.conj().transpose(0,2,1) + if isinstance(dms, np.ndarray) and dms.ndim == 2: + vmat = vmat[0] + return vmat + +def _ncol_lda_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi): + '''Vxc matrix of non-collinear LDA''' + # NOTE vxc in u/d representation + r, mx, my, mz = rho + vxc = xc_deriv.ud2ts(vxc) + vr, vs = vxc[:,0] + s = lib.norm(rho[1:4], axis=0) + + wv = weight * vr + with np.errstate(divide='ignore',invalid='ignore'): + ws = vs * weight / s + ws[s < 1e-20] = 0 + + # * .5 because of v+v.conj().T in r_vxc + if hermi: + wv *= .5 + ws *= .5 + aow = None + aow = _scale_ao(ao, ws*mx, out=aow) # Mx + tmpx = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ao, ws*my, out=aow) # My + tmpy = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) + if hermi: + # conj(mx+my*1j) == mx-my*1j, tmpx and tmpy should be real + matba = (tmpx + tmpx.T) + (tmpy + tmpy.T) * 1j + matab = np.zeros_like(matba) + else: + # conj(mx+my*1j) != mx-my*1j, tmpx and tmpy should be complex + matba = tmpx + tmpy * 1j + matab = tmpx - tmpy * 1j + tmpx = tmpy = None + aow = _scale_ao(ao, wv+ws*mz, out=aow) # Mz + mataa = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ao, wv-ws*mz, out=aow) # Mz + matbb = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) + mat = np.block([[mataa, matab], [matba, matbb]]) + return mat + +def _eval_xc_eff(ni, xc_code, rho, deriv=1, omega=None, xctype=None, + verbose=None, spin=None): + r'''Returns the derivative tensor against the density parameters + + [[density_a, (nabla_x)_a, (nabla_y)_a, (nabla_z)_a, tau_a], + [density_b, (nabla_x)_b, (nabla_y)_b, (nabla_z)_b, tau_b]]. + + It differs from the eval_xc method in the derivatives of non-local part. + The eval_xc method returns the XC functional derivatives to sigma + (|\nabla \rho|^2) + + Args: + rho: 2-dimensional or 3-dimensional array + Total density and spin density (and their derivatives if GGA or MGGA + functionals) on grids + + Kwargs: + deriv: int + derivative orders + omega: float + define the exponent in the attenuated Coulomb for RSH functional + ''' + if omega is None: omega = ni.omega + if xctype is None: xctype = ni._xc_type(xc_code) + if ni.collinear[0] == 'c': # collinear + t = rho[0] + s = rho[3] + elif ni.collinear[0] == 'n': # ncol + t = rho[0] + m = rho[1:4] + s = lib.norm(m, axis=0) + elif ni.collinear[0] == 'm': # mcol + # called by mcfun.eval_xc_eff which passes (rho, s) only + t, s = rho + else: + raise RuntimeError(f'Unknown collinear scheme {ni.collinear}') + + rho = np.stack([(t + s) * .5, (t - s) * .5]) + if xctype == 'MGGA' and rho.shape[1] == 6: + rho = np.asarray(rho[:,[0,1,2,3,5],:], order='C') + + assert spin is None or spin == 1 + spin = 1 + + out = ni.libxc.eval_xc1(xc_code, rho, spin, deriv, omega) + evfk = [out[0]] + for order in range(1, deriv+1): + evfk.append(xc_deriv.transform_xc(rho, out, xctype, spin, order)) + if deriv < 3: + # The return has at least [e, v, f, k] terms + evfk.extend([None] * (3 - deriv)) + return evfk + +# * Mcfun requires functional derivatives to total-density and spin-density. +# * Make it a global function than a closure so as to be callable by multiprocessing +def __mcfun_fn_eval_xc(ni, xc_code, xctype, rho, deriv): + evfk = ni.eval_xc_eff(xc_code, rho, deriv=deriv, xctype=xctype) + for order in range(1, deriv+1): + if evfk[order] is not None: + evfk[order] = xc_deriv.ud2ts(evfk[order]) + return evfk + +def mcfun_eval_xc_adapter(ni, xc_code): + '''Wrapper to generate the eval_xc function required by mcfun''' + try: + import mcfun + except ImportError: + raise ImportError('This feature requires mcfun library.\n' + 'Try install mcfun with `pip install mcfun`') + + xctype = ni._xc_type(xc_code) + fn_eval_xc = functools.partial(__mcfun_fn_eval_xc, ni, xc_code, xctype) + nproc = lib.num_threads() + def eval_xc_eff(xc_code, rho, deriv=1, omega=None, xctype=None, + verbose=None, spin=None): + return mcfun.eval_xc_eff( + fn_eval_xc, rho, deriv, spin_samples=ni.spin_samples, + collinear_threshold=ni.collinear_thrd, + collinear_samples=ni.collinear_samples, workers=nproc) + return eval_xc_eff + +def _mcol_lda_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi): + '''Vxc matrix of multi-collinear LDA''' + wv = weight * vxc + if hermi: + wv *= .5 # * .5 because of v+v.conj().T in r_vxc + wr, wmx, wmy, wmz = wv + + # einsum('g,g,xgi,xgj->ij', vxc, weight, ao, ao) + # + einsum('xy,g,g,xgi,ygj->ij', sx, vxc, weight, ao, ao) + # + einsum('xy,g,g,xgi,ygj->ij', sy, vxc, weight, ao, ao) + # + einsum('xy,g,g,xgi,ygj->ij', sz, vxc, weight, ao, ao) + aow = None + aow = _scale_ao(ao, wmx[0], out=aow) # Mx + tmpx = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ao, wmy[0], out=aow) # My + tmpy = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) + if hermi: + # conj(mx+my*1j) == mx-my*1j, tmpx and tmpy should be real + matba = (tmpx + tmpx.T) + (tmpy + tmpy.T) * 1j + matab = np.zeros_like(matba) + else: + # conj(mx+my*1j) != mx-my*1j, tmpx and tmpy should be complex + matba = tmpx + tmpy * 1j + matab = tmpx - tmpy * 1j + tmpx = tmpy = None + aow = _scale_ao(ao, wr[0]+wmz[0], out=aow) # Mz + mataa = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ao, wr[0]-wmz[0], out=aow) # Mz + matbb = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) + mat = np.block([[mataa, matab], [matba, matbb]]) + return mat + +def _mcol_gga_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi): + '''Vxc matrix of multi-collinear LDA''' + wv = weight * vxc + if hermi: + wv[:,0] *= .5 # * .5 because of v+v.conj().T in r_vxc + wr, wmx, wmy, wmz = wv + + aow = None + aow = _scale_ao(ao[:4], wr[:4]+wmz[:4], out=aow) # Mz + mataa = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ao[:4], wr[:4]-wmz[:4], out=aow) # Mz + matbb = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ao[:4], wmx[:4], out=aow) # Mx + tmpx = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ao[:4], wmy[:4], out=aow) # My + tmpy = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + if hermi: + assert vxc.dtype == np.double + # conj(mx+my*1j) == mx-my*1j, tmpx and tmpy should be real + matba = (tmpx + tmpx.T) + (tmpy + tmpy.T) * 1j + matab = np.zeros_like(matba) + else: + # conj(mx+my*1j) != mx-my*1j, tmpx and tmpy should be complex + aow = _scale_ao(ao[1:4], wmx[1:4].conj(), out=aow) # Mx + tmpx += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + aow = _scale_ao(ao[1:4], wmy[1:4].conj(), out=aow) # My + tmpy += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + matba = tmpx + tmpy * 1j + matab = tmpx - tmpy * 1j + aow = _scale_ao(ao[1:4], (wr[1:4]+wmz[1:4]).conj(), out=aow) # Mz + mataa += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + aow = _scale_ao(ao[1:4], (wr[1:4]-wmz[1:4]).conj(), out=aow) # Mz + matbb += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + + mat = np.block([[mataa, matab], [matba, matbb]]) + return mat + +def _mcol_mgga_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi): + '''Vxc matrix of multi-collinear MGGA''' + wv = weight * vxc + tau_idx = 4 + wv[:,tau_idx] *= .5 # *.5 for 1/2 in tau + if hermi: + wv[:,0] *= .5 # * .5 because of v+v.conj().T in r_vxc + wv[:,tau_idx] *= .5 + wr, wmx, wmy, wmz = wv + + aow = None + aow = _scale_ao(ao[:4], wr[:4]+wmz[:4], out=aow) # Mz + mataa = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + mataa += _tau_dot(mol, ao, ao, wr[tau_idx]+wmz[tau_idx], mask, shls_slice, ao_loc) + aow = _scale_ao(ao[:4], wr[:4]-wmz[:4], out=aow) # Mz + matbb = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + matbb += _tau_dot(mol, ao, ao, wr[tau_idx]-wmz[tau_idx], mask, shls_slice, ao_loc) + + aow = _scale_ao(ao[:4], wmx[:4], out=aow) # Mx + tmpx = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + tmpx += _tau_dot(mol, ao, ao, wmx[tau_idx], mask, shls_slice, ao_loc) + aow = _scale_ao(ao[:4], wmy[:4], out=aow) # My + tmpy = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + tmpy += _tau_dot(mol, ao, ao, wmy[tau_idx], mask, shls_slice, ao_loc) + if hermi: + assert vxc.dtype == np.double + # conj(mx+my*1j) == mx-my*1j, tmpx and tmpy should be real + matba = (tmpx + tmpx.T) + (tmpy + tmpy.T) * 1j + matab = np.zeros_like(matba) + else: + # conj(mx+my*1j) != mx-my*1j, tmpx and tmpy should be complex + aow = _scale_ao(ao[1:4], wmx[1:4].conj(), out=aow) # Mx + tmpx += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + aow = _scale_ao(ao[1:4], wmy[1:4].conj(), out=aow) # My + tmpy += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + matba = tmpx + tmpy * 1j + matab = tmpx - tmpy * 1j + aow = _scale_ao(ao[1:4], (wr[1:4]+wmz[1:4]).conj(), out=aow) # Mz + mataa += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + aow = _scale_ao(ao[1:4], (wr[1:4]-wmz[1:4]).conj(), out=aow) # Mz + matbb += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + + mat = np.block([[mataa, matab], [matba, matbb]]) + return mat + +def _mcol_lda_fxc_mat(mol, ao, weight, rho0, rho1, fxc, + mask, shls_slice, ao_loc, hermi): + '''Kernel matrix of multi-collinear LDA''' + vxc1 = np.einsum('ag,abyg->byg', rho1, fxc[:,0,:]) + return _mcol_lda_vxc_mat(mol, ao, weight, rho0, vxc1, mask, shls_slice, + ao_loc, hermi) + +def _mcol_gga_fxc_mat(mol, ao, weight, rho0, rho1, fxc, + mask, shls_slice, ao_loc, hermi): + '''Kernel matrix of multi-collinear GGA''' + vxc1 = np.einsum('axg,axbyg->byg', rho1, fxc) + return _mcol_gga_vxc_mat(mol, ao, weight, rho0, vxc1, mask, shls_slice, + ao_loc, hermi) + +def _mcol_mgga_fxc_mat(mol, ao, weight, rho0, rho1, fxc, + mask, shls_slice, ao_loc, hermi): + '''Kernel matrix of multi-collinear MGGA''' + vxc1 = np.einsum('axg,axbyg->byg', rho1, fxc) + return _mcol_mgga_vxc_mat(mol, ao, weight, rho0, vxc1, mask, shls_slice, + ao_loc, hermi) + +def _contract_rho_m(bra, ket, hermi=0, bra_eq_ket=False): + ''' + hermi indicates whether the density matrix is hermitian. + bra_eq_ket indicates whether bra and ket basis are the same AOs. + ''' + # rho = einsum('xgi,ij,xgj->g', ket, dm, bra.conj()) + # mx = einsum('xy,ygi,ij,xgj->g', sx, ket, dm, bra.conj()) + # my = einsum('xy,ygi,ij,xgj->g', sy, ket, dm, bra.conj()) + # mz = einsum('xy,ygi,ij,xgj->g', sz, ket, dm, bra.conj()) + ket_a, ket_b = ket + bra_a, bra_b = bra + nao = min(ket_a.shape[1], bra_a.shape[1]) + ngrids = ket_a.shape[0] + if hermi: + raa = np.einsum('pi,pi->p', bra_a.real, ket_a[:,:nao].real) + raa+= np.einsum('pi,pi->p', bra_a.imag, ket_a[:,:nao].imag) + rab = np.einsum('pi,pi->p', bra_a.conj(), ket_b[:,:nao]) + rbb = np.einsum('pi,pi->p', bra_b.real, ket_b[:,nao:].real) + rbb+= np.einsum('pi,pi->p', bra_b.imag, ket_b[:,nao:].imag) + rho_m = np.empty((4, ngrids)) + rho_m[0,:] = raa + rbb # rho + rho_m[1,:] = rab.real # mx + rho_m[2,:] = rab.imag # my + rho_m[3,:] = raa - rbb # mz + if bra_eq_ket: + rho_m[1,:] *= 2 + rho_m[2,:] *= 2 + else: + rba = np.einsum('pi,pi->p', bra_b.conj(), ket_a[:,nao:]) + rho_m[1,:] += rba.real + rho_m[2,:] -= rba.imag + else: + raa = np.einsum('pi,pi->p', bra_a.conj(), ket_a[:,:nao]) + rba = np.einsum('pi,pi->p', bra_b.conj(), ket_a[:,nao:]) + rab = np.einsum('pi,pi->p', bra_a.conj(), ket_b[:,:nao]) + rbb = np.einsum('pi,pi->p', bra_b.conj(), ket_b[:,nao:]) + rho_m = np.empty((4, ngrids), dtype=np.complex128) + rho_m[0,:] = raa + rbb # rho + rho_m[1,:] = rab + rba # mx + rho_m[2,:] = (rba - rab) * 1j # my + rho_m[3,:] = raa - rbb # mz + return rho_m + + +class NumInt2C(lib.StreamObject, numint.LibXCMixin): + '''Numerical integration methods for 2-component basis (used by GKS)''' + + # collinear schemes: + # 'col' (collinear, by default) + # 'ncol' (non-collinear) + # 'mcol' (multi-collinear) + collinear = getattr(__config__, 'dft_numint_RnumInt_collinear', 'col') + spin_samples = getattr(__config__, 'dft_numint_RnumInt_spin_samples', 770) + collinear_thrd = getattr(__config__, 'dft_numint_RnumInt_collinear_thrd', 0.99) + collinear_samples = getattr(__config__, 'dft_numint_RnumInt_collinear_samples', 200) + + make_mask = staticmethod(numint.make_mask) + eval_ao = staticmethod(numint.eval_ao) + eval_rho = staticmethod(eval_rho) + + def eval_rho1(self, mol, ao, dm, screen_index=None, xctype='LDA', hermi=0, + with_lapl=True, cutoff=None, ao_cutoff=None, pair_mask=None, + verbose=None): + return self.eval_rho(mol, ao, dm, screen_index, xctype, hermi, + with_lapl, verbose=verbose) + + def eval_rho2(self, mol, ao, mo_coeff, mo_occ, non0tab=None, xctype='LDA', + with_lapl=True, verbose=None): + '''Calculate the electron density for LDA functional and the density + derivatives for GGA functional in the framework of 2-component basis. + ''' + if self.collinear[0] in ('n', 'm'): + # TODO: + dm = np.dot(mo_coeff * mo_occ, mo_coeff.conj().T) + hermi = 1 + rho = self.eval_rho(mol, ao, dm, non0tab, xctype, hermi, with_lapl, verbose) + return rho + + #if mo_coeff.dtype == np.double: + # nao = ao.shape[-1] + # assert nao * 2 == mo_coeff.shape[0] + # mo_aR = mo_coeff[:nao] + # mo_bR = mo_coeff[nao:] + # rho = numint.eval_rho2(mol, ao, mo_aR, mo_occ, non0tab, xctype, with_lapl, verbose) + # rho += numint.eval_rho2(mol, ao, mo_bR, mo_occ, non0tab, xctype, with_lapl, verbose) + #else: + # dm = np.dot(mo_coeff * mo_occ, mo_coeff.conj().T) + # hermi = 1 + # rho = self.eval_rho(mol, dm, ao, dm, non0tab, xctype, hermi, with_lapl, verbose) + #mx = my = mz = None + #return rho, mx, my, mz + raise NotImplementedError(self.collinear) + + def cache_xc_kernel(self, mol, grids, xc_code, mo_coeff, mo_occ, spin=0, + max_memory=2000): + '''Compute the 0th order density, Vxc and fxc. They can be used in TDDFT, + DFT hessian module etc. + ''' + xctype = self._xc_type(xc_code) + if xctype in ('GGA', 'MGGA'): + ao_deriv = 1 + else: + ao_deriv = 0 + n2c = mo_coeff.shape[0] + nao = n2c // 2 + with_lapl = numint.MGGA_DENSITY_LAPL + + if self.collinear[0] in ('m', 'n'): # mcol or ncol + rho = [] + for ao, mask, weight, coords \ + in self.block_loop(mol, grids, nao, ao_deriv, max_memory): + rho.append(self.eval_rho2(mol, ao, mo_coeff, mo_occ, mask, xctype, + with_lapl)) + rho = np.concatenate(rho,axis=-1) + assert rho.dtype == np.double + if self.collinear[0] == 'm': # mcol + eval_xc = self.mcfun_eval_xc_adapter(xc_code) + else: + eval_xc = self.eval_xc_eff + vxc, fxc = eval_xc(xc_code, rho, deriv=2, xctype=xctype)[1:3] + else: + # rhoa and rhob must be real + dm = np.dot(mo_coeff * mo_occ, mo_coeff.conj().T) + dm_a = dm[:nao,:nao].real.copy('C') + dm_b = dm[nao:,nao:].real.copy('C') + ni = self._to_numint1c() + hermi = 1 + rhoa = [] + rhob = [] + for ao, mask, weight, coords \ + in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): + # rhoa and rhob must be real + rhoa.append(ni.eval_rho(mol, ao, dm_a, mask, xctype, hermi, with_lapl)) + rhob.append(ni.eval_rho(mol, ao, dm_b, mask, xctype, hermi, with_lapl)) + rho = np.stack([np.concatenate(rhoa,axis=-1), np.concatenate(rhob,axis=-1)]) + assert rho.dtype == np.double + vxc, fxc = ni.eval_xc_eff(xc_code, rho, deriv=2, xctype=xctype)[1:3] + return rho, vxc, fxc + + def cache_xc_kernel1(self, mol, grids, xc_code, dm, spin=0, max_memory=2000): + '''Compute the 0th order density, Vxc and fxc. They can be used in TDDFT, + DFT hessian module etc. + ''' + xctype = self._xc_type(xc_code) + if xctype in ('GGA', 'MGGA'): + ao_deriv = 1 + else: + ao_deriv = 0 + n2c = dm.shape[0] + nao = n2c // 2 + + if self.collinear[0] in ('m', 'n'): # mcol or ncol + hermi = 1 # rho must be real. We need to assume dm hermitian + with_lapl = False + rho = [] + for ao, mask, weight, coords \ + in self.block_loop(mol, grids, nao, ao_deriv, max_memory): + rho.append(self.eval_rho1(mol, ao, dm, mask, xctype, hermi, with_lapl)) + rho = np.concatenate(rho,axis=-1) + assert rho.dtype == np.double + if self.collinear[0] == 'm': # mcol + eval_xc = self.mcfun_eval_xc_adapter(xc_code) + else: + eval_xc = self.eval_xc_eff + vxc, fxc = eval_xc(xc_code, rho, deriv=2, xctype=xctype)[1:3] + else: + # rhoa and rhob must be real. We need to assume dm hermitian + hermi = 1 + # TODO: test if dm[:nao,:nao].imag == 0 + dm_a = dm[:nao,:nao].real.copy('C') + dm_b = dm[nao:,nao:].real.copy('C') + ni = self._to_numint1c() + with_lapl = True + rhoa = [] + rhob = [] + for ao, mask, weight, coords \ + in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): + # rhoa and rhob must be real + rhoa.append(ni.eval_rho(mol, ao, dm_a, mask, xctype, hermi, with_lapl)) + rhob.append(ni.eval_rho(mol, ao, dm_b, mask, xctype, hermi, with_lapl)) + rho = np.stack([np.concatenate(rhoa,axis=-1), np.concatenate(rhob,axis=-1)]) + assert rho.dtype == np.double + vxc, fxc = ni.eval_xc_eff(xc_code, rho, deriv=2, xctype=xctype)[1:3] + return rho, vxc, fxc + + def get_rho(self, mol, dm, grids, max_memory=2000): + '''Density in real space + ''' + nao = dm.shape[-1] // 2 + dm_a = dm[:nao,:nao].real + dm_b = dm[nao:,nao:].real + ni = self._to_numint1c() + return ni.get_rho(mol, dm_a+dm_b, grids, max_memory) + + _gks_mcol_vxc = _gks_mcol_vxc + _gks_mcol_fxc = _gks_mcol_fxc + + @lib.with_doc(numint.nr_rks.__doc__) + def nr_vxc(self, mol, grids, xc_code, dms, spin=0, relativity=0, hermi=1, + max_memory=2000, verbose=None): + if self.collinear[0] in ('m', 'n'): # mcol or ncol + n, exc, vmat = self._gks_mcol_vxc(mol, grids, xc_code, dms, relativity, + hermi, max_memory, verbose) + else: + nao = dms.shape[-1] // 2 + # ground state density is always real + dm_a = dms[...,:nao,:nao].real.copy('C') + dm_b = dms[...,nao:,nao:].real.copy('C') + dm1 = (dm_a, dm_b) + ni = self._to_numint1c() + n, exc, v = ni.nr_uks(mol, grids, xc_code, dm1, relativity, + hermi, max_memory, verbose) + vmat = np.zeros_like(dms) + vmat[...,:nao,:nao] = v[0] + vmat[...,nao:,nao:] = v[1] + return n.sum(), exc, vmat + get_vxc = nr_gks_vxc = nr_vxc + + @lib.with_doc(numint.nr_nlc_vxc.__doc__) + def nr_nlc_vxc(self, mol, grids, xc_code, dm, spin=0, relativity=0, hermi=1, + max_memory=2000, verbose=None): + assert dm.ndim == 2 + nao = dm.shape[-1] // 2 + # ground state density is always real + dm_a = dm[:nao,:nao].real + dm_b = dm[nao:,nao:].real + ni = self._to_numint1c() + n, exc, v = ni.nr_nlc_vxc(mol, grids, xc_code, dm_a+dm_b, relativity, + hermi, max_memory, verbose) + vmat = np.zeros_like(dm) + vmat[:nao,:nao] = v + vmat[nao:,nao:] = v + return n, exc, vmat + + @lib.with_doc(numint.nr_rks_fxc.__doc__) + def nr_fxc(self, mol, grids, xc_code, dm0, dms, spin=0, relativity=0, hermi=0, + rho0=None, vxc=None, fxc=None, max_memory=2000, verbose=None): + if self.collinear[0] not in ('c', 'm'): # col or mcol + raise NotImplementedError('non-collinear fxc') + + if self.collinear[0] == 'm': # mcol + fxcmat = self._gks_mcol_fxc(mol, grids, xc_code, dm0, dms, + relativity, hermi, rho0, vxc, fxc, + max_memory, verbose) + else: + dms = np.asarray(dms) + nao = dms.shape[-1] // 2 + if dm0 is not None: + dm0a = dm0[:nao,:nao].real.copy('C') + dm0b = dm0[nao:,nao:].real.copy('C') + dm0 = (dm0a, dm0b) + # dms_a and dms_b may be complex if they are TDDFT amplitudes + dms_a = dms[...,:nao,:nao].copy() + dms_b = dms[...,nao:,nao:].copy() + dm1 = (dms_a, dms_b) + ni = self._to_numint1c() + vmat = ni.nr_uks_fxc(mol, grids, xc_code, dm0, dm1, relativity, + hermi, rho0, vxc, fxc, max_memory, verbose) + fxcmat = np.zeros_like(dms) + fxcmat[...,:nao,:nao] = vmat[0] + fxcmat[...,nao:,nao:] = vmat[1] + return fxcmat + get_fxc = nr_gks_fxc = nr_fxc + + eval_xc_eff = _eval_xc_eff + mcfun_eval_xc_adapter = mcfun_eval_xc_adapter + + block_loop = numint.NumInt.block_loop + _gen_rho_evaluator = numint.NumInt._gen_rho_evaluator + + def _to_numint1c(self): + '''Converts to the associated class to handle collinear systems''' + return self.view(numint.NumInt) \ No newline at end of file From d4f7db9070e805c97d2424c5c2f753598e1e655a Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 31 Oct 2025 11:46:46 +0800 Subject: [PATCH 03/21] finish eval_rho, needs debug --- gpu4pyscf/dft/numint2c.py | 214 ++++++++------------------- gpu4pyscf/dft/tests/test_numint2c.py | 140 ++++++++++++++++++ 2 files changed, 204 insertions(+), 150 deletions(-) create mode 100644 gpu4pyscf/dft/tests/test_numint2c.py diff --git a/gpu4pyscf/dft/numint2c.py b/gpu4pyscf/dft/numint2c.py index 034d3fe03..b72c56eca 100644 --- a/gpu4pyscf/dft/numint2c.py +++ b/gpu4pyscf/dft/numint2c.py @@ -18,23 +18,20 @@ import functools import numpy as np -from pyscf import lib -from pyscf.dft import numint -from pyscf.dft.numint import _dot_ao_dm, _dot_ao_ao, _scale_ao, _tau_dot, BLKSIZE -from pyscf.dft import xc_deriv +import cupy as cp +from gpu4pyscf import lib +from gpu4pyscf.dft import numint +from gpu4pyscf.dft.numint import _dot_ao_dm, _dot_ao_ao, _scale_ao, _tau_dot, BLKSIZE +from gpu4pyscf.dft import xc_deriv from pyscf import __config__ -@lib.with_doc( - '''Calculate the electron density and magnetization spin density in the - framework of 2-component real basis. - ''' + numint.eval_rho.__doc__) def eval_rho(mol, ao, dm, non0tab=None, xctype='LDA', hermi=0, with_lapl=True, verbose=None): - nao = ao.shape[-1] + nao = ao.shape[-2] assert dm.ndim == 2 and nao * 2 == dm.shape[0] - ngrids, nao = ao.shape[-2:] + nao, ngrids = ao.shape[-2:] xctype = xctype.upper() shls_slice = (0, mol.nbas) ao_loc = mol.ao_loc @@ -46,9 +43,9 @@ def eval_rho(mol, ao, dm, non0tab=None, xctype='LDA', hermi=0, elif xctype == 'GGA': # first 4 ~ (rho, m), second 4 ~ (0th order, dx, dy, dz) if hermi: - rho_m = np.empty((4, 4, ngrids)) + rho_m = cp.empty((4, 4, ngrids)) else: - rho_m = np.empty((4, 4, ngrids), dtype=np.complex128) + rho_m = cp.empty((4, 4, ngrids), dtype=cp.complex128) c0a = _dot_ao_dm(mol, ao[0], dm[:nao], non0tab, shls_slice, ao_loc) c0b = _dot_ao_dm(mol, ao[0], dm[nao:], non0tab, shls_slice, ao_loc) c0 = (c0a, c0b) @@ -64,14 +61,14 @@ def eval_rho(mol, ao, dm, non0tab=None, xctype='LDA', hermi=0, rho_m[:,i] += _contract_rho_m((ao[0], ao[0]), (c1a, c1b), hermi, False) else: # meta-GGA if hermi: - dtype = np.double + dtype = cp.double else: - dtype = np.complex128 + dtype = cp.complex128 if with_lapl: - rho_m = np.empty((4, 6, ngrids), dtype=dtype) + rho_m = cp.empty((4, 6, ngrids), dtype=dtype) tau_idx = 5 else: - rho_m = np.empty((4, 5, ngrids), dtype=dtype) + rho_m = cp.empty((4, 5, ngrids), dtype=dtype) tau_idx = 4 c0a = _dot_ao_dm(mol, ao[0], dm[:nao], non0tab, shls_slice, ao_loc) c0b = _dot_ao_dm(mol, ao[0], dm[nao:], non0tab, shls_slice, ao_loc) @@ -104,9 +101,9 @@ def _gks_mcol_vxc(ni, mol, grids, xc_code, dms, relativity=0, hermi=0, make_rho, nset, n2c = ni._gen_rho_evaluator(mol, dms, hermi, False) nao = n2c // 2 - nelec = np.zeros(nset) - excsum = np.zeros(nset) - vmat = np.zeros((nset,n2c,n2c), dtype=np.complex128) + nelec = cp.zeros(nset) + excsum = cp.zeros(nset) + vmat = cp.zeros((nset,n2c,n2c), dtype=cp.complex128) if xctype in ('LDA', 'GGA', 'MGGA'): f_eval_mat = { @@ -132,7 +129,7 @@ def _gks_mcol_vxc(ni, mol, grids, xc_code, dms, relativity=0, hermi=0, else: den = rho[0,0] * weight nelec[i] += den.sum() - excsum[i] += np.dot(den, exc) + excsum[i] += cp.dot(den, exc) vmat[i] += fmat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi) @@ -143,7 +140,7 @@ def _gks_mcol_vxc(ni, mol, grids, xc_code, dms, relativity=0, hermi=0, if hermi: vmat = vmat + vmat.conj().transpose(0,2,1) - if isinstance(dms, np.ndarray) and dms.ndim == 2: + if isinstance(dms, cp.ndarray) and dms.ndim == 2: vmat = vmat[0] nelec = nelec[0] excsum = excsum[0] @@ -151,46 +148,7 @@ def _gks_mcol_vxc(ni, mol, grids, xc_code, dms, relativity=0, hermi=0, def _gks_mcol_fxc(ni, mol, grids, xc_code, dm0, dms, relativity=0, hermi=0, rho0=None, vxc=None, fxc=None, max_memory=2000, verbose=None): - assert ni.collinear[0] == 'm' # mcol - xctype = ni._xc_type(xc_code) - if fxc is None and xctype in ('LDA', 'GGA', 'MGGA'): - fxc = ni.cache_xc_kernel1(mol, grids, xc_code, dm0, - max_memory=max_memory)[2] - - if xctype == 'MGGA': - fmat, ao_deriv = (_mcol_mgga_fxc_mat , 1) - elif xctype == 'GGA': - fmat, ao_deriv = (_mcol_gga_fxc_mat , 1) - else: - fmat, ao_deriv = (_mcol_lda_fxc_mat , 0) - - shls_slice = (0, mol.nbas) - ao_loc = mol.ao_loc_nr() - make_rho1, nset, n2c = ni._gen_rho_evaluator(mol, dms, hermi, False) - nao = n2c // 2 - vmat = np.zeros((nset,n2c,n2c), dtype=np.complex128) - - if xctype in ('LDA', 'GGA', 'MGGA'): - _rho0 = None - p1 = 0 - for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): - p0, p1 = p1, p1 + weight.size - _fxc = fxc[:,:,:,:,p0:p1] - for i in range(nset): - rho1 = make_rho1(i, ao, mask, xctype) - vmat[i] += fmat(mol, ao, weight, _rho0, rho1, _fxc, - mask, shls_slice, ao_loc, hermi) - elif xctype == 'HF': - pass - else: - raise NotImplementedError(f'numint2c.get_fxc for functional {xc_code}') - - if hermi: - vmat = vmat + vmat.conj().transpose(0,2,1) - if isinstance(dms, np.ndarray) and dms.ndim == 2: - vmat = vmat[0] - return vmat + raise NotImplementedError('non-collinear lda fxc') def _ncol_lda_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi): '''Vxc matrix of non-collinear LDA''' @@ -201,7 +159,7 @@ def _ncol_lda_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi s = lib.norm(rho[1:4], axis=0) wv = weight * vr - with np.errstate(divide='ignore',invalid='ignore'): + with cp.errstate(divide='ignore',invalid='ignore'): ws = vs * weight / s ws[s < 1e-20] = 0 @@ -217,7 +175,7 @@ def _ncol_lda_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi if hermi: # conj(mx+my*1j) == mx-my*1j, tmpx and tmpy should be real matba = (tmpx + tmpx.T) + (tmpy + tmpy.T) * 1j - matab = np.zeros_like(matba) + matab = cp.zeros_like(matba) else: # conj(mx+my*1j) != mx-my*1j, tmpx and tmpy should be complex matba = tmpx + tmpy * 1j @@ -227,7 +185,7 @@ def _ncol_lda_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi mataa = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) aow = _scale_ao(ao, wv-ws*mz, out=aow) # Mz matbb = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) - mat = np.block([[mataa, matab], [matba, matbb]]) + mat = cp.block([[mataa, matab], [matba, matbb]]) return mat def _eval_xc_eff(ni, xc_code, rho, deriv=1, omega=None, xctype=None, @@ -267,9 +225,9 @@ def _eval_xc_eff(ni, xc_code, rho, deriv=1, omega=None, xctype=None, else: raise RuntimeError(f'Unknown collinear scheme {ni.collinear}') - rho = np.stack([(t + s) * .5, (t - s) * .5]) + rho = cp.stack([(t + s) * .5, (t - s) * .5]) if xctype == 'MGGA' and rho.shape[1] == 6: - rho = np.asarray(rho[:,[0,1,2,3,5],:], order='C') + rho = cp.asarray(rho[:,[0,1,2,3,5],:], order='C') assert spin is None or spin == 1 spin = 1 @@ -330,7 +288,7 @@ def _mcol_lda_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi if hermi: # conj(mx+my*1j) == mx-my*1j, tmpx and tmpy should be real matba = (tmpx + tmpx.T) + (tmpy + tmpy.T) * 1j - matab = np.zeros_like(matba) + matab = cp.zeros_like(matba) else: # conj(mx+my*1j) != mx-my*1j, tmpx and tmpy should be complex matba = tmpx + tmpy * 1j @@ -340,7 +298,7 @@ def _mcol_lda_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi mataa = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) aow = _scale_ao(ao, wr[0]-wmz[0], out=aow) # Mz matbb = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) - mat = np.block([[mataa, matab], [matba, matbb]]) + mat = cp.block([[mataa, matab], [matba, matbb]]) return mat def _mcol_gga_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi): @@ -360,10 +318,10 @@ def _mcol_gga_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi aow = _scale_ao(ao[:4], wmy[:4], out=aow) # My tmpy = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) if hermi: - assert vxc.dtype == np.double + assert vxc.dtype == cp.double # conj(mx+my*1j) == mx-my*1j, tmpx and tmpy should be real matba = (tmpx + tmpx.T) + (tmpy + tmpy.T) * 1j - matab = np.zeros_like(matba) + matab = cp.zeros_like(matba) else: # conj(mx+my*1j) != mx-my*1j, tmpx and tmpy should be complex aow = _scale_ao(ao[1:4], wmx[1:4].conj(), out=aow) # Mx @@ -377,7 +335,7 @@ def _mcol_gga_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi aow = _scale_ao(ao[1:4], (wr[1:4]-wmz[1:4]).conj(), out=aow) # Mz matbb += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) - mat = np.block([[mataa, matab], [matba, matbb]]) + mat = cp.block([[mataa, matab], [matba, matbb]]) return mat def _mcol_mgga_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi): @@ -405,10 +363,10 @@ def _mcol_mgga_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, herm tmpy = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) tmpy += _tau_dot(mol, ao, ao, wmy[tau_idx], mask, shls_slice, ao_loc) if hermi: - assert vxc.dtype == np.double + assert vxc.dtype == cp.double # conj(mx+my*1j) == mx-my*1j, tmpx and tmpy should be real matba = (tmpx + tmpx.T) + (tmpy + tmpy.T) * 1j - matab = np.zeros_like(matba) + matab = cp.zeros_like(matba) else: # conj(mx+my*1j) != mx-my*1j, tmpx and tmpy should be complex aow = _scale_ao(ao[1:4], wmx[1:4].conj(), out=aow) # Mx @@ -422,29 +380,20 @@ def _mcol_mgga_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, herm aow = _scale_ao(ao[1:4], (wr[1:4]-wmz[1:4]).conj(), out=aow) # Mz matbb += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) - mat = np.block([[mataa, matab], [matba, matbb]]) + mat = cp.block([[mataa, matab], [matba, matbb]]) return mat def _mcol_lda_fxc_mat(mol, ao, weight, rho0, rho1, fxc, mask, shls_slice, ao_loc, hermi): - '''Kernel matrix of multi-collinear LDA''' - vxc1 = np.einsum('ag,abyg->byg', rho1, fxc[:,0,:]) - return _mcol_lda_vxc_mat(mol, ao, weight, rho0, vxc1, mask, shls_slice, - ao_loc, hermi) + raise NotImplementedError('non-collinear lda fxc') def _mcol_gga_fxc_mat(mol, ao, weight, rho0, rho1, fxc, mask, shls_slice, ao_loc, hermi): - '''Kernel matrix of multi-collinear GGA''' - vxc1 = np.einsum('axg,axbyg->byg', rho1, fxc) - return _mcol_gga_vxc_mat(mol, ao, weight, rho0, vxc1, mask, shls_slice, - ao_loc, hermi) + raise NotImplementedError('non-collinear gga fxc') def _mcol_mgga_fxc_mat(mol, ao, weight, rho0, rho1, fxc, mask, shls_slice, ao_loc, hermi): - '''Kernel matrix of multi-collinear MGGA''' - vxc1 = np.einsum('axg,axbyg->byg', rho1, fxc) - return _mcol_mgga_vxc_mat(mol, ao, weight, rho0, vxc1, mask, shls_slice, - ao_loc, hermi) + raise NotImplementedError('non-collinear mgga fxc') def _contract_rho_m(bra, ket, hermi=0, bra_eq_ket=False): ''' @@ -457,15 +406,15 @@ def _contract_rho_m(bra, ket, hermi=0, bra_eq_ket=False): # mz = einsum('xy,ygi,ij,xgj->g', sz, ket, dm, bra.conj()) ket_a, ket_b = ket bra_a, bra_b = bra - nao = min(ket_a.shape[1], bra_a.shape[1]) - ngrids = ket_a.shape[0] + nao = min(ket_a.shape[-2], bra_a.shape[-2]) + ngrids = ket_a.shape[-1] if hermi: - raa = np.einsum('pi,pi->p', bra_a.real, ket_a[:,:nao].real) - raa+= np.einsum('pi,pi->p', bra_a.imag, ket_a[:,:nao].imag) - rab = np.einsum('pi,pi->p', bra_a.conj(), ket_b[:,:nao]) - rbb = np.einsum('pi,pi->p', bra_b.real, ket_b[:,nao:].real) - rbb+= np.einsum('pi,pi->p', bra_b.imag, ket_b[:,nao:].imag) - rho_m = np.empty((4, ngrids)) + raa = cp.einsum('ip,ip->p', bra_a.real, ket_a[:nao].real) + raa+= cp.einsum('ip,ip->p', bra_a.imag, ket_a[:nao].imag) + rab = cp.einsum('ip,ip->p', bra_a.conj(), ket_b[:nao]) + rbb = cp.einsum('ip,ip->p', bra_b.real, ket_b[nao:].real) + rbb+= cp.einsum('ip,ip->p', bra_b.imag, ket_b[nao:].imag) + rho_m = cp.empty((4, ngrids)) rho_m[0,:] = raa + rbb # rho rho_m[1,:] = rab.real # mx rho_m[2,:] = rab.imag # my @@ -474,15 +423,15 @@ def _contract_rho_m(bra, ket, hermi=0, bra_eq_ket=False): rho_m[1,:] *= 2 rho_m[2,:] *= 2 else: - rba = np.einsum('pi,pi->p', bra_b.conj(), ket_a[:,nao:]) + rba = cp.einsum('ip,ip->p', bra_b.conj(), ket_a[nao:]) rho_m[1,:] += rba.real rho_m[2,:] -= rba.imag else: - raa = np.einsum('pi,pi->p', bra_a.conj(), ket_a[:,:nao]) - rba = np.einsum('pi,pi->p', bra_b.conj(), ket_a[:,nao:]) - rab = np.einsum('pi,pi->p', bra_a.conj(), ket_b[:,:nao]) - rbb = np.einsum('pi,pi->p', bra_b.conj(), ket_b[:,nao:]) - rho_m = np.empty((4, ngrids), dtype=np.complex128) + raa = cp.einsum('ip,ip->p', bra_a.conj(), ket_a[:nao]) + rba = cp.einsum('ip,ip->p', bra_b.conj(), ket_a[nao:]) + rab = cp.einsum('ip,ip->p', bra_a.conj(), ket_b[:nao]) + rbb = cp.einsum('ip,ip->p', bra_b.conj(), ket_b[nao:]) + rho_m = cp.empty((4, ngrids), dtype=cp.complex128) rho_m[0,:] = raa + rbb # rho rho_m[1,:] = rab + rba # mx rho_m[2,:] = (rba - rab) * 1j # my @@ -519,12 +468,12 @@ def eval_rho2(self, mol, ao, mo_coeff, mo_occ, non0tab=None, xctype='LDA', ''' if self.collinear[0] in ('n', 'm'): # TODO: - dm = np.dot(mo_coeff * mo_occ, mo_coeff.conj().T) + dm = cp.dot(mo_coeff * mo_occ, mo_coeff.conj().T) hermi = 1 rho = self.eval_rho(mol, ao, dm, non0tab, xctype, hermi, with_lapl, verbose) return rho - #if mo_coeff.dtype == np.double: + #if mo_coeff.dtype == cp.double: # nao = ao.shape[-1] # assert nao * 2 == mo_coeff.shape[0] # mo_aR = mo_coeff[:nao] @@ -532,7 +481,7 @@ def eval_rho2(self, mol, ao, mo_coeff, mo_occ, non0tab=None, xctype='LDA', # rho = numint.eval_rho2(mol, ao, mo_aR, mo_occ, non0tab, xctype, with_lapl, verbose) # rho += numint.eval_rho2(mol, ao, mo_bR, mo_occ, non0tab, xctype, with_lapl, verbose) #else: - # dm = np.dot(mo_coeff * mo_occ, mo_coeff.conj().T) + # dm = cp.dot(mo_coeff * mo_occ, mo_coeff.conj().T) # hermi = 1 # rho = self.eval_rho(mol, dm, ao, dm, non0tab, xctype, hermi, with_lapl, verbose) #mx = my = mz = None @@ -559,8 +508,8 @@ def cache_xc_kernel(self, mol, grids, xc_code, mo_coeff, mo_occ, spin=0, in self.block_loop(mol, grids, nao, ao_deriv, max_memory): rho.append(self.eval_rho2(mol, ao, mo_coeff, mo_occ, mask, xctype, with_lapl)) - rho = np.concatenate(rho,axis=-1) - assert rho.dtype == np.double + rho = cp.concatenate(rho,axis=-1) + assert rho.dtype == cp.double if self.collinear[0] == 'm': # mcol eval_xc = self.mcfun_eval_xc_adapter(xc_code) else: @@ -568,7 +517,7 @@ def cache_xc_kernel(self, mol, grids, xc_code, mo_coeff, mo_occ, spin=0, vxc, fxc = eval_xc(xc_code, rho, deriv=2, xctype=xctype)[1:3] else: # rhoa and rhob must be real - dm = np.dot(mo_coeff * mo_occ, mo_coeff.conj().T) + dm = cp.dot(mo_coeff * mo_occ, mo_coeff.conj().T) dm_a = dm[:nao,:nao].real.copy('C') dm_b = dm[nao:,nao:].real.copy('C') ni = self._to_numint1c() @@ -580,8 +529,8 @@ def cache_xc_kernel(self, mol, grids, xc_code, mo_coeff, mo_occ, spin=0, # rhoa and rhob must be real rhoa.append(ni.eval_rho(mol, ao, dm_a, mask, xctype, hermi, with_lapl)) rhob.append(ni.eval_rho(mol, ao, dm_b, mask, xctype, hermi, with_lapl)) - rho = np.stack([np.concatenate(rhoa,axis=-1), np.concatenate(rhob,axis=-1)]) - assert rho.dtype == np.double + rho = cp.stack([cp.concatenate(rhoa,axis=-1), cp.concatenate(rhob,axis=-1)]) + assert rho.dtype == cp.double vxc, fxc = ni.eval_xc_eff(xc_code, rho, deriv=2, xctype=xctype)[1:3] return rho, vxc, fxc @@ -604,8 +553,8 @@ def cache_xc_kernel1(self, mol, grids, xc_code, dm, spin=0, max_memory=2000): for ao, mask, weight, coords \ in self.block_loop(mol, grids, nao, ao_deriv, max_memory): rho.append(self.eval_rho1(mol, ao, dm, mask, xctype, hermi, with_lapl)) - rho = np.concatenate(rho,axis=-1) - assert rho.dtype == np.double + rho = cp.concatenate(rho,axis=-1) + assert rho.dtype == cp.double if self.collinear[0] == 'm': # mcol eval_xc = self.mcfun_eval_xc_adapter(xc_code) else: @@ -626,8 +575,8 @@ def cache_xc_kernel1(self, mol, grids, xc_code, dm, spin=0, max_memory=2000): # rhoa and rhob must be real rhoa.append(ni.eval_rho(mol, ao, dm_a, mask, xctype, hermi, with_lapl)) rhob.append(ni.eval_rho(mol, ao, dm_b, mask, xctype, hermi, with_lapl)) - rho = np.stack([np.concatenate(rhoa,axis=-1), np.concatenate(rhob,axis=-1)]) - assert rho.dtype == np.double + rho = cp.stack([cp.concatenate(rhoa,axis=-1), cp.concatenate(rhob,axis=-1)]) + assert rho.dtype == cp.double vxc, fxc = ni.eval_xc_eff(xc_code, rho, deriv=2, xctype=xctype)[1:3] return rho, vxc, fxc @@ -658,7 +607,7 @@ def nr_vxc(self, mol, grids, xc_code, dms, spin=0, relativity=0, hermi=1, ni = self._to_numint1c() n, exc, v = ni.nr_uks(mol, grids, xc_code, dm1, relativity, hermi, max_memory, verbose) - vmat = np.zeros_like(dms) + vmat = cp.zeros_like(dms) vmat[...,:nao,:nao] = v[0] vmat[...,nao:,nao:] = v[1] return n.sum(), exc, vmat @@ -667,47 +616,12 @@ def nr_vxc(self, mol, grids, xc_code, dms, spin=0, relativity=0, hermi=1, @lib.with_doc(numint.nr_nlc_vxc.__doc__) def nr_nlc_vxc(self, mol, grids, xc_code, dm, spin=0, relativity=0, hermi=1, max_memory=2000, verbose=None): - assert dm.ndim == 2 - nao = dm.shape[-1] // 2 - # ground state density is always real - dm_a = dm[:nao,:nao].real - dm_b = dm[nao:,nao:].real - ni = self._to_numint1c() - n, exc, v = ni.nr_nlc_vxc(mol, grids, xc_code, dm_a+dm_b, relativity, - hermi, max_memory, verbose) - vmat = np.zeros_like(dm) - vmat[:nao,:nao] = v - vmat[nao:,nao:] = v - return n, exc, vmat + raise NotImplementedError('non-collinear nlc vxc') @lib.with_doc(numint.nr_rks_fxc.__doc__) def nr_fxc(self, mol, grids, xc_code, dm0, dms, spin=0, relativity=0, hermi=0, rho0=None, vxc=None, fxc=None, max_memory=2000, verbose=None): - if self.collinear[0] not in ('c', 'm'): # col or mcol - raise NotImplementedError('non-collinear fxc') - - if self.collinear[0] == 'm': # mcol - fxcmat = self._gks_mcol_fxc(mol, grids, xc_code, dm0, dms, - relativity, hermi, rho0, vxc, fxc, - max_memory, verbose) - else: - dms = np.asarray(dms) - nao = dms.shape[-1] // 2 - if dm0 is not None: - dm0a = dm0[:nao,:nao].real.copy('C') - dm0b = dm0[nao:,nao:].real.copy('C') - dm0 = (dm0a, dm0b) - # dms_a and dms_b may be complex if they are TDDFT amplitudes - dms_a = dms[...,:nao,:nao].copy() - dms_b = dms[...,nao:,nao:].copy() - dm1 = (dms_a, dms_b) - ni = self._to_numint1c() - vmat = ni.nr_uks_fxc(mol, grids, xc_code, dm0, dm1, relativity, - hermi, rho0, vxc, fxc, max_memory, verbose) - fxcmat = np.zeros_like(dms) - fxcmat[...,:nao,:nao] = vmat[0] - fxcmat[...,nao:,nao:] = vmat[1] - return fxcmat + raise NotImplementedError('non-collinear fxc') get_fxc = nr_gks_fxc = nr_fxc eval_xc_eff = _eval_xc_eff diff --git a/gpu4pyscf/dft/tests/test_numint2c.py b/gpu4pyscf/dft/tests/test_numint2c.py new file mode 100644 index 000000000..6f5cd40a2 --- /dev/null +++ b/gpu4pyscf/dft/tests/test_numint2c.py @@ -0,0 +1,140 @@ +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +import pyscf +import cupy +from pyscf import lib, scf +from pyscf.dft.numint import NumInt as pyscf_numint +from gpu4pyscf.dft import Grids +from gpu4pyscf.dft import numint +from gpu4pyscf.dft.numint import NumInt +from gpu4pyscf import dft +from gpu4pyscf.dft import gen_grid + +def setUpModule(): + global mol, grids_cpu, grids_gpu, dm, dm0, dm1, mo_occ, mo_coeff + mol = pyscf.M( + atom = ''' +O 0.000000 0.000000 0.117790 +H 0.000000 0.755453 -0.471161 +H 0.000000 -0.755453 -0.471161''', + basis = 'ccpvdz', + charge = 1, + spin = 1, # = 2S = spin_up - spin_down + output = '/dev/null') + + np.random.seed(2) + mf = scf.UHF(mol) + mf.kernel() + dm1 = mf.make_rdm1().copy() + dm = dm1 + mo_coeff = mf.mo_coeff + mo_occ = mf.mo_occ + dm0 = (mo_coeff[0]*mo_occ[0]).dot(mo_coeff[0].T) + + grids_gpu = Grids(mol) + grids_gpu.level = 1 + grids_gpu.build() + + grids_cpu = grids_gpu.to_cpu() + grids_cpu.weights = cupy.asnumpy(grids_gpu.weights) + grids_cpu.coords = cupy.asnumpy(grids_gpu.coords) + +def tearDownModule(): + global mol, grids_cpu, grids_gpu + mol.stdout.close() + del mol, grids_cpu, grids_gpu + +LDA = 'LDA_C_VWN' +GGA_PBE = 'GGA_C_PBE' +MGGA_M06 = 'MGGA_C_M06' + +class KnownValues(unittest.TestCase): + + def test_eval_rho(self): + np.random.seed(1) + dm = np.random.random(dm0.shape) + ni_gpu = NumInt() + ni_cpu = pyscf_numint() + for xctype in ('LDA', 'GGA', 'MGGA'): + deriv = 1 + if xctype == 'LDA': + deriv = 0 + ao_gpu = ni_gpu.eval_ao(mol, grids_gpu.coords, deriv=deriv, transpose=False) + ao_cpu = ni_cpu.eval_ao(mol, grids_cpu.coords, deriv=deriv) + + rho = ni_gpu.eval_rho(mol, ao_gpu, dm, xctype=xctype, hermi=0, with_lapl=False) + ref = ni_cpu.eval_rho(mol, ao_cpu, dm, xctype=xctype, hermi=0, with_lapl=False) + self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) + + rho = ni_gpu.eval_rho(mol, ao_gpu, dm0, xctype=xctype, hermi=1, with_lapl=False) + ref = ni_cpu.eval_rho(mol, ao_cpu, dm0, xctype=xctype, hermi=1, with_lapl=False) + self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) + + def test_sparse_index(self): + mol = pyscf.M(atom=''' +O 0. 0. 0. +H 5.5 0.3 2.8 +H 1.7 -2.0 0.4''', + basis='def2-tzvpp') + with lib.temporary_env(numint, MIN_BLK_SIZE=128**2): + grids = gen_grid.Grids(mol).set(atom_grid=(200, 1454)).build() + ni = NumInt() + ni.build(mol, grids.coords) + opt = ni.gdftopt + opt.l_ctr_offsets + ao_loc = opt._sorted_mol.ao_loc + ngrids = grids.size + dat = grids.get_non0ao_idx(opt) + assert lib.fp(cupy.hstack([x[1] for x in dat]).get()) == 103.60117204957997 + assert lib.fp(cupy.hstack([x[2] for x in dat]).get()) == 5.616197331343498 + assert lib.fp(cupy.hstack([x[3] for x in dat]).get()) == -22.394314323727 + assert lib.fp(cupy.hstack([x[4] for x in dat]).get()) == 351.2385939586691 + assert [i.size for x in dat for i in x[1:]] == [ + 46, 18, 9, 19, 50, 20, 9, 21, 28, 12, 9, 13, 49, 19, 9, 20, 45, 17, + 9, 18, 45, 17, 9, 18, 45, 17, 9, 18, 45, 17, 9, 18, 50, 20, 9, 21, + 55, 21, 9, 22, 53, 19, 9, 20, 48, 18, 9, 19, 53, 19, 9, 20, 48, 18, + 9, 19, 43, 17, 9, 18, 40, 16, 9, 17, 48, 18, 9, 19, 48, 18, 9, 19, + 48, 18, 9, 19, 57, 21, 9, 22, 23, 11, 9, 12, 28, 12, 9, 13, 37, 15, + 9, 16, 33, 13, 9, 14, 32, 14, 9, 15, 20, 10, 9, 11, 23, 11, 9, 12, + 23, 11, 9, 12, 23, 11, 9, 12, 37, 15, 9, 16] + assert all(x.dtype == np.int32 for x in dat[0][1:]) + + if hasattr(numint, '_sparse_index'): + for i, i0 in enumerate(range(0, ngrids, numint.MIN_BLK_SIZE)): + i1 = min(i0+numint.MIN_BLK_SIZE, ngrids) + ref = numint._sparse_index( + opt._sorted_mol, grids.coords[i0:i1], opt.l_ctr_offsets, ao_loc, opt) + assert all(np.array_equal(r, x) for r, x in zip(ref[1:], dat[i][1:])) + + def test_scale_ao(self): + ao = cupy.random.rand(1, 3, 256) + wv = cupy.random.rand(1, 256) + out = cupy.ones(6 * 256) + ref = cupy.einsum('nip,np->ip', ao, wv) + assert abs(ref - numint._scale_ao(ao, wv)).max() < 1e-12 + assert abs(ref - numint._scale_ao(ao, wv, out=out)).max() < 1e-12 + assert abs(ref - numint._scale_ao(ao+0j, wv, out=out)).max() < 1e-12 + assert abs(ref - numint._scale_ao(ao[0]+0j, wv[0], out=out)).max() < 1e-12 + + ao = ao.transpose(1, 0, 2).copy(order='C').transpose(1, 0, 2) + assert abs(ref - numint._scale_ao(ao, wv)).max() < 1e-12 + + assert abs(ref - numint._scale_ao(ao[0], wv[0])).max() < 1e-12 + +if __name__ == "__main__": + print("Full Tests for dft numint") + unittest.main() From 1a157709b48bbf651455e2f6a86585d49293ec87 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 31 Oct 2025 14:52:19 +0800 Subject: [PATCH 04/21] finish debug eval_rho --- gpu4pyscf/dft/numint2c.py | 8 ++-- gpu4pyscf/dft/tests/test_numint2c.py | 69 ++++------------------------ 2 files changed, 14 insertions(+), 63 deletions(-) diff --git a/gpu4pyscf/dft/numint2c.py b/gpu4pyscf/dft/numint2c.py index b72c56eca..2511cbfb8 100644 --- a/gpu4pyscf/dft/numint2c.py +++ b/gpu4pyscf/dft/numint2c.py @@ -19,9 +19,9 @@ import functools import numpy as np import cupy as cp -from gpu4pyscf import lib +from pyscf import lib from gpu4pyscf.dft import numint -from gpu4pyscf.dft.numint import _dot_ao_dm, _dot_ao_ao, _scale_ao, _tau_dot, BLKSIZE +from gpu4pyscf.dft.numint import _dot_ao_dm, _dot_ao_ao, _scale_ao, _tau_dot from gpu4pyscf.dft import xc_deriv from pyscf import __config__ @@ -30,6 +30,8 @@ def eval_rho(mol, ao, dm, non0tab=None, xctype='LDA', hermi=0, with_lapl=True, verbose=None): nao = ao.shape[-2] assert dm.ndim == 2 and nao * 2 == dm.shape[0] + if not isinstance(dm, cp.ndarray): + dm = cp.asarray(dm) nao, ngrids = ao.shape[-2:] xctype = xctype.upper() @@ -451,7 +453,6 @@ class NumInt2C(lib.StreamObject, numint.LibXCMixin): collinear_thrd = getattr(__config__, 'dft_numint_RnumInt_collinear_thrd', 0.99) collinear_samples = getattr(__config__, 'dft_numint_RnumInt_collinear_samples', 200) - make_mask = staticmethod(numint.make_mask) eval_ao = staticmethod(numint.eval_ao) eval_rho = staticmethod(eval_rho) @@ -628,7 +629,6 @@ def nr_fxc(self, mol, grids, xc_code, dm0, dms, spin=0, relativity=0, hermi=0, mcfun_eval_xc_adapter = mcfun_eval_xc_adapter block_loop = numint.NumInt.block_loop - _gen_rho_evaluator = numint.NumInt._gen_rho_evaluator def _to_numint1c(self): '''Converts to the associated class to handle collinear systems''' diff --git a/gpu4pyscf/dft/tests/test_numint2c.py b/gpu4pyscf/dft/tests/test_numint2c.py index 6f5cd40a2..a8380829e 100644 --- a/gpu4pyscf/dft/tests/test_numint2c.py +++ b/gpu4pyscf/dft/tests/test_numint2c.py @@ -17,10 +17,10 @@ import pyscf import cupy from pyscf import lib, scf -from pyscf.dft.numint import NumInt as pyscf_numint +from pyscf.dft.numint2c import NumInt2C as pyscf_numint2c from gpu4pyscf.dft import Grids -from gpu4pyscf.dft import numint -from gpu4pyscf.dft.numint import NumInt +from gpu4pyscf.dft import numint2c +from gpu4pyscf.dft.numint2c import NumInt2C from gpu4pyscf import dft from gpu4pyscf.dft import gen_grid @@ -34,16 +34,17 @@ def setUpModule(): basis = 'ccpvdz', charge = 1, spin = 1, # = 2S = spin_up - spin_down - output = '/dev/null') + output = '/dev/null' + ) np.random.seed(2) - mf = scf.UHF(mol) + mf = scf.GHF(mol) mf.kernel() dm1 = mf.make_rdm1().copy() dm = dm1 mo_coeff = mf.mo_coeff mo_occ = mf.mo_occ - dm0 = (mo_coeff[0]*mo_occ[0]).dot(mo_coeff[0].T) + dm0 = (mo_coeff*mo_occ).dot(mo_coeff.T) grids_gpu = Grids(mol) grids_gpu.level = 1 @@ -67,8 +68,8 @@ class KnownValues(unittest.TestCase): def test_eval_rho(self): np.random.seed(1) dm = np.random.random(dm0.shape) - ni_gpu = NumInt() - ni_cpu = pyscf_numint() + ni_gpu = NumInt2C() + ni_cpu = pyscf_numint2c() for xctype in ('LDA', 'GGA', 'MGGA'): deriv = 1 if xctype == 'LDA': @@ -84,57 +85,7 @@ def test_eval_rho(self): ref = ni_cpu.eval_rho(mol, ao_cpu, dm0, xctype=xctype, hermi=1, with_lapl=False) self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) - def test_sparse_index(self): - mol = pyscf.M(atom=''' -O 0. 0. 0. -H 5.5 0.3 2.8 -H 1.7 -2.0 0.4''', - basis='def2-tzvpp') - with lib.temporary_env(numint, MIN_BLK_SIZE=128**2): - grids = gen_grid.Grids(mol).set(atom_grid=(200, 1454)).build() - ni = NumInt() - ni.build(mol, grids.coords) - opt = ni.gdftopt - opt.l_ctr_offsets - ao_loc = opt._sorted_mol.ao_loc - ngrids = grids.size - dat = grids.get_non0ao_idx(opt) - assert lib.fp(cupy.hstack([x[1] for x in dat]).get()) == 103.60117204957997 - assert lib.fp(cupy.hstack([x[2] for x in dat]).get()) == 5.616197331343498 - assert lib.fp(cupy.hstack([x[3] for x in dat]).get()) == -22.394314323727 - assert lib.fp(cupy.hstack([x[4] for x in dat]).get()) == 351.2385939586691 - assert [i.size for x in dat for i in x[1:]] == [ - 46, 18, 9, 19, 50, 20, 9, 21, 28, 12, 9, 13, 49, 19, 9, 20, 45, 17, - 9, 18, 45, 17, 9, 18, 45, 17, 9, 18, 45, 17, 9, 18, 50, 20, 9, 21, - 55, 21, 9, 22, 53, 19, 9, 20, 48, 18, 9, 19, 53, 19, 9, 20, 48, 18, - 9, 19, 43, 17, 9, 18, 40, 16, 9, 17, 48, 18, 9, 19, 48, 18, 9, 19, - 48, 18, 9, 19, 57, 21, 9, 22, 23, 11, 9, 12, 28, 12, 9, 13, 37, 15, - 9, 16, 33, 13, 9, 14, 32, 14, 9, 15, 20, 10, 9, 11, 23, 11, 9, 12, - 23, 11, 9, 12, 23, 11, 9, 12, 37, 15, 9, 16] - assert all(x.dtype == np.int32 for x in dat[0][1:]) - - if hasattr(numint, '_sparse_index'): - for i, i0 in enumerate(range(0, ngrids, numint.MIN_BLK_SIZE)): - i1 = min(i0+numint.MIN_BLK_SIZE, ngrids) - ref = numint._sparse_index( - opt._sorted_mol, grids.coords[i0:i1], opt.l_ctr_offsets, ao_loc, opt) - assert all(np.array_equal(r, x) for r, x in zip(ref[1:], dat[i][1:])) - - def test_scale_ao(self): - ao = cupy.random.rand(1, 3, 256) - wv = cupy.random.rand(1, 256) - out = cupy.ones(6 * 256) - ref = cupy.einsum('nip,np->ip', ao, wv) - assert abs(ref - numint._scale_ao(ao, wv)).max() < 1e-12 - assert abs(ref - numint._scale_ao(ao, wv, out=out)).max() < 1e-12 - assert abs(ref - numint._scale_ao(ao+0j, wv, out=out)).max() < 1e-12 - assert abs(ref - numint._scale_ao(ao[0]+0j, wv[0], out=out)).max() < 1e-12 - - ao = ao.transpose(1, 0, 2).copy(order='C').transpose(1, 0, 2) - assert abs(ref - numint._scale_ao(ao, wv)).max() < 1e-12 - - assert abs(ref - numint._scale_ao(ao[0], wv[0])).max() < 1e-12 if __name__ == "__main__": - print("Full Tests for dft numint") + print("Full Tests for dft numint2c") unittest.main() From 0b2a3ebadf697e80acd56d4f0aa632a73d6f82b5 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 31 Oct 2025 16:11:02 +0800 Subject: [PATCH 05/21] test eval_rho2 --- gpu4pyscf/dft/numint2c.py | 4 ++++ gpu4pyscf/dft/tests/test_numint2c.py | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/gpu4pyscf/dft/numint2c.py b/gpu4pyscf/dft/numint2c.py index 2511cbfb8..f90a03066 100644 --- a/gpu4pyscf/dft/numint2c.py +++ b/gpu4pyscf/dft/numint2c.py @@ -467,6 +467,10 @@ def eval_rho2(self, mol, ao, mo_coeff, mo_occ, non0tab=None, xctype='LDA', '''Calculate the electron density for LDA functional and the density derivatives for GGA functional in the framework of 2-component basis. ''' + if not isinstance(mo_occ, cp.ndarray): + mo_occ = cp.asarray(mo_occ) + if not isinstance(mo_coeff, cp.ndarray): + mo_coeff = cp.asarray(mo_coeff) if self.collinear[0] in ('n', 'm'): # TODO: dm = cp.dot(mo_coeff * mo_occ, mo_coeff.conj().T) diff --git a/gpu4pyscf/dft/tests/test_numint2c.py b/gpu4pyscf/dft/tests/test_numint2c.py index a8380829e..13801a86c 100644 --- a/gpu4pyscf/dft/tests/test_numint2c.py +++ b/gpu4pyscf/dft/tests/test_numint2c.py @@ -85,6 +85,26 @@ def test_eval_rho(self): ref = ni_cpu.eval_rho(mol, ao_cpu, dm0, xctype=xctype, hermi=1, with_lapl=False) self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) + def test_eval_rho2(self): + np.random.seed(1) + mo_coeff_test = np.random.random(mo_coeff.shape) + ni_gpu = NumInt2C() + ni_cpu = pyscf_numint2c() + for xctype in ('LDA', 'GGA', 'MGGA'): + deriv = 1 + if xctype == 'LDA': + deriv = 0 + ao_gpu = ni_gpu.eval_ao(mol, grids_gpu.coords, deriv=deriv, transpose=False) + ao_cpu = ni_cpu.eval_ao(mol, grids_cpu.coords, deriv=deriv) + + rho = ni_gpu.eval_rho2(mol, ao_gpu, mo_coeff_test, mo_occ, xctype=xctype, hermi=0, with_lapl=False) + ref = ni_cpu.eval_rho2(mol, ao_cpu, mo_coeff_test, mo_occ, xctype=xctype, hermi=0, with_lapl=False) + self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) + + rho = ni_gpu.eval_rho2(mol, ao_gpu, mo_coeff_test, mo_occ, xctype=xctype, hermi=1, with_lapl=False) + ref = ni_cpu.eval_rho2(mol, ao_cpu, mo_coeff_test, mo_occ, xctype=xctype, hermi=1, with_lapl=False) + self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) + if __name__ == "__main__": print("Full Tests for dft numint2c") From a2c46a89af1b4b56a4dc66872e3f51f98ff0937a Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Mon, 3 Nov 2025 09:32:03 +0800 Subject: [PATCH 06/21] finish debug the eval_rho2 --- gpu4pyscf/dft/numint2c.py | 2 +- gpu4pyscf/dft/tests/test_numint2c.py | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/gpu4pyscf/dft/numint2c.py b/gpu4pyscf/dft/numint2c.py index f90a03066..1fe420882 100644 --- a/gpu4pyscf/dft/numint2c.py +++ b/gpu4pyscf/dft/numint2c.py @@ -446,7 +446,7 @@ class NumInt2C(lib.StreamObject, numint.LibXCMixin): # collinear schemes: # 'col' (collinear, by default) - # 'ncol' (non-collinear) + # 'ncol' (non-collinear, also known as locally collinear) # 'mcol' (multi-collinear) collinear = getattr(__config__, 'dft_numint_RnumInt_collinear', 'col') spin_samples = getattr(__config__, 'dft_numint_RnumInt_spin_samples', 770) diff --git a/gpu4pyscf/dft/tests/test_numint2c.py b/gpu4pyscf/dft/tests/test_numint2c.py index 13801a86c..374e10773 100644 --- a/gpu4pyscf/dft/tests/test_numint2c.py +++ b/gpu4pyscf/dft/tests/test_numint2c.py @@ -89,7 +89,9 @@ def test_eval_rho2(self): np.random.seed(1) mo_coeff_test = np.random.random(mo_coeff.shape) ni_gpu = NumInt2C() + ni_gpu.collinear='m' ni_cpu = pyscf_numint2c() + ni_cpu.collinear='m' for xctype in ('LDA', 'GGA', 'MGGA'): deriv = 1 if xctype == 'LDA': @@ -97,12 +99,8 @@ def test_eval_rho2(self): ao_gpu = ni_gpu.eval_ao(mol, grids_gpu.coords, deriv=deriv, transpose=False) ao_cpu = ni_cpu.eval_ao(mol, grids_cpu.coords, deriv=deriv) - rho = ni_gpu.eval_rho2(mol, ao_gpu, mo_coeff_test, mo_occ, xctype=xctype, hermi=0, with_lapl=False) - ref = ni_cpu.eval_rho2(mol, ao_cpu, mo_coeff_test, mo_occ, xctype=xctype, hermi=0, with_lapl=False) - self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) - - rho = ni_gpu.eval_rho2(mol, ao_gpu, mo_coeff_test, mo_occ, xctype=xctype, hermi=1, with_lapl=False) - ref = ni_cpu.eval_rho2(mol, ao_cpu, mo_coeff_test, mo_occ, xctype=xctype, hermi=1, with_lapl=False) + rho = ni_gpu.eval_rho2(mol, ao_gpu, mo_coeff_test, mo_occ, xctype=xctype, with_lapl=False) + ref = ni_cpu.eval_rho2(mol, ao_cpu, mo_coeff_test, mo_occ, xctype=xctype, with_lapl=False) self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) From 3bb4200dd06cb7d6569d71a0a73ef5c4c0e6f06f Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Mon, 3 Nov 2025 09:53:04 +0800 Subject: [PATCH 07/21] add gpu-version mcfun --- gpu4pyscf/dft/mcfun_gpu.py | 458 +++++++++++++++++++++++++++++++++++++ 1 file changed, 458 insertions(+) create mode 100644 gpu4pyscf/dft/mcfun_gpu.py diff --git a/gpu4pyscf/dft/mcfun_gpu.py b/gpu4pyscf/dft/mcfun_gpu.py new file mode 100644 index 000000000..59bd02e13 --- /dev/null +++ b/gpu4pyscf/dft/mcfun_gpu.py @@ -0,0 +1,458 @@ +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import warnings +from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor +import numpy as np +from .LebedevGrid import MakeAngularGrid + +MAX_GRIDS_PER_TASK = 200 + +def eval_xc_eff(func, rho_tm, deriv=1, spin_samples=770, + collinear_threshold=None, collinear_samples=200, workers=1): + '''Multi-collinear effective potential and effective kernel. + + Parameters + ---------- + func : Function to evaluate collinear functionals. + The function signature is + exc, vxc, fxc, ... = func((rho, s), deriv) + The input rho and s have shape + * (1, Ngrids) for LDA functionals + * (4, Ngrids) for GGA functionals where the four variables are + rho, nabla_x(rho), nabla_y(rho), nabla_z(rho) + * (5, Ngrids) for meta-GGA functionals where the five variables are + rho, nabla_x(rho), nabla_y(rho), nabla_z(rho), tau + The returns of func should have the shape + * (2, Nvar, Ngrids) for vxc + * (2, Nvar, 2, Nvar, Ngrids) for fxc + * (2, Nvar, 2, Nvar, 2, Nvar, Ngrids) for kxc + The dimension Nvar can be + * 1 for LDA + * 4 for GGA: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho) + * 5 for meta-GGA: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho), tau + Note: for GGA functionals the required returns have different conventions + to the returns of libxc or xcfun in functional derivatives. Here func + needs to return the derivatives to rho, nabla_x(rho), nabla_y(rho), + nabla_z(rho) while libxc (or xcfun) returns derivatives to rho, sigma_uu, + sigma_ud, sigma_dd (sigma_ud = nabla(rho_u) dot nabla(rho_d)). Please + see example 04-xc_wrapper.py for the transformation between the two + conventions. + rho_tm : np array with shape (4, Nvar, Ngrids) + rho_tm[0] is density. rho_tm[1:4] is the magnetization spin vector. Nvar can be + * 1 for LDA functionals + * 4 for GGA functionals: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho) + * 5 for meta-GGA functionals: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho), tau + deriv : int, optional + Functional derivatives. The current version (the kernel) supports maximum value 2. + spin_samples : int, optional + Number of grid points on spherical surface + workers : int, optional + Parallel execution if workers > 1 + collinear_threshold : float, optional + if specified, filters the points of strongly polarized spins and calls the + eval_xc_collinear_spin for these points. Recommended value 0.99. + collinear_samples : int, optional + Number of samples for eval_xc_collinear_spin. + + Returns + ------- + XC functional and derivatives for each grid. + * [exc] if deriv = 0 + * [exc, vxc] if deriv = 1 + * [exc, vxc, fxc] if deriv = 2 + ''' + + assert deriv < 5 + rho_tm = np.asarray(rho_tm) + if rho_tm.dtype != np.double: + raise RuntimeError('rho and m must be real') + ngrids = rho_tm.shape[-1] + grids_per_task = min(ngrids//(workers*3)+1, MAX_GRIDS_PER_TASK) + if workers == 1: + results = [] + for p0, p1 in _prange(0, ngrids, grids_per_task): + r = _eval_xc_lebedev(func, rho_tm[...,p0:p1], deriv, spin_samples, + collinear_threshold, collinear_samples) + results.append(r) + else: + executor = ThreadPoolExecutor + + with executor(max_workers=workers) as ex: + futures = [] + for p0, p1 in _prange(0, ngrids, grids_per_task): + f = ex.submit(_eval_xc_lebedev, func, rho_tm[...,p0:p1], deriv, + spin_samples, collinear_threshold, collinear_samples) + futures.append(f) + results = [f.result() for f in futures] + + return [None if x[0] is None else np.concatenate(x, axis=-1) for x in zip(*results)] + + +def eval_xc_eff_sf(func, rho_tmz, deriv=1, collinear_samples=200, workers=1): + assert deriv < 5 + if rho_tmz.dtype != np.double: + raise RuntimeError('rho and mz must be real') + ngrids = rho_tmz.shape[-1] + grids_per_task = min(ngrids//(workers*3)+1, MAX_GRIDS_PER_TASK) + + if workers == 1: + results = [] + for p0, p1 in _prange(0, ngrids, grids_per_task): + r = _eval_xc_sf(func, rho_tmz[...,p0:p1], deriv, collinear_samples) + results.append(r) + else: + print(collinear_samples) + executor = ThreadPoolExecutor + + with executor(max_workers=workers) as ex: + futures = [] + for p0, p1 in _prange(0, ngrids, grids_per_task): + f = ex.submit(_eval_xc_sf, func, rho_tmz[...,p0:p1], deriv, collinear_samples) + futures.append(f) + results = [f.result() for f in futures] + + return [None if x[0] is None else np.concatenate(x, axis=-1) for x in zip(*results)] + +def _eval_xc_sf(func, rho_tmz, deriv, collinear_samples): + ngrids = rho_tmz.shape[-1] + # samples on z=cos(theta) and their weights between [0, 1] + sgridz, weights = _make_paxis_samples(collinear_samples) + blksize = int(np.ceil(1e5 / ngrids)) * 8 + + if rho_tmz.ndim == 2: + nvar = 1 + else: + nvar = rho_tmz.shape[1] + fxc_sf = np.zeros((nvar,nvar,ngrids)) + kxc_sf = np.zeros((nvar,nvar,2,nvar,ngrids)) + for p0, p1 in _prange(0, weights.size, blksize): + rho = _project_spin_paxis2(rho_tmz, sgridz[p0:p1]) + xc_orig = func(rho, deriv) + if deriv > 1: + fxc = xc_orig[2].reshape(2, nvar, 2, nvar, ngrids, p1-p0) + fxc_sf += fxc[1,:,1].dot(weights[p0:p1]) + + if deriv > 2: + kxc = xc_orig[3].reshape(2, nvar, 2, nvar, 2, nvar, ngrids, p1-p0) + kxc_sf[:,:,0] += kxc[1,:,1,:,0].dot(weights[p0:p1]) + kxc_sf[:,:,1] += kxc[1,:,1,:,1].dot(weights[p0:p1]*sgridz[p0:p1]) + return None,None,fxc_sf,kxc_sf + +def eval_xc_collinear_spin(func, rho_tm, deriv, spin_samples): + '''Multi-collinear functional derivatives for collinear spins + + Parameters + ---------- + func : Function to evaluate collinear functionals. + The function signature is + exc, vxc, fxc, ... = func((rho, s), deriv) + The input rho and s have shape + * (1, Ngrids) for LDA functionals + * (4, Ngrids) for GGA functionals where the four variables are + rho, nabla_x(rho), nabla_y(rho), nabla_z(rho) + * (5, Ngrids) for meta-GGA functionals where the five variables are + rho, nabla_x(rho), nabla_y(rho), nabla_z(rho), tau + The returns of func should have the shape + * (2, Nvar, Ngrids) for vxc + * (2, Nvar, 2, Nvar, Ngrids) for fxc + * (2, Nvar, 2, Nvar, 2, Nvar, Ngrids) for kxc + The dimension Nvar can be + * 1 for LDA + * 4 for GGA: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho) + * 5 for meta-GGA: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho), tau + Note: for GGA functionals the required returns have different conventions + to the returns of libxc or xcfun in functional derivatives. Here func + needs to return the derivatives to rho, nabla_x(rho), nabla_y(rho), + nabla_z(rho) while libxc (or xcfun) returns derivatives to rho, sigma_uu, + sigma_ud, sigma_dd (sigma_ud = nabla(rho_u) dot nabla(rho_d)). + rho_tm : np array with shape (4, Nvar, Ngrids) + rho_tm[0] is density. rho_tm[1:4] is the magnetization spin vector. Nvar can be + * 1 for LDA functionals + * 4 for GGA functionals: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho) + * 5 for meta-GGA functionals: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho), tau + deriv : int, optional + Functional derivatives. The current version (the kernel) supports maximum value 2. + spin_samples : int, optional + Number of grid points on principal axis + + Returns + ------- + XC functional and derivatives for each grid. + * [exc] if deriv = 0 + * [exc, vxc] if deriv = 1 + * [exc, vxc, fxc] if deriv = 2 + ''' + ngrids = rho_tm.shape[-1] + # samples on z=cos(theta) and their weights between [0, 1] + sgridz, weights = _make_paxis_samples(spin_samples) + blksize = int(np.ceil(1e5 / ngrids)) * 8 + + if rho_tm.ndim == 2: + nvar = 1 + else: + nvar = rho_tm.shape[1] + + rho_ts = _project_spin_paxis(rho_tm) + + # TODO: filter s, nabla(s) ~= 0 + m = rho_tm[1:].reshape(3, nvar, ngrids) + s = rho_ts[1].reshape(nvar, ngrids)[0] + with np.errstate(divide='ignore', invalid='ignore'): + omega = m[:,0] / s + omega[:,s==0] = 0 + + xc_orig = func(rho_ts, deriv) + exc_eff = xc_orig[0] + + omega = omega.reshape(3, ngrids) + if deriv > 0: + vxc = xc_orig[1].reshape(2, nvar, ngrids) + vxc_eff = np.vstack((vxc[:1], np.einsum('xg,rg->rxg', vxc[1], omega))) + + if deriv > 1: + # spin-conserve part + fxc = xc_orig[2].reshape(2, nvar, 2, nvar, ngrids) + fxc_eff = np.empty((4, nvar, 4, nvar, ngrids)) + fxc_eff[0,:,0] = fxc[0,:,0] + fz1 = np.einsum('xyg,rg->rxyg', fxc[1,:,0], omega) + fxc_eff[1:,:,0 ] = fz1 + fxc_eff[0 ,:,1:] = fz1.transpose(1,0,2,3) + tmp = np.einsum('xyg,rg->rxyg', fxc[1,:,1,:], omega) + fxc_eff[1:,:,1:] = np.einsum('rxyg,sg->rxsyg', tmp, omega) + + # spin-flip part + fxc_sf = 0 + for p0, p1 in _prange(0, weights.size, blksize): + rho = _project_spin_paxis(rho_tm, sgridz[p0:p1]) + fxc = func(rho, deriv)[2] + fxc = fxc.reshape(2, nvar, 2, nvar, ngrids, p1 - p0) + # only considers the xx+yy part + fxc_sf += fxc[1,:,1].dot(weights[p0:p1]) + + for i in range(1, 4): + fxc_eff[i,:,i] += fxc_sf + tmp = np.einsum('xyg,rg->rxyg', fxc_sf, omega) + fxc_eff[1:,:,1:] -= np.einsum('rxyg,sg->rxsyg', tmp, omega) + + ret = [exc_eff] + if deriv > 0: + ret.append(vxc_eff) + if deriv > 1: + ret.append(fxc_eff) + if deriv > 2: + raise NotImplementedError + return ret + +def _eval_xc_lebedev(func, rho_tm, deriv, spin_samples, + collinear_threshold=None, collinear_samples=200): + '''Multi-collinear effective potential and effective kernel with projection + samples on spherical surface (the Lebedev grid samples) + ''' + ngrids = rho_tm.shape[-1] + sgrids, weights = _make_sph_samples(spin_samples) + blksize = int(np.ceil(1e4 / ngrids)) * 8 + # import pdb + # pdb.set_trace() + if rho_tm.ndim == 2: + nvar = 1 + else: + nvar = rho_tm.shape[1] + exc_eff = vxc_eff = fxc_eff = kxc_eff = 0 + for p0, p1 in _prange(0, weights.size, blksize): + nsg = p1 - p0 + p_sgrids = sgrids[p0:p1] + p_weights = weights[p0:p1] + rho = _project_spin_sph(rho_tm, p_sgrids) + + xc_orig = func(rho, deriv+1) + + exc = xc_orig[0].reshape(ngrids, nsg) + vxc = xc_orig[1].reshape(2, nvar, ngrids, nsg) + + rho = rho.reshape(2, nvar, ngrids, nsg) + s = rho[1] + rho_pure = rho[0,0] + exc_rho = exc * rho_pure + np.einsum('xgo,xgo->go', vxc[1], s) + exc_eff += np.einsum('go,o->g', exc_rho, p_weights) + + if deriv > 0: + fxc = xc_orig[2].reshape(2, nvar, 2, nvar, ngrids, nsg) + # vs * 2 + s*f_s_st + vxc[1] *= 2 + vxc += np.einsum('xbygo,xgo->bygo', fxc[1], s) + c_tm = _ts2tm_transformation(p_sgrids) + cw_tm = c_tm * p_weights + vxc_eff += np.einsum('rao,axgo->rxg', cw_tm, vxc) + + if deriv > 1: + kxc = xc_orig[3].reshape(2, nvar, 2, nvar, 2, nvar, ngrids, nsg) + fxc[1,:,1] *= 3 + fxc[0,:,1] *= 2 + fxc[1,:,0] *= 2 + fxc += np.einsum('xbyczgo,xgo->byczgo', kxc[1], s) + fxc = np.einsum('rao,axbygo->rxbygo', c_tm, fxc) + fxc_eff += np.einsum('sbo,rxbygo->rxsyg', cw_tm, fxc) + + if deriv > 2: + lxc = xc_orig[4].reshape(2, nvar, 2, nvar, 2, nvar, 2, nvar, ngrids, nsg) + kxc[1,:,1,:,1] *= 4 + kxc[1,:,1,:,0] *= 3 + kxc[1,:,0,:,1] *= 3 + kxc[0,:,1,:,1] *= 3 + kxc[1,:,0,:,0] *= 2 + kxc[0,:,1,:,0] *= 2 + kxc[0,:,0,:,1] *= 2 + + kxc += np.einsum('wbxcydzgo,wgo->bxcydzgo', lxc[1], s) + kxc = np.einsum('rao,axbyczgo->rxbyczgo', c_tm, kxc) + kxc = np.einsum('sbo,rxbyczgo->rxsyczgo', c_tm, kxc) + # kxc = np.einsum('rao,sbo,axbyczgo->rxsyczgo', c_tm, c_tm,kxc) + kxc_eff += np.einsum('tco,rxsyczgo->rxsytzg', cw_tm, kxc) + + # exc in libxc is defined as Exc per particle. exc_eff calculated above is exc*rho. + # Divide exc_eff by rho so as to follow the convention of libxc + if rho_tm.ndim == 2: + rho_pure = rho_tm[0] + else: + rho_pure = rho_tm[0,0] + + exc_eff[rho_pure == 0] = 0 + exc_eff[rho_pure != 0] /= rho_pure[rho_pure != 0] + + # Strongly spin-polarized points (rho ~= |m|) can be considered as collinear spins + if collinear_threshold is not None: + rho, s = _project_spin_paxis(rho_tm.reshape(4, nvar, ngrids)[:,0]) + cs_idx = np.where(s >= rho * collinear_threshold)[0] + if cs_idx.size > 0: + xc_cs = eval_xc_collinear_spin(func, rho_tm[...,cs_idx], deriv, + collinear_samples) + exc_eff[...,cs_idx] = xc_cs[0] + if deriv > 0: + vxc_eff[...,cs_idx] = xc_cs[1] + if deriv > 1: + fxc_eff[...,cs_idx] = xc_cs[2] + if deriv > 2: + kxc_eff[...,cs_idx] = xc_cs[3] + + ret = [exc_eff] + if deriv > 0: + ret.append(vxc_eff) + if deriv > 1: + ret.append(fxc_eff) + if deriv > 2: + ret.append(kxc_eff) + if deriv > 3: + raise NotImplementedError + return ret + +def _make_sph_samples(spin_samples): + '''Integration samples on spherical surface''' + ang_grids = MakeAngularGrid(spin_samples) + directions = ang_grids[:,:3].copy(order='F') + weights = ang_grids[:,3].copy() + return directions, weights + +def _prange(start, end, step): + '''Partitions range into segments: i0:i1, i1:i2, i2:i3, ...''' + if start < end: + for i in range(start, end, step): + yield i, min(i+step, end) + +def _project_spin_sph(rho_tm, sgrids): + '''Projects spin onto spherical surface''' + rho = rho_tm[0] + m = rho_tm[1:] + nsg = sgrids.shape[0] + ngrids = rho.shape[-1] + if rho_tm.ndim == 2: + rho_ts = np.empty((2, ngrids, nsg)) + rho_ts[0] = rho[:, np.newaxis] + rho_ts[1] = np.einsum('mg,om->go', m, sgrids) + rho_ts = rho_ts.reshape(2, ngrids*nsg) + else: + nvar = rho_tm.shape[1] + rho_ts = np.empty((2, nvar, ngrids, nsg)) + rho_ts[0] = rho[:, :, np.newaxis] + rho_ts[1] = np.einsum('mxg,om->xgo', m, sgrids) + rho_ts = rho_ts.reshape(2, nvar, ngrids*nsg) + return rho_ts + +def _ts2tm_transformation(sgrids): + ''' + Transformation that projects v_ts(rho,s) to rho/m representation (rho,mx,my,mz) + ''' + nsg = sgrids.shape[0] + c_tm = np.zeros((4, 2, nsg)) + c_tm[0,0] = 1 + c_tm[1:,1] = sgrids.T + return c_tm + +def _make_paxis_samples(spin_samples): + '''Samples on principal axis between [0, 1]''' + rt, wt = np.polynomial.legendre.leggauss(spin_samples) + rt = rt * .5 + .5 + wt *= .5 # normalized to 1 + return rt, wt + +def _project_spin_paxis(rho_tm, sgridz=None): + '''Projects spins onto the principal axis''' + rho = rho_tm[0] + m = rho_tm[1:] + + s = np.linalg.norm(m, axis=0) + if sgridz is None: + rho_ts = np.stack([rho, s]) + else: + ngrids = rho.shape[-1] + nsg = sgridz.shape[0] + if rho_tm.ndim == 2: + rho_ts = np.empty((2, ngrids, nsg)) + rho_ts[0] = rho[:,np.newaxis] + rho_ts[1] = s[:,np.newaxis] * sgridz + rho_ts = rho_ts.reshape(2, ngrids * nsg) + else: + print('222') + nvar = rho_tm.shape[1] + rho_ts = np.empty((2, nvar, ngrids, nsg)) + rho_ts[0] = rho[:,:,np.newaxis] + rho_ts[1] = s[:,:,np.newaxis] * sgridz + rho_ts = rho_ts.reshape(2, nvar, ngrids * nsg) + return rho_ts + +def _project_spin_paxis2(rho_tm, sgridz=None): + # ToDo: be written into the function _project_spin_paxis(). + # Because use mz rather than |mz| here + '''Projects spins onto the principal axis''' + rho = rho_tm[0] + mz = rho_tm[1] + + if sgridz is None: + rho_ts = np.stack([rho, mz]) + else: + ngrids = rho.shape[-1] + nsg = sgridz.shape[0] + if rho_tm.ndim == 2: + rho_ts = np.empty((2, ngrids, nsg)) + rho_ts[0] = rho[:,np.newaxis] + rho_ts[1] = mz[:,np.newaxis] * sgridz + rho_ts = rho_ts.reshape(2, ngrids * nsg) + else: + nvar = rho_tm.shape[1] + rho_ts = np.empty((2, nvar, ngrids, nsg)) + rho_ts[0] = rho[:,:,np.newaxis] + rho_ts[1] = mz[:,:,np.newaxis] * sgridz + rho_ts = rho_ts.reshape(2, nvar, ngrids * nsg) + return rho_ts From a427fbb10425327f32a2e023b6428d77e3689db8 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Tue, 4 Nov 2025 08:42:05 +0800 Subject: [PATCH 08/21] debug meta-GGA --- gpu4pyscf/dft/mcfun_gpu.py | 194 ++++++---------- gpu4pyscf/dft/numint.py | 68 ++++++ gpu4pyscf/dft/numint2c.py | 333 ++++++++++++--------------- gpu4pyscf/dft/tests/test_numint2c.py | 91 +++++--- 4 files changed, 347 insertions(+), 339 deletions(-) diff --git a/gpu4pyscf/dft/mcfun_gpu.py b/gpu4pyscf/dft/mcfun_gpu.py index 59bd02e13..c243fd1c1 100644 --- a/gpu4pyscf/dft/mcfun_gpu.py +++ b/gpu4pyscf/dft/mcfun_gpu.py @@ -14,14 +14,14 @@ import warnings -from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor +import cupy as cp import numpy as np -from .LebedevGrid import MakeAngularGrid +from pyscf.dft.LebedevGrid import MakeAngularGrid -MAX_GRIDS_PER_TASK = 200 +MAX_GRIDS_PER_TASK = 4096 def eval_xc_eff(func, rho_tm, deriv=1, spin_samples=770, - collinear_threshold=None, collinear_samples=200, workers=1): + collinear_threshold=None, collinear_samples=200): '''Multi-collinear effective potential and effective kernel. Parameters @@ -50,7 +50,7 @@ def eval_xc_eff(func, rho_tm, deriv=1, spin_samples=770, sigma_ud, sigma_dd (sigma_ud = nabla(rho_u) dot nabla(rho_d)). Please see example 04-xc_wrapper.py for the transformation between the two conventions. - rho_tm : np array with shape (4, Nvar, Ngrids) + rho_tm : cp array with shape (4, Nvar, Ngrids) rho_tm[0] is density. rho_tm[1:4] is the magnetization spin vector. Nvar can be * 1 for LDA functionals * 4 for GGA functionals: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho) @@ -59,8 +59,6 @@ def eval_xc_eff(func, rho_tm, deriv=1, spin_samples=770, Functional derivatives. The current version (the kernel) supports maximum value 2. spin_samples : int, optional Number of grid points on spherical surface - workers : int, optional - Parallel execution if workers > 1 collinear_threshold : float, optional if specified, filters the points of strongly polarized spins and calls the eval_xc_collinear_spin for these points. Recommended value 0.99. @@ -76,80 +74,19 @@ def eval_xc_eff(func, rho_tm, deriv=1, spin_samples=770, ''' assert deriv < 5 - rho_tm = np.asarray(rho_tm) - if rho_tm.dtype != np.double: + rho_tm = cp.asarray(rho_tm) + if rho_tm.dtype != cp.double: raise RuntimeError('rho and m must be real') ngrids = rho_tm.shape[-1] - grids_per_task = min(ngrids//(workers*3)+1, MAX_GRIDS_PER_TASK) - if workers == 1: - results = [] - for p0, p1 in _prange(0, ngrids, grids_per_task): - r = _eval_xc_lebedev(func, rho_tm[...,p0:p1], deriv, spin_samples, - collinear_threshold, collinear_samples) - results.append(r) - else: - executor = ThreadPoolExecutor - - with executor(max_workers=workers) as ex: - futures = [] - for p0, p1 in _prange(0, ngrids, grids_per_task): - f = ex.submit(_eval_xc_lebedev, func, rho_tm[...,p0:p1], deriv, - spin_samples, collinear_threshold, collinear_samples) - futures.append(f) - results = [f.result() for f in futures] - - return [None if x[0] is None else np.concatenate(x, axis=-1) for x in zip(*results)] - - -def eval_xc_eff_sf(func, rho_tmz, deriv=1, collinear_samples=200, workers=1): - assert deriv < 5 - if rho_tmz.dtype != np.double: - raise RuntimeError('rho and mz must be real') - ngrids = rho_tmz.shape[-1] - grids_per_task = min(ngrids//(workers*3)+1, MAX_GRIDS_PER_TASK) - - if workers == 1: - results = [] - for p0, p1 in _prange(0, ngrids, grids_per_task): - r = _eval_xc_sf(func, rho_tmz[...,p0:p1], deriv, collinear_samples) - results.append(r) - else: - print(collinear_samples) - executor = ThreadPoolExecutor - - with executor(max_workers=workers) as ex: - futures = [] - for p0, p1 in _prange(0, ngrids, grids_per_task): - f = ex.submit(_eval_xc_sf, func, rho_tmz[...,p0:p1], deriv, collinear_samples) - futures.append(f) - results = [f.result() for f in futures] - return [None if x[0] is None else np.concatenate(x, axis=-1) for x in zip(*results)] + results = [] + for p0, p1 in _prange(0, ngrids, MAX_GRIDS_PER_TASK): + r = _eval_xc_lebedev(func, rho_tm[...,p0:p1], deriv, spin_samples, + collinear_threshold, collinear_samples) + results.append(r) -def _eval_xc_sf(func, rho_tmz, deriv, collinear_samples): - ngrids = rho_tmz.shape[-1] - # samples on z=cos(theta) and their weights between [0, 1] - sgridz, weights = _make_paxis_samples(collinear_samples) - blksize = int(np.ceil(1e5 / ngrids)) * 8 - - if rho_tmz.ndim == 2: - nvar = 1 - else: - nvar = rho_tmz.shape[1] - fxc_sf = np.zeros((nvar,nvar,ngrids)) - kxc_sf = np.zeros((nvar,nvar,2,nvar,ngrids)) - for p0, p1 in _prange(0, weights.size, blksize): - rho = _project_spin_paxis2(rho_tmz, sgridz[p0:p1]) - xc_orig = func(rho, deriv) - if deriv > 1: - fxc = xc_orig[2].reshape(2, nvar, 2, nvar, ngrids, p1-p0) - fxc_sf += fxc[1,:,1].dot(weights[p0:p1]) + return [None if x[0] is None else cp.concatenate(x, axis=-1) for x in zip(*results)] - if deriv > 2: - kxc = xc_orig[3].reshape(2, nvar, 2, nvar, 2, nvar, ngrids, p1-p0) - kxc_sf[:,:,0] += kxc[1,:,1,:,0].dot(weights[p0:p1]) - kxc_sf[:,:,1] += kxc[1,:,1,:,1].dot(weights[p0:p1]*sgridz[p0:p1]) - return None,None,fxc_sf,kxc_sf def eval_xc_collinear_spin(func, rho_tm, deriv, spin_samples): '''Multi-collinear functional derivatives for collinear spins @@ -178,7 +115,7 @@ def eval_xc_collinear_spin(func, rho_tm, deriv, spin_samples): needs to return the derivatives to rho, nabla_x(rho), nabla_y(rho), nabla_z(rho) while libxc (or xcfun) returns derivatives to rho, sigma_uu, sigma_ud, sigma_dd (sigma_ud = nabla(rho_u) dot nabla(rho_d)). - rho_tm : np array with shape (4, Nvar, Ngrids) + rho_tm : cp array with shape (4, Nvar, Ngrids) rho_tm[0] is density. rho_tm[1:4] is the magnetization spin vector. Nvar can be * 1 for LDA functionals * 4 for GGA functionals: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho) @@ -198,7 +135,7 @@ def eval_xc_collinear_spin(func, rho_tm, deriv, spin_samples): ngrids = rho_tm.shape[-1] # samples on z=cos(theta) and their weights between [0, 1] sgridz, weights = _make_paxis_samples(spin_samples) - blksize = int(np.ceil(1e5 / ngrids)) * 8 + blksize = int(cp.ceil(1e5 / ngrids)) * 8 if rho_tm.ndim == 2: nvar = 1 @@ -210,28 +147,27 @@ def eval_xc_collinear_spin(func, rho_tm, deriv, spin_samples): # TODO: filter s, nabla(s) ~= 0 m = rho_tm[1:].reshape(3, nvar, ngrids) s = rho_ts[1].reshape(nvar, ngrids)[0] - with np.errstate(divide='ignore', invalid='ignore'): - omega = m[:,0] / s - omega[:,s==0] = 0 + omega = cp.where(s == 0, 0, m[:, 0] / s) xc_orig = func(rho_ts, deriv) exc_eff = xc_orig[0] + exc_eff = exc_eff[:,0] omega = omega.reshape(3, ngrids) if deriv > 0: vxc = xc_orig[1].reshape(2, nvar, ngrids) - vxc_eff = np.vstack((vxc[:1], np.einsum('xg,rg->rxg', vxc[1], omega))) + vxc_eff = cp.vstack((vxc[:1], cp.einsum('xg,rg->rxg', vxc[1], omega))) if deriv > 1: # spin-conserve part fxc = xc_orig[2].reshape(2, nvar, 2, nvar, ngrids) - fxc_eff = np.empty((4, nvar, 4, nvar, ngrids)) + fxc_eff = cp.empty((4, nvar, 4, nvar, ngrids)) fxc_eff[0,:,0] = fxc[0,:,0] - fz1 = np.einsum('xyg,rg->rxyg', fxc[1,:,0], omega) + fz1 = cp.einsum('xyg,rg->rxyg', fxc[1,:,0], omega) fxc_eff[1:,:,0 ] = fz1 fxc_eff[0 ,:,1:] = fz1.transpose(1,0,2,3) - tmp = np.einsum('xyg,rg->rxyg', fxc[1,:,1,:], omega) - fxc_eff[1:,:,1:] = np.einsum('rxyg,sg->rxsyg', tmp, omega) + tmp = cp.einsum('xyg,rg->rxyg', fxc[1,:,1,:], omega) + fxc_eff[1:,:,1:] = cp.einsum('rxyg,sg->rxsyg', tmp, omega) # spin-flip part fxc_sf = 0 @@ -244,8 +180,8 @@ def eval_xc_collinear_spin(func, rho_tm, deriv, spin_samples): for i in range(1, 4): fxc_eff[i,:,i] += fxc_sf - tmp = np.einsum('xyg,rg->rxyg', fxc_sf, omega) - fxc_eff[1:,:,1:] -= np.einsum('rxyg,sg->rxsyg', tmp, omega) + tmp = cp.einsum('xyg,rg->rxyg', fxc_sf, omega) + fxc_eff[1:,:,1:] -= cp.einsum('rxyg,sg->rxsyg', tmp, omega) ret = [exc_eff] if deriv > 0: @@ -263,7 +199,9 @@ def _eval_xc_lebedev(func, rho_tm, deriv, spin_samples, ''' ngrids = rho_tm.shape[-1] sgrids, weights = _make_sph_samples(spin_samples) - blksize = int(np.ceil(1e4 / ngrids)) * 8 + sgrids = cp.asarray(sgrids) + weights = cp.asarray(weights) + blksize = int(cp.ceil(1e4 / ngrids)) * 8 # import pdb # pdb.set_trace() if rho_tm.ndim == 2: @@ -285,26 +223,26 @@ def _eval_xc_lebedev(func, rho_tm, deriv, spin_samples, rho = rho.reshape(2, nvar, ngrids, nsg) s = rho[1] rho_pure = rho[0,0] - exc_rho = exc * rho_pure + np.einsum('xgo,xgo->go', vxc[1], s) - exc_eff += np.einsum('go,o->g', exc_rho, p_weights) + exc_rho = exc * rho_pure + cp.einsum('xgo,xgo->go', vxc[1], s) + exc_eff += cp.einsum('go,o->g', exc_rho, p_weights) if deriv > 0: fxc = xc_orig[2].reshape(2, nvar, 2, nvar, ngrids, nsg) # vs * 2 + s*f_s_st vxc[1] *= 2 - vxc += np.einsum('xbygo,xgo->bygo', fxc[1], s) + vxc += cp.einsum('xbygo,xgo->bygo', fxc[1], s) c_tm = _ts2tm_transformation(p_sgrids) cw_tm = c_tm * p_weights - vxc_eff += np.einsum('rao,axgo->rxg', cw_tm, vxc) + vxc_eff += cp.einsum('rao,axgo->rxg', cw_tm, vxc) if deriv > 1: kxc = xc_orig[3].reshape(2, nvar, 2, nvar, 2, nvar, ngrids, nsg) fxc[1,:,1] *= 3 fxc[0,:,1] *= 2 fxc[1,:,0] *= 2 - fxc += np.einsum('xbyczgo,xgo->byczgo', kxc[1], s) - fxc = np.einsum('rao,axbygo->rxbygo', c_tm, fxc) - fxc_eff += np.einsum('sbo,rxbygo->rxsyg', cw_tm, fxc) + fxc += cp.einsum('xbyczgo,xgo->byczgo', kxc[1], s) + fxc = cp.einsum('rao,axbygo->rxbygo', c_tm, fxc) + fxc_eff += cp.einsum('sbo,rxbygo->rxsyg', cw_tm, fxc) if deriv > 2: lxc = xc_orig[4].reshape(2, nvar, 2, nvar, 2, nvar, 2, nvar, ngrids, nsg) @@ -316,11 +254,11 @@ def _eval_xc_lebedev(func, rho_tm, deriv, spin_samples, kxc[0,:,1,:,0] *= 2 kxc[0,:,0,:,1] *= 2 - kxc += np.einsum('wbxcydzgo,wgo->bxcydzgo', lxc[1], s) - kxc = np.einsum('rao,axbyczgo->rxbyczgo', c_tm, kxc) - kxc = np.einsum('sbo,rxbyczgo->rxsyczgo', c_tm, kxc) - # kxc = np.einsum('rao,sbo,axbyczgo->rxsyczgo', c_tm, c_tm,kxc) - kxc_eff += np.einsum('tco,rxsyczgo->rxsytzg', cw_tm, kxc) + kxc += cp.einsum('wbxcydzgo,wgo->bxcydzgo', lxc[1], s) + kxc = cp.einsum('rao,axbyczgo->rxbyczgo', c_tm, kxc) + kxc = cp.einsum('sbo,rxbyczgo->rxsyczgo', c_tm, kxc) + # kxc = cp.einsum('rao,sbo,axbyczgo->rxsyczgo', c_tm, c_tm,kxc) + kxc_eff += cp.einsum('tco,rxsyczgo->rxsytzg', cw_tm, kxc) # exc in libxc is defined as Exc per particle. exc_eff calculated above is exc*rho. # Divide exc_eff by rho so as to follow the convention of libxc @@ -335,10 +273,16 @@ def _eval_xc_lebedev(func, rho_tm, deriv, spin_samples, # Strongly spin-polarized points (rho ~= |m|) can be considered as collinear spins if collinear_threshold is not None: rho, s = _project_spin_paxis(rho_tm.reshape(4, nvar, ngrids)[:,0]) - cs_idx = np.where(s >= rho * collinear_threshold)[0] + cs_idx = cp.where(s >= rho * collinear_threshold)[0] if cs_idx.size > 0: xc_cs = eval_xc_collinear_spin(func, rho_tm[...,cs_idx], deriv, collinear_samples) + print("debug 1") + print("exc_eff", exc_eff.shape) + print("cs_idx", cs_idx.shape) + print("xc_cs", len(xc_cs)) + print("xc_cs[0]", xc_cs[0].shape) + print("rho_tm", rho_tm.shape) exc_eff[...,cs_idx] = xc_cs[0] if deriv > 0: vxc_eff[...,cs_idx] = xc_cs[1] @@ -378,15 +322,15 @@ def _project_spin_sph(rho_tm, sgrids): nsg = sgrids.shape[0] ngrids = rho.shape[-1] if rho_tm.ndim == 2: - rho_ts = np.empty((2, ngrids, nsg)) - rho_ts[0] = rho[:, np.newaxis] - rho_ts[1] = np.einsum('mg,om->go', m, sgrids) + rho_ts = cp.empty((2, ngrids, nsg)) + rho_ts[0] = rho[:, cp.newaxis] + rho_ts[1] = cp.einsum('mg,om->go', m, sgrids) rho_ts = rho_ts.reshape(2, ngrids*nsg) else: nvar = rho_tm.shape[1] - rho_ts = np.empty((2, nvar, ngrids, nsg)) - rho_ts[0] = rho[:, :, np.newaxis] - rho_ts[1] = np.einsum('mxg,om->xgo', m, sgrids) + rho_ts = cp.empty((2, nvar, ngrids, nsg)) + rho_ts[0] = rho[:, :, cp.newaxis] + rho_ts[1] = cp.einsum('mxg,om->xgo', m, sgrids) rho_ts = rho_ts.reshape(2, nvar, ngrids*nsg) return rho_ts @@ -395,7 +339,7 @@ def _ts2tm_transformation(sgrids): Transformation that projects v_ts(rho,s) to rho/m representation (rho,mx,my,mz) ''' nsg = sgrids.shape[0] - c_tm = np.zeros((4, 2, nsg)) + c_tm = cp.zeros((4, 2, nsg)) c_tm[0,0] = 1 c_tm[1:,1] = sgrids.T return c_tm @@ -403,6 +347,8 @@ def _ts2tm_transformation(sgrids): def _make_paxis_samples(spin_samples): '''Samples on principal axis between [0, 1]''' rt, wt = np.polynomial.legendre.leggauss(spin_samples) + rt = cp.asarray(rt) + wt = cp.asarray(wt) rt = rt * .5 + .5 wt *= .5 # normalized to 1 return rt, wt @@ -412,23 +358,23 @@ def _project_spin_paxis(rho_tm, sgridz=None): rho = rho_tm[0] m = rho_tm[1:] - s = np.linalg.norm(m, axis=0) + s = cp.linalg.norm(m, axis=0) if sgridz is None: - rho_ts = np.stack([rho, s]) + rho_ts = cp.stack([rho, s]) else: ngrids = rho.shape[-1] nsg = sgridz.shape[0] if rho_tm.ndim == 2: - rho_ts = np.empty((2, ngrids, nsg)) - rho_ts[0] = rho[:,np.newaxis] - rho_ts[1] = s[:,np.newaxis] * sgridz + rho_ts = cp.empty((2, ngrids, nsg)) + rho_ts[0] = rho[:,cp.newaxis] + rho_ts[1] = s[:,cp.newaxis] * sgridz rho_ts = rho_ts.reshape(2, ngrids * nsg) else: print('222') nvar = rho_tm.shape[1] - rho_ts = np.empty((2, nvar, ngrids, nsg)) - rho_ts[0] = rho[:,:,np.newaxis] - rho_ts[1] = s[:,:,np.newaxis] * sgridz + rho_ts = cp.empty((2, nvar, ngrids, nsg)) + rho_ts[0] = rho[:,:,cp.newaxis] + rho_ts[1] = s[:,:,cp.newaxis] * sgridz rho_ts = rho_ts.reshape(2, nvar, ngrids * nsg) return rho_ts @@ -440,19 +386,19 @@ def _project_spin_paxis2(rho_tm, sgridz=None): mz = rho_tm[1] if sgridz is None: - rho_ts = np.stack([rho, mz]) + rho_ts = cp.stack([rho, mz]) else: ngrids = rho.shape[-1] nsg = sgridz.shape[0] if rho_tm.ndim == 2: - rho_ts = np.empty((2, ngrids, nsg)) - rho_ts[0] = rho[:,np.newaxis] - rho_ts[1] = mz[:,np.newaxis] * sgridz + rho_ts = cp.empty((2, ngrids, nsg)) + rho_ts[0] = rho[:,cp.newaxis] + rho_ts[1] = mz[:,cp.newaxis] * sgridz rho_ts = rho_ts.reshape(2, ngrids * nsg) else: nvar = rho_tm.shape[1] - rho_ts = np.empty((2, nvar, ngrids, nsg)) - rho_ts[0] = rho[:,:,np.newaxis] - rho_ts[1] = mz[:,:,np.newaxis] * sgridz + rho_ts = cp.empty((2, nvar, ngrids, nsg)) + rho_ts[0] = rho[:,:,cp.newaxis] + rho_ts[1] = mz[:,:,cp.newaxis] * sgridz rho_ts = rho_ts.reshape(2, nvar, ngrids * nsg) return rho_ts diff --git a/gpu4pyscf/dft/numint.py b/gpu4pyscf/dft/numint.py index 55c25dfab..81e1be013 100644 --- a/gpu4pyscf/dft/numint.py +++ b/gpu4pyscf/dft/numint.py @@ -2429,6 +2429,74 @@ def unsort_orbitals(self, sorted_mat, axis=[], out=None): out[tuple(fancy_index)] = sorted_mat return out + def sort_orbitals_2c(self, mat, axis=[]): + ''' Transform given axis of a 2-component matrix (GKS) into sorted AO + + This assumes the axes specified in 'axis' have a dimension of 2*nao, + representing alpha (:nao) and beta (nao:) components. Both components + are sorted using the same AO sorting index. + ''' + idx = self._ao_idx + nao = len(idx) + + # Create the 2-component sorting index: + # [sorted_alpha_indices, sorted_beta_indices] + # E.g., if idx = [0, 2, 1] (nao=3), + # idx_2c = [0, 2, 1, 3+0, 3+2, 3+1] = [0, 2, 1, 3, 5, 4] + # We use numpy to build the index, consistent with self._ao_idx + idx_2c = np.concatenate([idx, idx + nao]) + + shape_ones = (1,) * mat.ndim + fancy_index = [] + for dim, n in enumerate(mat.shape): + if dim in axis: + # Check if the dimension matches the 2-component size + if n != 2 * nao: + raise ValueError(f"Axis {dim} has dimension {n}, expected {2*nao} for 2-component sorting") + indices = idx_2c + else: + # Use cupy.arange for non-sorted axes, as in the original sort_orbitals + indices = cupy.arange(n) + + idx_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:] + fancy_index.append(indices.reshape(idx_shape)) + + # Perform the sorting using advanced indexing + return mat[tuple(fancy_index)] + + def unsort_orbitals_2c(self, sorted_mat, axis=[], out=None): + ''' Transform given axis of a 2-component matrix from sorted AO to original AO + + This assumes the axes specified in 'axis' have a dimension of 2*nao. + This is the inverse operation of sort_orbitals_2c. + ''' + idx = self._ao_idx + nao = len(idx) + + # The 2-component index is created identically to sort_orbitals_2c + idx_2c = np.concatenate([idx, idx + nao]) + + shape_ones = (1,) * sorted_mat.ndim + fancy_index = [] + for dim, n in enumerate(sorted_mat.shape): + if dim in axis: + # Check if the dimension matches the 2-component size + if n != 2 * nao: + raise ValueError(f"Axis {dim} has dimension {n}, expected {2*nao} for 2-component unsorting") + indices = idx_2c + else: + indices = cupy.arange(n) + + idx_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:] + fancy_index.append(indices.reshape(idx_shape)) + + if out is None: + out = cupy.empty_like(sorted_mat) + + # Perform the unsorting assignment + out[tuple(fancy_index)] = sorted_mat + return out + class GTOValEnvVars(ctypes.Structure): _fields_ = [ ("natm", ctypes.c_int), diff --git a/gpu4pyscf/dft/numint2c.py b/gpu4pyscf/dft/numint2c.py index 1fe420882..9fb9580fc 100644 --- a/gpu4pyscf/dft/numint2c.py +++ b/gpu4pyscf/dft/numint2c.py @@ -20,7 +20,7 @@ import numpy as np import cupy as cp from pyscf import lib -from gpu4pyscf.dft import numint +from gpu4pyscf.dft import numint, mcfun_gpu from gpu4pyscf.dft.numint import _dot_ao_dm, _dot_ao_ao, _scale_ao, _tau_dot from gpu4pyscf.dft import xc_deriv from pyscf import __config__ @@ -29,6 +29,8 @@ def eval_rho(mol, ao, dm, non0tab=None, xctype='LDA', hermi=0, with_lapl=True, verbose=None): nao = ao.shape[-2] + print("in eval_rho") + print(ao.shape, dm.shape, nao, ao.shape) assert dm.ndim == 2 and nao * 2 == dm.shape[0] if not isinstance(dm, cp.ndarray): dm = cp.asarray(dm) @@ -39,9 +41,12 @@ def eval_rho(mol, ao, dm, non0tab=None, xctype='LDA', hermi=0, ao_loc = mol.ao_loc if xctype == 'LDA': + print("debug 0", hermi) c0a = _dot_ao_dm(mol, ao, dm[:nao], non0tab, shls_slice, ao_loc) c0b = _dot_ao_dm(mol, ao, dm[nao:], non0tab, shls_slice, ao_loc) rho_m = _contract_rho_m((ao, ao), (c0a, c0b), hermi, True) + print(rho_m.shape) + print(rho_m) elif xctype == 'GGA': # first 4 ~ (rho, m), second 4 ~ (0th order, dx, dy, dz) if hermi: @@ -99,13 +104,14 @@ def _gks_mcol_vxc(ni, mol, grids, xc_code, dms, relativity=0, hermi=0, xctype = ni._xc_type(xc_code) shls_slice = (0, mol.nbas) ao_loc = mol.ao_loc_nr() - - make_rho, nset, n2c = ni._gen_rho_evaluator(mol, dms, hermi, False) + n2c = dms[0].shape[-1] nao = n2c // 2 + opt = ni.gdftopt + _sorted_mol = opt._sorted_mol - nelec = cp.zeros(nset) - excsum = cp.zeros(nset) - vmat = cp.zeros((nset,n2c,n2c), dtype=cp.complex128) + nelec = 0 + excsum = 0 + vmat = cp.zeros((n2c,n2c), dtype=cp.complex128) if xctype in ('LDA', 'GGA', 'MGGA'): f_eval_mat = { @@ -119,21 +125,24 @@ def _gks_mcol_vxc(ni, mol, grids, xc_code, dms, relativity=0, hermi=0, if ni.collinear[0] == 'm': # mcol eval_xc = ni.mcfun_eval_xc_adapter(xc_code) else: + raise NotImplementedError('locally-collinear vxc is not implemented') eval_xc = ni.eval_xc_eff for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): - for i in range(nset): - rho = make_rho(i, ao, mask, xctype) - exc, vxc = eval_xc(xc_code, rho, deriv=1, xctype=xctype)[:2] - if xctype == 'LDA': - den = rho[0] * weight - else: - den = rho[0,0] * weight - nelec[i] += den.sum() - excsum[i] += cp.dot(den, exc) - vmat[i] += fmat(mol, ao, weight, rho, vxc, mask, shls_slice, - ao_loc, hermi) + in ni.block_loop(_sorted_mol, grids, nao, ao_deriv, max_memory): + rho = eval_rho(_sorted_mol, ao, dms, non0tab=None, xctype=xctype, hermi=hermi, + with_lapl=False, verbose=None) + print(rho.shape) + print(rho) + exc, vxc = eval_xc(xc_code, rho, deriv=1, xctype=xctype)[:2] + if xctype == 'LDA': + den = rho[0] * weight + else: + den = rho[0,0] * weight + nelec += den.sum() + excsum += cp.dot(den, exc) + vmat += fmat(mol, ao, weight, rho, vxc, mask, shls_slice, + ao_loc, hermi) elif xctype == 'HF': pass @@ -141,11 +150,8 @@ def _gks_mcol_vxc(ni, mol, grids, xc_code, dms, relativity=0, hermi=0, raise NotImplementedError(f'numint2c.get_vxc for functional {xc_code}') if hermi: - vmat = vmat + vmat.conj().transpose(0,2,1) - if isinstance(dms, cp.ndarray) and dms.ndim == 2: - vmat = vmat[0] - nelec = nelec[0] - excsum = excsum[0] + vmat = vmat + vmat.conj().transpose(1,0) + return nelec, excsum, vmat def _gks_mcol_fxc(ni, mol, grids, xc_code, dm0, dms, relativity=0, hermi=0, @@ -190,58 +196,58 @@ def _ncol_lda_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi mat = cp.block([[mataa, matab], [matba, matbb]]) return mat -def _eval_xc_eff(ni, xc_code, rho, deriv=1, omega=None, xctype=None, - verbose=None, spin=None): - r'''Returns the derivative tensor against the density parameters - - [[density_a, (nabla_x)_a, (nabla_y)_a, (nabla_z)_a, tau_a], - [density_b, (nabla_x)_b, (nabla_y)_b, (nabla_z)_b, tau_b]]. - - It differs from the eval_xc method in the derivatives of non-local part. - The eval_xc method returns the XC functional derivatives to sigma - (|\nabla \rho|^2) - - Args: - rho: 2-dimensional or 3-dimensional array - Total density and spin density (and their derivatives if GGA or MGGA - functionals) on grids - - Kwargs: - deriv: int - derivative orders - omega: float - define the exponent in the attenuated Coulomb for RSH functional - ''' - if omega is None: omega = ni.omega - if xctype is None: xctype = ni._xc_type(xc_code) - if ni.collinear[0] == 'c': # collinear - t = rho[0] - s = rho[3] - elif ni.collinear[0] == 'n': # ncol - t = rho[0] - m = rho[1:4] - s = lib.norm(m, axis=0) - elif ni.collinear[0] == 'm': # mcol - # called by mcfun.eval_xc_eff which passes (rho, s) only - t, s = rho - else: - raise RuntimeError(f'Unknown collinear scheme {ni.collinear}') - - rho = cp.stack([(t + s) * .5, (t - s) * .5]) - if xctype == 'MGGA' and rho.shape[1] == 6: - rho = cp.asarray(rho[:,[0,1,2,3,5],:], order='C') - - assert spin is None or spin == 1 - spin = 1 - - out = ni.libxc.eval_xc1(xc_code, rho, spin, deriv, omega) - evfk = [out[0]] - for order in range(1, deriv+1): - evfk.append(xc_deriv.transform_xc(rho, out, xctype, spin, order)) - if deriv < 3: - # The return has at least [e, v, f, k] terms - evfk.extend([None] * (3 - deriv)) - return evfk +# def _eval_xc_eff(ni, xc_code, rho, deriv=1, omega=None, xctype=None, +# verbose=None, spin=None): +# r'''Returns the derivative tensor against the density parameters + +# [[density_a, (nabla_x)_a, (nabla_y)_a, (nabla_z)_a, tau_a], +# [density_b, (nabla_x)_b, (nabla_y)_b, (nabla_z)_b, tau_b]]. + +# It differs from the eval_xc method in the derivatives of non-local part. +# The eval_xc method returns the XC functional derivatives to sigma +# (|\nabla \rho|^2) + +# Args: +# rho: 2-dimensional or 3-dimensional array +# Total density and spin density (and their derivatives if GGA or MGGA +# functionals) on grids + +# Kwargs: +# deriv: int +# derivative orders +# omega: float +# define the exponent in the attenuated Coulomb for RSH functional +# ''' +# if omega is None: omega = ni.omega +# if xctype is None: xctype = ni._xc_type(xc_code) +# if ni.collinear[0] == 'c': # collinear +# t = rho[0] +# s = rho[3] +# elif ni.collinear[0] == 'n': # ncol +# t = rho[0] +# m = rho[1:4] +# s = lib.norm(m, axis=0) +# elif ni.collinear[0] == 'm': # mcol +# # called by mcfun.eval_xc_eff which passes (rho, s) only +# t, s = rho +# else: +# raise RuntimeError(f'Unknown collinear scheme {ni.collinear}') + +# rho = cp.stack([(t + s) * .5, (t - s) * .5]) +# if xctype == 'MGGA' and rho.shape[1] == 6: +# rho = cp.asarray(rho[:,[0,1,2,3,5],:], order='C') + +# assert spin is None or spin == 1 +# spin = 1 + +# out = ni.libxc.eval_xc1(xc_code, rho, spin, deriv, omega) +# evfk = [out[0]] +# for order in range(1, deriv+1): +# evfk.append(xc_deriv.transform_xc(rho, out, xctype, spin, order)) +# if deriv < 3: +# # The return has at least [e, v, f, k] terms +# evfk.extend([None] * (3 - deriv)) +# return evfk # * Mcfun requires functional derivatives to total-density and spin-density. # * Make it a global function than a closure so as to be callable by multiprocessing @@ -252,23 +258,32 @@ def __mcfun_fn_eval_xc(ni, xc_code, xctype, rho, deriv): evfk[order] = xc_deriv.ud2ts(evfk[order]) return evfk +def __mcfun_fn_eval_xc2(ni, xc_code, xctype, rho, deriv): + t, s = rho + if not isinstance(t, cp.ndarray): + t = cp.asarray(t) + if not isinstance(s, cp.ndarray): + s = cp.asarray(s) + rho = cp.stack([(t + s) * .5, (t - s) * .5]) + spin = 1 + evfk = ni.eval_xc_eff(xc_code, rho, deriv=deriv, xctype=xctype, spin=spin) + evfk = list(evfk) + for order in range(1, deriv+1): + if evfk[order] is not None: + evfk[order] = xc_deriv.ud2ts(evfk[order]) + return evfk + def mcfun_eval_xc_adapter(ni, xc_code): '''Wrapper to generate the eval_xc function required by mcfun''' - try: - import mcfun - except ImportError: - raise ImportError('This feature requires mcfun library.\n' - 'Try install mcfun with `pip install mcfun`') xctype = ni._xc_type(xc_code) - fn_eval_xc = functools.partial(__mcfun_fn_eval_xc, ni, xc_code, xctype) - nproc = lib.num_threads() + fn_eval_xc = functools.partial(__mcfun_fn_eval_xc2, ni, xc_code, xctype) def eval_xc_eff(xc_code, rho, deriv=1, omega=None, xctype=None, verbose=None, spin=None): - return mcfun.eval_xc_eff( + return mcfun_gpu.eval_xc_eff( fn_eval_xc, rho, deriv, spin_samples=ni.spin_samples, collinear_threshold=ni.collinear_thrd, - collinear_samples=ni.collinear_samples, workers=nproc) + collinear_samples=ni.collinear_samples) return eval_xc_eff def _mcol_lda_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi): @@ -300,7 +315,10 @@ def _mcol_lda_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi mataa = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) aow = _scale_ao(ao, wr[0]-wmz[0], out=aow) # Mz matbb = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) - mat = cp.block([[mataa, matab], [matba, matbb]]) + row1 = cp.concatenate([mataa, matab], axis=1) + row2 = cp.concatenate([matba, matbb], axis=1) + + mat = cp.concatenate([row1, row2], axis=0) return mat def _mcol_gga_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi): @@ -337,7 +355,22 @@ def _mcol_gga_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi aow = _scale_ao(ao[1:4], (wr[1:4]-wmz[1:4]).conj(), out=aow) # Mz matbb += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) - mat = cp.block([[mataa, matab], [matba, matbb]]) + row1 = cp.concatenate([mataa, matab], axis=1) + row2 = cp.concatenate([matba, matbb], axis=1) + + mat = cp.concatenate([row1, row2], axis=0) + return mat + +def _tau_dot(mol, bra, ket, wv, mask, shls_slice, ao_loc): + '''nabla_ao dot nabla_ao + numpy.einsum('p,xpi,xpj->ij', wv, bra[1:4].conj(), ket[1:4]) + ''' + aow = _scale_ao(ket[1], wv) + mat = _dot_ao_ao(mol, bra[1], aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ket[2], wv, aow) + mat += _dot_ao_ao(mol, bra[2], aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ket[3], wv, aow) + mat += _dot_ao_ao(mol, bra[3], aow, mask, shls_slice, ao_loc) return mat def _mcol_mgga_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi): @@ -382,7 +415,10 @@ def _mcol_mgga_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, herm aow = _scale_ao(ao[1:4], (wr[1:4]-wmz[1:4]).conj(), out=aow) # Mz matbb += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) - mat = cp.block([[mataa, matab], [matba, matbb]]) + row1 = cp.concatenate([mataa, matab], axis=1) + row2 = cp.concatenate([matba, matbb], axis=1) + + mat = cp.concatenate([row1, row2], axis=0) return mat def _mcol_lda_fxc_mat(mol, ao, weight, rho0, rho1, fxc, @@ -417,6 +453,8 @@ def _contract_rho_m(bra, ket, hermi=0, bra_eq_ket=False): rbb = cp.einsum('ip,ip->p', bra_b.real, ket_b[nao:].real) rbb+= cp.einsum('ip,ip->p', bra_b.imag, ket_b[nao:].imag) rho_m = cp.empty((4, ngrids)) + print("debug") + print(rho_m.dtype) rho_m[0,:] = raa + rbb # rho rho_m[1,:] = rab.real # mx rho_m[2,:] = rab.imag # my @@ -428,6 +466,7 @@ def _contract_rho_m(bra, ket, hermi=0, bra_eq_ket=False): rba = cp.einsum('ip,ip->p', bra_b.conj(), ket_a[nao:]) rho_m[1,:] += rba.real rho_m[2,:] -= rba.imag + print(rho_m.dtype) else: raa = cp.einsum('ip,ip->p', bra_a.conj(), ket_a[:nao]) rba = cp.einsum('ip,ip->p', bra_b.conj(), ket_a[nao:]) @@ -443,6 +482,8 @@ def _contract_rho_m(bra, ket, hermi=0, bra_eq_ket=False): class NumInt2C(lib.StreamObject, numint.LibXCMixin): '''Numerical integration methods for 2-component basis (used by GKS)''' + _keys = {'gdftopt'} + gdftopt = None # collinear schemes: # 'col' (collinear, by default) @@ -456,6 +497,12 @@ class NumInt2C(lib.StreamObject, numint.LibXCMixin): eval_ao = staticmethod(numint.eval_ao) eval_rho = staticmethod(eval_rho) + def build(self, mol, coords): + self.gdftopt = numint._GDFTOpt.from_mol(mol) + self.grid_blksize = None + self.non0ao_idx = {} + return self + def eval_rho1(self, mol, ao, dm, screen_index=None, xctype='LDA', hermi=0, with_lapl=True, cutoff=None, ao_cutoff=None, pair_mask=None, verbose=None): @@ -478,19 +525,6 @@ def eval_rho2(self, mol, ao, mo_coeff, mo_occ, non0tab=None, xctype='LDA', rho = self.eval_rho(mol, ao, dm, non0tab, xctype, hermi, with_lapl, verbose) return rho - #if mo_coeff.dtype == cp.double: - # nao = ao.shape[-1] - # assert nao * 2 == mo_coeff.shape[0] - # mo_aR = mo_coeff[:nao] - # mo_bR = mo_coeff[nao:] - # rho = numint.eval_rho2(mol, ao, mo_aR, mo_occ, non0tab, xctype, with_lapl, verbose) - # rho += numint.eval_rho2(mol, ao, mo_bR, mo_occ, non0tab, xctype, with_lapl, verbose) - #else: - # dm = cp.dot(mo_coeff * mo_occ, mo_coeff.conj().T) - # hermi = 1 - # rho = self.eval_rho(mol, dm, ao, dm, non0tab, xctype, hermi, with_lapl, verbose) - #mx = my = mz = None - #return rho, mx, my, mz raise NotImplementedError(self.collinear) def cache_xc_kernel(self, mol, grids, xc_code, mo_coeff, mo_occ, spin=0, @@ -498,92 +532,13 @@ def cache_xc_kernel(self, mol, grids, xc_code, mo_coeff, mo_occ, spin=0, '''Compute the 0th order density, Vxc and fxc. They can be used in TDDFT, DFT hessian module etc. ''' - xctype = self._xc_type(xc_code) - if xctype in ('GGA', 'MGGA'): - ao_deriv = 1 - else: - ao_deriv = 0 - n2c = mo_coeff.shape[0] - nao = n2c // 2 - with_lapl = numint.MGGA_DENSITY_LAPL - - if self.collinear[0] in ('m', 'n'): # mcol or ncol - rho = [] - for ao, mask, weight, coords \ - in self.block_loop(mol, grids, nao, ao_deriv, max_memory): - rho.append(self.eval_rho2(mol, ao, mo_coeff, mo_occ, mask, xctype, - with_lapl)) - rho = cp.concatenate(rho,axis=-1) - assert rho.dtype == cp.double - if self.collinear[0] == 'm': # mcol - eval_xc = self.mcfun_eval_xc_adapter(xc_code) - else: - eval_xc = self.eval_xc_eff - vxc, fxc = eval_xc(xc_code, rho, deriv=2, xctype=xctype)[1:3] - else: - # rhoa and rhob must be real - dm = cp.dot(mo_coeff * mo_occ, mo_coeff.conj().T) - dm_a = dm[:nao,:nao].real.copy('C') - dm_b = dm[nao:,nao:].real.copy('C') - ni = self._to_numint1c() - hermi = 1 - rhoa = [] - rhob = [] - for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): - # rhoa and rhob must be real - rhoa.append(ni.eval_rho(mol, ao, dm_a, mask, xctype, hermi, with_lapl)) - rhob.append(ni.eval_rho(mol, ao, dm_b, mask, xctype, hermi, with_lapl)) - rho = cp.stack([cp.concatenate(rhoa,axis=-1), cp.concatenate(rhob,axis=-1)]) - assert rho.dtype == cp.double - vxc, fxc = ni.eval_xc_eff(xc_code, rho, deriv=2, xctype=xctype)[1:3] - return rho, vxc, fxc + raise NotImplementedError("Kxc calculation is not supported.") def cache_xc_kernel1(self, mol, grids, xc_code, dm, spin=0, max_memory=2000): '''Compute the 0th order density, Vxc and fxc. They can be used in TDDFT, DFT hessian module etc. ''' - xctype = self._xc_type(xc_code) - if xctype in ('GGA', 'MGGA'): - ao_deriv = 1 - else: - ao_deriv = 0 - n2c = dm.shape[0] - nao = n2c // 2 - - if self.collinear[0] in ('m', 'n'): # mcol or ncol - hermi = 1 # rho must be real. We need to assume dm hermitian - with_lapl = False - rho = [] - for ao, mask, weight, coords \ - in self.block_loop(mol, grids, nao, ao_deriv, max_memory): - rho.append(self.eval_rho1(mol, ao, dm, mask, xctype, hermi, with_lapl)) - rho = cp.concatenate(rho,axis=-1) - assert rho.dtype == cp.double - if self.collinear[0] == 'm': # mcol - eval_xc = self.mcfun_eval_xc_adapter(xc_code) - else: - eval_xc = self.eval_xc_eff - vxc, fxc = eval_xc(xc_code, rho, deriv=2, xctype=xctype)[1:3] - else: - # rhoa and rhob must be real. We need to assume dm hermitian - hermi = 1 - # TODO: test if dm[:nao,:nao].imag == 0 - dm_a = dm[:nao,:nao].real.copy('C') - dm_b = dm[nao:,nao:].real.copy('C') - ni = self._to_numint1c() - with_lapl = True - rhoa = [] - rhob = [] - for ao, mask, weight, coords \ - in ni.block_loop(mol, grids, nao, ao_deriv, max_memory): - # rhoa and rhob must be real - rhoa.append(ni.eval_rho(mol, ao, dm_a, mask, xctype, hermi, with_lapl)) - rhob.append(ni.eval_rho(mol, ao, dm_b, mask, xctype, hermi, with_lapl)) - rho = cp.stack([cp.concatenate(rhoa,axis=-1), cp.concatenate(rhob,axis=-1)]) - assert rho.dtype == cp.double - vxc, fxc = ni.eval_xc_eff(xc_code, rho, deriv=2, xctype=xctype)[1:3] - return rho, vxc, fxc + raise NotImplementedError("Kxc calculation is not supported.") def get_rho(self, mol, dm, grids, max_memory=2000): '''Density in real space @@ -600,9 +555,19 @@ def get_rho(self, mol, dm, grids, max_memory=2000): @lib.with_doc(numint.nr_rks.__doc__) def nr_vxc(self, mol, grids, xc_code, dms, spin=0, relativity=0, hermi=1, max_memory=2000, verbose=None): + if not isinstance(dms, cp.ndarray): + dms = cp.asarray(dms) if self.collinear[0] in ('m', 'n'): # mcol or ncol + opt = getattr(self, 'gdftopt', None) + if opt is None: + self.build(mol, grids.coords) + opt = self.gdftopt + assert dms.ndim == 2 + dms = cp.asarray(dms) + dms = opt.sort_orbitals_2c(dms, axis=[0,1]) n, exc, vmat = self._gks_mcol_vxc(mol, grids, xc_code, dms, relativity, hermi, max_memory, verbose) + vmat = opt.unsort_orbitals_2c(vmat, axis=[0,1]) else: nao = dms.shape[-1] // 2 # ground state density is always real @@ -628,8 +593,10 @@ def nr_fxc(self, mol, grids, xc_code, dm0, dms, spin=0, relativity=0, hermi=0, rho0=None, vxc=None, fxc=None, max_memory=2000, verbose=None): raise NotImplementedError('non-collinear fxc') get_fxc = nr_gks_fxc = nr_fxc - - eval_xc_eff = _eval_xc_eff + + def _init_xcfuns(self, xc_code, spin=0): + return numint._init_xcfuns(xc_code, spin) + eval_xc_eff = numint.eval_xc_eff mcfun_eval_xc_adapter = mcfun_eval_xc_adapter block_loop = numint.NumInt.block_loop diff --git a/gpu4pyscf/dft/tests/test_numint2c.py b/gpu4pyscf/dft/tests/test_numint2c.py index 374e10773..5588e6266 100644 --- a/gpu4pyscf/dft/tests/test_numint2c.py +++ b/gpu4pyscf/dft/tests/test_numint2c.py @@ -34,7 +34,7 @@ def setUpModule(): basis = 'ccpvdz', charge = 1, spin = 1, # = 2S = spin_up - spin_down - output = '/dev/null' + # output = '/dev/null' ) np.random.seed(2) @@ -65,45 +65,72 @@ def tearDownModule(): class KnownValues(unittest.TestCase): - def test_eval_rho(self): - np.random.seed(1) - dm = np.random.random(dm0.shape) - ni_gpu = NumInt2C() - ni_cpu = pyscf_numint2c() - for xctype in ('LDA', 'GGA', 'MGGA'): - deriv = 1 - if xctype == 'LDA': - deriv = 0 - ao_gpu = ni_gpu.eval_ao(mol, grids_gpu.coords, deriv=deriv, transpose=False) - ao_cpu = ni_cpu.eval_ao(mol, grids_cpu.coords, deriv=deriv) + # def test_eval_rho(self): + # np.random.seed(1) + # dm = np.random.random(dm0.shape) + # ni_gpu = NumInt2C() + # ni_cpu = pyscf_numint2c() + # for xctype in ('LDA', 'GGA', 'MGGA'): + # deriv = 1 + # if xctype == 'LDA': + # deriv = 0 + # ao_gpu = ni_gpu.eval_ao(mol, grids_gpu.coords, deriv=deriv, transpose=False) + # ao_cpu = ni_cpu.eval_ao(mol, grids_cpu.coords, deriv=deriv) - rho = ni_gpu.eval_rho(mol, ao_gpu, dm, xctype=xctype, hermi=0, with_lapl=False) - ref = ni_cpu.eval_rho(mol, ao_cpu, dm, xctype=xctype, hermi=0, with_lapl=False) - self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) + # rho = ni_gpu.eval_rho(mol, ao_gpu, dm, xctype=xctype, hermi=0, with_lapl=False) + # ref = ni_cpu.eval_rho(mol, ao_cpu, dm, xctype=xctype, hermi=0, with_lapl=False) + # self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) - rho = ni_gpu.eval_rho(mol, ao_gpu, dm0, xctype=xctype, hermi=1, with_lapl=False) - ref = ni_cpu.eval_rho(mol, ao_cpu, dm0, xctype=xctype, hermi=1, with_lapl=False) - self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) + # rho = ni_gpu.eval_rho(mol, ao_gpu, dm0, xctype=xctype, hermi=1, with_lapl=False) + # ref = ni_cpu.eval_rho(mol, ao_cpu, dm0, xctype=xctype, hermi=1, with_lapl=False) + # self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) - def test_eval_rho2(self): - np.random.seed(1) - mo_coeff_test = np.random.random(mo_coeff.shape) + # def test_eval_rho2(self): + # np.random.seed(1) + # mo_coeff_test = np.random.random(mo_coeff.shape) + # ni_gpu = NumInt2C() + # ni_gpu.collinear='m' + # ni_cpu = pyscf_numint2c() + # ni_cpu.collinear='m' + # for xctype in ('LDA', 'GGA', 'MGGA'): + # deriv = 1 + # if xctype == 'LDA': + # deriv = 0 + # ao_gpu = ni_gpu.eval_ao(mol, grids_gpu.coords, deriv=deriv, transpose=False) + # ao_cpu = ni_cpu.eval_ao(mol, grids_cpu.coords, deriv=deriv) + + # rho = ni_gpu.eval_rho2(mol, ao_gpu, mo_coeff_test, mo_occ, xctype=xctype, with_lapl=False) + # ref = ni_cpu.eval_rho2(mol, ao_cpu, mo_coeff_test, mo_occ, xctype=xctype, with_lapl=False) + # self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) + + def test_eval_xc_eff(self): ni_gpu = NumInt2C() ni_gpu.collinear='m' ni_cpu = pyscf_numint2c() ni_cpu.collinear='m' - for xctype in ('LDA', 'GGA', 'MGGA'): - deriv = 1 - if xctype == 'LDA': - deriv = 0 - ao_gpu = ni_gpu.eval_ao(mol, grids_gpu.coords, deriv=deriv, transpose=False) - ao_cpu = ni_cpu.eval_ao(mol, grids_cpu.coords, deriv=deriv) - - rho = ni_gpu.eval_rho2(mol, ao_gpu, mo_coeff_test, mo_occ, xctype=xctype, with_lapl=False) - ref = ni_cpu.eval_rho2(mol, ao_cpu, mo_coeff_test, mo_occ, xctype=xctype, with_lapl=False) - self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) - + np.random.seed(1) + dm = dm0*1.0 + print(dm.shape, mol.nao) + print(dm.dtype) + for xc_code in (MGGA_M06, ): + # xctype = ni_gpu._xc_type(xc_code) + # deriv = 1 + # if xctype == 'LDA': + # deriv = 0 + # ao_gpu = ni_gpu.eval_ao(mol, grids_gpu.coords, deriv=deriv, transpose=False) + # rho = ni_gpu.eval_rho(mol, ao_gpu, dm, xctype=xctype, hermi=0, with_lapl=False) + # eval_xc_eff_gpu = ni_gpu.mcfun_eval_xc_adapter(xc_code) + n_gpu, exc_gpu, vmat_gpu = ni_gpu.nr_vxc(mol, grids_gpu, xc_code, dm) + n_cpu, exc_cpu, vmat_cpu = ni_cpu.nr_vxc(mol, grids_cpu, xc_code, dm) + print("n_gpu", n_gpu) + print("n_cpu", n_cpu) + print("exc_gpu", exc_gpu) + print("exc_cpu", exc_cpu) + print("vmat_gpu", vmat_gpu) + print("vmat_cpu", vmat_cpu) + print() if __name__ == "__main__": print("Full Tests for dft numint2c") unittest.main() + From e3d749b53de0cd8fc727e90219ecf6ddeece4df2 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Tue, 4 Nov 2025 13:44:43 +0800 Subject: [PATCH 09/21] finish debug numint2c --- gpu4pyscf/dft/mcfun_gpu.py | 24 ------ gpu4pyscf/dft/numint2c.py | 84 ++------------------ gpu4pyscf/dft/tests/test_numint2c.py | 110 ++++++++++++++------------- 3 files changed, 63 insertions(+), 155 deletions(-) diff --git a/gpu4pyscf/dft/mcfun_gpu.py b/gpu4pyscf/dft/mcfun_gpu.py index c243fd1c1..ed010311c 100644 --- a/gpu4pyscf/dft/mcfun_gpu.py +++ b/gpu4pyscf/dft/mcfun_gpu.py @@ -378,27 +378,3 @@ def _project_spin_paxis(rho_tm, sgridz=None): rho_ts = rho_ts.reshape(2, nvar, ngrids * nsg) return rho_ts -def _project_spin_paxis2(rho_tm, sgridz=None): - # ToDo: be written into the function _project_spin_paxis(). - # Because use mz rather than |mz| here - '''Projects spins onto the principal axis''' - rho = rho_tm[0] - mz = rho_tm[1] - - if sgridz is None: - rho_ts = cp.stack([rho, mz]) - else: - ngrids = rho.shape[-1] - nsg = sgridz.shape[0] - if rho_tm.ndim == 2: - rho_ts = cp.empty((2, ngrids, nsg)) - rho_ts[0] = rho[:,cp.newaxis] - rho_ts[1] = mz[:,cp.newaxis] * sgridz - rho_ts = rho_ts.reshape(2, ngrids * nsg) - else: - nvar = rho_tm.shape[1] - rho_ts = cp.empty((2, nvar, ngrids, nsg)) - rho_ts[0] = rho[:,:,cp.newaxis] - rho_ts[1] = mz[:,:,cp.newaxis] * sgridz - rho_ts = rho_ts.reshape(2, nvar, ngrids * nsg) - return rho_ts diff --git a/gpu4pyscf/dft/numint2c.py b/gpu4pyscf/dft/numint2c.py index 9fb9580fc..c4183fcf8 100644 --- a/gpu4pyscf/dft/numint2c.py +++ b/gpu4pyscf/dft/numint2c.py @@ -29,8 +29,6 @@ def eval_rho(mol, ao, dm, non0tab=None, xctype='LDA', hermi=0, with_lapl=True, verbose=None): nao = ao.shape[-2] - print("in eval_rho") - print(ao.shape, dm.shape, nao, ao.shape) assert dm.ndim == 2 and nao * 2 == dm.shape[0] if not isinstance(dm, cp.ndarray): dm = cp.asarray(dm) @@ -41,12 +39,9 @@ def eval_rho(mol, ao, dm, non0tab=None, xctype='LDA', hermi=0, ao_loc = mol.ao_loc if xctype == 'LDA': - print("debug 0", hermi) c0a = _dot_ao_dm(mol, ao, dm[:nao], non0tab, shls_slice, ao_loc) c0b = _dot_ao_dm(mol, ao, dm[nao:], non0tab, shls_slice, ao_loc) rho_m = _contract_rho_m((ao, ao), (c0a, c0b), hermi, True) - print(rho_m.shape) - print(rho_m) elif xctype == 'GGA': # first 4 ~ (rho, m), second 4 ~ (0th order, dx, dy, dz) if hermi: @@ -126,14 +121,13 @@ def _gks_mcol_vxc(ni, mol, grids, xc_code, dms, relativity=0, hermi=0, eval_xc = ni.mcfun_eval_xc_adapter(xc_code) else: raise NotImplementedError('locally-collinear vxc is not implemented') - eval_xc = ni.eval_xc_eff for ao, mask, weight, coords \ in ni.block_loop(_sorted_mol, grids, nao, ao_deriv, max_memory): - rho = eval_rho(_sorted_mol, ao, dms, non0tab=None, xctype=xctype, hermi=hermi, + mask_2c = np.concatenate([mask, mask + nao]) + dm_mask = dms[mask_2c[:,None],mask_2c] + rho = eval_rho(_sorted_mol, ao, dm_mask, non0tab=None, xctype=xctype, hermi=hermi, with_lapl=False, verbose=None) - print(rho.shape) - print(rho) exc, vxc = eval_xc(xc_code, rho, deriv=1, xctype=xctype)[:2] if xctype == 'LDA': den = rho[0] * weight @@ -141,7 +135,7 @@ def _gks_mcol_vxc(ni, mol, grids, xc_code, dms, relativity=0, hermi=0, den = rho[0,0] * weight nelec += den.sum() excsum += cp.dot(den, exc) - vmat += fmat(mol, ao, weight, rho, vxc, mask, shls_slice, + vmat += fmat(mol, ao, weight, rho, vxc, mask_2c, shls_slice, ao_loc, hermi) elif xctype == 'HF': @@ -196,69 +190,10 @@ def _ncol_lda_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi mat = cp.block([[mataa, matab], [matba, matbb]]) return mat -# def _eval_xc_eff(ni, xc_code, rho, deriv=1, omega=None, xctype=None, -# verbose=None, spin=None): -# r'''Returns the derivative tensor against the density parameters - -# [[density_a, (nabla_x)_a, (nabla_y)_a, (nabla_z)_a, tau_a], -# [density_b, (nabla_x)_b, (nabla_y)_b, (nabla_z)_b, tau_b]]. - -# It differs from the eval_xc method in the derivatives of non-local part. -# The eval_xc method returns the XC functional derivatives to sigma -# (|\nabla \rho|^2) - -# Args: -# rho: 2-dimensional or 3-dimensional array -# Total density and spin density (and their derivatives if GGA or MGGA -# functionals) on grids - -# Kwargs: -# deriv: int -# derivative orders -# omega: float -# define the exponent in the attenuated Coulomb for RSH functional -# ''' -# if omega is None: omega = ni.omega -# if xctype is None: xctype = ni._xc_type(xc_code) -# if ni.collinear[0] == 'c': # collinear -# t = rho[0] -# s = rho[3] -# elif ni.collinear[0] == 'n': # ncol -# t = rho[0] -# m = rho[1:4] -# s = lib.norm(m, axis=0) -# elif ni.collinear[0] == 'm': # mcol -# # called by mcfun.eval_xc_eff which passes (rho, s) only -# t, s = rho -# else: -# raise RuntimeError(f'Unknown collinear scheme {ni.collinear}') - -# rho = cp.stack([(t + s) * .5, (t - s) * .5]) -# if xctype == 'MGGA' and rho.shape[1] == 6: -# rho = cp.asarray(rho[:,[0,1,2,3,5],:], order='C') - -# assert spin is None or spin == 1 -# spin = 1 - -# out = ni.libxc.eval_xc1(xc_code, rho, spin, deriv, omega) -# evfk = [out[0]] -# for order in range(1, deriv+1): -# evfk.append(xc_deriv.transform_xc(rho, out, xctype, spin, order)) -# if deriv < 3: -# # The return has at least [e, v, f, k] terms -# evfk.extend([None] * (3 - deriv)) -# return evfk # * Mcfun requires functional derivatives to total-density and spin-density. # * Make it a global function than a closure so as to be callable by multiprocessing def __mcfun_fn_eval_xc(ni, xc_code, xctype, rho, deriv): - evfk = ni.eval_xc_eff(xc_code, rho, deriv=deriv, xctype=xctype) - for order in range(1, deriv+1): - if evfk[order] is not None: - evfk[order] = xc_deriv.ud2ts(evfk[order]) - return evfk - -def __mcfun_fn_eval_xc2(ni, xc_code, xctype, rho, deriv): t, s = rho if not isinstance(t, cp.ndarray): t = cp.asarray(t) @@ -277,7 +212,7 @@ def mcfun_eval_xc_adapter(ni, xc_code): '''Wrapper to generate the eval_xc function required by mcfun''' xctype = ni._xc_type(xc_code) - fn_eval_xc = functools.partial(__mcfun_fn_eval_xc2, ni, xc_code, xctype) + fn_eval_xc = functools.partial(__mcfun_fn_eval_xc, ni, xc_code, xctype) def eval_xc_eff(xc_code, rho, deriv=1, omega=None, xctype=None, verbose=None, spin=None): return mcfun_gpu.eval_xc_eff( @@ -453,8 +388,6 @@ def _contract_rho_m(bra, ket, hermi=0, bra_eq_ket=False): rbb = cp.einsum('ip,ip->p', bra_b.real, ket_b[nao:].real) rbb+= cp.einsum('ip,ip->p', bra_b.imag, ket_b[nao:].imag) rho_m = cp.empty((4, ngrids)) - print("debug") - print(rho_m.dtype) rho_m[0,:] = raa + rbb # rho rho_m[1,:] = rab.real # mx rho_m[2,:] = rab.imag # my @@ -466,7 +399,6 @@ def _contract_rho_m(bra, ket, hermi=0, bra_eq_ket=False): rba = cp.einsum('ip,ip->p', bra_b.conj(), ket_a[nao:]) rho_m[1,:] += rba.real rho_m[2,:] -= rba.imag - print(rho_m.dtype) else: raa = cp.einsum('ip,ip->p', bra_a.conj(), ket_a[:nao]) rba = cp.einsum('ip,ip->p', bra_b.conj(), ket_a[nao:]) @@ -534,12 +466,6 @@ def cache_xc_kernel(self, mol, grids, xc_code, mo_coeff, mo_occ, spin=0, ''' raise NotImplementedError("Kxc calculation is not supported.") - def cache_xc_kernel1(self, mol, grids, xc_code, dm, spin=0, max_memory=2000): - '''Compute the 0th order density, Vxc and fxc. They can be used in TDDFT, - DFT hessian module etc. - ''' - raise NotImplementedError("Kxc calculation is not supported.") - def get_rho(self, mol, dm, grids, max_memory=2000): '''Density in real space ''' diff --git a/gpu4pyscf/dft/tests/test_numint2c.py b/gpu4pyscf/dft/tests/test_numint2c.py index 5588e6266..4f41fe54d 100644 --- a/gpu4pyscf/dft/tests/test_numint2c.py +++ b/gpu4pyscf/dft/tests/test_numint2c.py @@ -18,9 +18,11 @@ import cupy from pyscf import lib, scf from pyscf.dft.numint2c import NumInt2C as pyscf_numint2c +from pyscf.dft.numint import NumInt as pyscf_numint from gpu4pyscf.dft import Grids from gpu4pyscf.dft import numint2c from gpu4pyscf.dft.numint2c import NumInt2C +from gpu4pyscf.dft.numint import NumInt from gpu4pyscf import dft from gpu4pyscf.dft import gen_grid @@ -65,43 +67,59 @@ def tearDownModule(): class KnownValues(unittest.TestCase): - # def test_eval_rho(self): - # np.random.seed(1) - # dm = np.random.random(dm0.shape) - # ni_gpu = NumInt2C() - # ni_cpu = pyscf_numint2c() - # for xctype in ('LDA', 'GGA', 'MGGA'): - # deriv = 1 - # if xctype == 'LDA': - # deriv = 0 - # ao_gpu = ni_gpu.eval_ao(mol, grids_gpu.coords, deriv=deriv, transpose=False) - # ao_cpu = ni_cpu.eval_ao(mol, grids_cpu.coords, deriv=deriv) + def test_eval_rho(self): + np.random.seed(1) + dm = np.random.random(dm0.shape) + ni_gpu = NumInt2C() + ni_cpu = pyscf_numint2c() + for xctype in ('LDA', 'GGA', 'MGGA'): + deriv = 1 + if xctype == 'LDA': + deriv = 0 + ao_gpu = ni_gpu.eval_ao(mol, grids_gpu.coords, deriv=deriv, transpose=False) + ao_cpu = ni_cpu.eval_ao(mol, grids_cpu.coords, deriv=deriv) - # rho = ni_gpu.eval_rho(mol, ao_gpu, dm, xctype=xctype, hermi=0, with_lapl=False) - # ref = ni_cpu.eval_rho(mol, ao_cpu, dm, xctype=xctype, hermi=0, with_lapl=False) - # self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) - - # rho = ni_gpu.eval_rho(mol, ao_gpu, dm0, xctype=xctype, hermi=1, with_lapl=False) - # ref = ni_cpu.eval_rho(mol, ao_cpu, dm0, xctype=xctype, hermi=1, with_lapl=False) - # self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) - - # def test_eval_rho2(self): - # np.random.seed(1) - # mo_coeff_test = np.random.random(mo_coeff.shape) - # ni_gpu = NumInt2C() - # ni_gpu.collinear='m' - # ni_cpu = pyscf_numint2c() - # ni_cpu.collinear='m' - # for xctype in ('LDA', 'GGA', 'MGGA'): - # deriv = 1 - # if xctype == 'LDA': - # deriv = 0 - # ao_gpu = ni_gpu.eval_ao(mol, grids_gpu.coords, deriv=deriv, transpose=False) - # ao_cpu = ni_cpu.eval_ao(mol, grids_cpu.coords, deriv=deriv) + rho = ni_gpu.eval_rho(mol, ao_gpu, dm, xctype=xctype, hermi=0, with_lapl=False) + ref = ni_cpu.eval_rho(mol, ao_cpu, dm, xctype=xctype, hermi=0, with_lapl=False) + self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) + + rho = ni_gpu.eval_rho(mol, ao_gpu, dm0, xctype=xctype, hermi=1, with_lapl=False) + ref = ni_cpu.eval_rho(mol, ao_cpu, dm0, xctype=xctype, hermi=1, with_lapl=False) + self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) + + def test_eval_rho2(self): + np.random.seed(1) + mo_coeff_test = np.random.random(mo_coeff.shape) + ni_gpu = NumInt2C() + ni_gpu.collinear='m' + ni_cpu = pyscf_numint2c() + ni_cpu.collinear='m' + for xctype in ('LDA', 'GGA', 'MGGA'): + deriv = 1 + if xctype == 'LDA': + deriv = 0 + ao_gpu = ni_gpu.eval_ao(mol, grids_gpu.coords, deriv=deriv, transpose=False) + ao_cpu = ni_cpu.eval_ao(mol, grids_cpu.coords, deriv=deriv) - # rho = ni_gpu.eval_rho2(mol, ao_gpu, mo_coeff_test, mo_occ, xctype=xctype, with_lapl=False) - # ref = ni_cpu.eval_rho2(mol, ao_cpu, mo_coeff_test, mo_occ, xctype=xctype, with_lapl=False) - # self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) + rho = ni_gpu.eval_rho2(mol, ao_gpu, mo_coeff_test, mo_occ, xctype=xctype, with_lapl=False) + ref = ni_cpu.eval_rho2(mol, ao_cpu, mo_coeff_test, mo_occ, xctype=xctype, with_lapl=False) + self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) + + def test_get_rho(self): + ni_gpu = NumInt2C() + ni_gpu.collinear='m' + + np.random.seed(1) + ni_gpu_1c = NumInt() + dm_test = np.random.random(dm0.shape) + dm_test = dm_test + dm_test.T + n2c = dm_test.shape[0] + nao = n2c//2 + dm_1c_test = dm_test[:nao,:nao] + dm_test[nao:,nao:] + rho_gpu = ni_gpu.get_rho(mol, dm_test, grids_gpu) + rho_1c_gpu = ni_gpu_1c.get_rho(mol, dm_1c_test, grids_gpu) + self.assertAlmostEqual(abs(rho_gpu.get() - rho_1c_gpu.get()).max(), 0, 10) + def test_eval_xc_eff(self): ni_gpu = NumInt2C() @@ -110,25 +128,13 @@ def test_eval_xc_eff(self): ni_cpu.collinear='m' np.random.seed(1) dm = dm0*1.0 - print(dm.shape, mol.nao) - print(dm.dtype) - for xc_code in (MGGA_M06, ): - # xctype = ni_gpu._xc_type(xc_code) - # deriv = 1 - # if xctype == 'LDA': - # deriv = 0 - # ao_gpu = ni_gpu.eval_ao(mol, grids_gpu.coords, deriv=deriv, transpose=False) - # rho = ni_gpu.eval_rho(mol, ao_gpu, dm, xctype=xctype, hermi=0, with_lapl=False) - # eval_xc_eff_gpu = ni_gpu.mcfun_eval_xc_adapter(xc_code) + for xc_code in (LDA, GGA_PBE, MGGA_M06): + ni_gpu.gdftopt = None n_gpu, exc_gpu, vmat_gpu = ni_gpu.nr_vxc(mol, grids_gpu, xc_code, dm) n_cpu, exc_cpu, vmat_cpu = ni_cpu.nr_vxc(mol, grids_cpu, xc_code, dm) - print("n_gpu", n_gpu) - print("n_cpu", n_cpu) - print("exc_gpu", exc_gpu) - print("exc_cpu", exc_cpu) - print("vmat_gpu", vmat_gpu) - print("vmat_cpu", vmat_cpu) - print() + self.assertAlmostEqual(abs(n_gpu.get() - n_cpu).max(), 0, 10) + self.assertAlmostEqual(abs(exc_gpu.get() - exc_cpu).max(), 0, 10) + self.assertAlmostEqual(abs(vmat_gpu.get() - vmat_cpu).max(), 0, 10) if __name__ == "__main__": print("Full Tests for dft numint2c") From c26e035c21114d119d12e80f57699e2fda23b96d Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Wed, 5 Nov 2025 13:58:13 +0800 Subject: [PATCH 10/21] finish numint2c --- gpu4pyscf/dft/numint2c.py | 38 +------- gpu4pyscf/dft/tests/test_numint2c.py | 127 +++++++++++++++++++++++++-- 2 files changed, 123 insertions(+), 42 deletions(-) diff --git a/gpu4pyscf/dft/numint2c.py b/gpu4pyscf/dft/numint2c.py index c4183fcf8..00c86dfae 100644 --- a/gpu4pyscf/dft/numint2c.py +++ b/gpu4pyscf/dft/numint2c.py @@ -135,7 +135,7 @@ def _gks_mcol_vxc(ni, mol, grids, xc_code, dms, relativity=0, hermi=0, den = rho[0,0] * weight nelec += den.sum() excsum += cp.dot(den, exc) - vmat += fmat(mol, ao, weight, rho, vxc, mask_2c, shls_slice, + vmat += fmat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi) elif xctype == 'HF': @@ -155,40 +155,7 @@ def _gks_mcol_fxc(ni, mol, grids, xc_code, dm0, dms, relativity=0, hermi=0, def _ncol_lda_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi): '''Vxc matrix of non-collinear LDA''' # NOTE vxc in u/d representation - r, mx, my, mz = rho - vxc = xc_deriv.ud2ts(vxc) - vr, vs = vxc[:,0] - s = lib.norm(rho[1:4], axis=0) - - wv = weight * vr - with cp.errstate(divide='ignore',invalid='ignore'): - ws = vs * weight / s - ws[s < 1e-20] = 0 - - # * .5 because of v+v.conj().T in r_vxc - if hermi: - wv *= .5 - ws *= .5 - aow = None - aow = _scale_ao(ao, ws*mx, out=aow) # Mx - tmpx = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) - aow = _scale_ao(ao, ws*my, out=aow) # My - tmpy = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) - if hermi: - # conj(mx+my*1j) == mx-my*1j, tmpx and tmpy should be real - matba = (tmpx + tmpx.T) + (tmpy + tmpy.T) * 1j - matab = cp.zeros_like(matba) - else: - # conj(mx+my*1j) != mx-my*1j, tmpx and tmpy should be complex - matba = tmpx + tmpy * 1j - matab = tmpx - tmpy * 1j - tmpx = tmpy = None - aow = _scale_ao(ao, wv+ws*mz, out=aow) # Mz - mataa = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) - aow = _scale_ao(ao, wv-ws*mz, out=aow) # Mz - matbb = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) - mat = cp.block([[mataa, matab], [matba, matbb]]) - return mat + raise NotImplementedError('non-collinear lda vxc mat') # * Mcfun requires functional derivatives to total-density and spin-density. @@ -526,6 +493,7 @@ def _init_xcfuns(self, xc_code, spin=0): mcfun_eval_xc_adapter = mcfun_eval_xc_adapter block_loop = numint.NumInt.block_loop + reset = numint.NumInt.reset def _to_numint1c(self): '''Converts to the associated class to handle collinear systems''' diff --git a/gpu4pyscf/dft/tests/test_numint2c.py b/gpu4pyscf/dft/tests/test_numint2c.py index 4f41fe54d..7a1e11656 100644 --- a/gpu4pyscf/dft/tests/test_numint2c.py +++ b/gpu4pyscf/dft/tests/test_numint2c.py @@ -21,6 +21,8 @@ from pyscf.dft.numint import NumInt as pyscf_numint from gpu4pyscf.dft import Grids from gpu4pyscf.dft import numint2c +from gpu4pyscf.dft import numint +from pyscf.dft import numint2c as pyscf_numint2c_file from gpu4pyscf.dft.numint2c import NumInt2C from gpu4pyscf.dft.numint import NumInt from gpu4pyscf import dft @@ -69,7 +71,8 @@ class KnownValues(unittest.TestCase): def test_eval_rho(self): np.random.seed(1) - dm = np.random.random(dm0.shape) + dm = np.random.random(dm0.shape) + np.random.random(dm0.shape)*1.0j + dm = dm + dm.conj().T ni_gpu = NumInt2C() ni_cpu = pyscf_numint2c() for xctype in ('LDA', 'GGA', 'MGGA'): @@ -89,7 +92,7 @@ def test_eval_rho(self): def test_eval_rho2(self): np.random.seed(1) - mo_coeff_test = np.random.random(mo_coeff.shape) + mo_coeff_test = np.random.random(mo_coeff.shape) + np.random.random(mo_coeff.shape)*1.0j ni_gpu = NumInt2C() ni_gpu.collinear='m' ni_cpu = pyscf_numint2c() @@ -111,13 +114,14 @@ def test_get_rho(self): np.random.seed(1) ni_gpu_1c = NumInt() - dm_test = np.random.random(dm0.shape) - dm_test = dm_test + dm_test.T + dm_test = np.random.random(dm0.shape) + np.random.random(dm0.shape)*1.0j + dm_test = dm_test + dm_test.T.conj() + n2c = dm_test.shape[0] nao = n2c//2 dm_1c_test = dm_test[:nao,:nao] + dm_test[nao:,nao:] rho_gpu = ni_gpu.get_rho(mol, dm_test, grids_gpu) - rho_1c_gpu = ni_gpu_1c.get_rho(mol, dm_1c_test, grids_gpu) + rho_1c_gpu = ni_gpu_1c.get_rho(mol, dm_1c_test.real, grids_gpu) self.assertAlmostEqual(abs(rho_gpu.get() - rho_1c_gpu.get()).max(), 0, 10) @@ -127,8 +131,8 @@ def test_eval_xc_eff(self): ni_cpu = pyscf_numint2c() ni_cpu.collinear='m' np.random.seed(1) - dm = dm0*1.0 - for xc_code in (LDA, GGA_PBE, MGGA_M06): + dm = dm0*1.0 + 0.0j + for xc_code in (LDA, ): ni_gpu.gdftopt = None n_gpu, exc_gpu, vmat_gpu = ni_gpu.nr_vxc(mol, grids_gpu, xc_code, dm) n_cpu, exc_cpu, vmat_cpu = ni_cpu.nr_vxc(mol, grids_cpu, xc_code, dm) @@ -136,6 +140,115 @@ def test_eval_xc_eff(self): self.assertAlmostEqual(abs(exc_gpu.get() - exc_cpu).max(), 0, 10) self.assertAlmostEqual(abs(vmat_gpu.get() - vmat_cpu).max(), 0, 10) + def test_mcol_lda_vxc_mat(self): + xc_code = 'lda,' + + nao = mol.nao + n2c = nao * 2 + ao_loc = mol.ao_loc + np.random.seed(12) + dm = np.random.rand(n2c, n2c) * .001 + np.random.rand(n2c, n2c) * .0001j + dm += np.eye(n2c) + dm = dm + dm.T + ngrids = 8 + coords = np.random.rand(ngrids,3) + weight = np.random.rand(ngrids) + + ao = numint.eval_ao(mol, coords, deriv=0, transpose=False) + rho = numint2c.eval_rho(mol, ao, dm, xctype='LDA', hermi=1, with_lapl=False) + ni_CPU = pyscf_numint2c() + ni_GPU = NumInt2C() + ni_GPU.collinear = 'mcol' + ni_CPU.collinear = 'mcol' + eval_xc_cpu = ni_CPU.mcfun_eval_xc_adapter(xc_code) + eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) + vxc_cpu = eval_xc_cpu(xc_code, rho.get(), deriv=1)[1] + vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='LDA')[1] + mask = np.ones((8, mol.nbas), dtype=np.uint8) + mask_gpu = cupy.array([i for i in range(mol.nbas)]) + + shls_slice = (0, mol.nbas) + v0_cpu = pyscf_numint2c_file._mcol_lda_vxc_mat(mol, ao.transpose(1,0).get(), weight, rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) + v0_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 0) + v1_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 1) + v1_gpu = v1_gpu + v1_gpu.conj().T + + self.assertAlmostEqual(abs(v0_gpu.get() - v1_gpu.get()).max(), 0, 13) + self.assertAlmostEqual(abs(v0_gpu.get() - v0_cpu).max(), 0, 13) + + def test_mcol_gga_vxc_mat(self): + xc_code = 'pbe,' + + nao = mol.nao + n2c = nao * 2 + ao_loc = mol.ao_loc + np.random.seed(12) + dm = np.random.rand(n2c, n2c) * .001 + np.random.rand(n2c, n2c) * .001j + dm += np.eye(n2c) + dm = dm + dm.T + ngrids = 8 + coords = np.random.rand(ngrids,3) + weight = np.random.rand(ngrids) + + ao = numint.eval_ao(mol, coords, deriv=1, transpose=False) + rho = numint2c.eval_rho(mol, ao, dm, xctype='GGA', hermi=1, with_lapl=False) + ni_CPU = pyscf_numint2c() + ni_GPU = NumInt2C() + ni_GPU.collinear = 'mcol' + ni_CPU.collinear = 'mcol' + eval_xc_cpu = ni_CPU.mcfun_eval_xc_adapter(xc_code) + eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) + vxc_cpu = eval_xc_cpu(xc_code, rho.get(), deriv=1)[1] + vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='GGA')[1] + mask = np.ones((8, mol.nbas), dtype=np.uint8) + mask_gpu = cupy.array([i for i in range(mol.nbas)]) + + shls_slice = (0, mol.nbas) + v0_cpu = pyscf_numint2c_file._mcol_gga_vxc_mat(mol, ao.transpose(0,2,1).get(), weight, rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) + v0_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 0) + v1_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 1) + v1_gpu = v1_gpu + v1_gpu.conj().T + + self.assertAlmostEqual(abs(v0_gpu.get() - v1_gpu.get()).max(), 0, 13) + self.assertAlmostEqual(abs(v0_gpu.get() - v0_cpu).max(), 0, 13) + + def test_mcol_mgga_vxc_mat(self): + xc_code = 'tpss' + + nao = mol.nao + n2c = nao * 2 + ao_loc = mol.ao_loc + np.random.seed(12) + dm = np.random.rand(n2c, n2c) * .001 + dm += np.eye(n2c) + dm = dm + dm.T + ngrids = 8 + coords = np.random.rand(ngrids,3) + weight = np.random.rand(ngrids) + + ao = numint.eval_ao(mol, coords, deriv=1, transpose=False) + rho = numint2c.eval_rho(mol, ao, dm, xctype='MGGA', hermi=1, with_lapl=False) + ni_CPU = pyscf_numint2c() + ni_GPU = NumInt2C() + ni_GPU.collinear = 'mcol' + ni_CPU.collinear = 'mcol' + eval_xc_cpu = ni_CPU.mcfun_eval_xc_adapter(xc_code) + eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) + vxc_cpu = eval_xc_cpu(xc_code, rho.get(), deriv=1)[1] + vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='MGGA')[1] + mask = np.ones((8, mol.nbas), dtype=np.uint8) + mask_gpu = cupy.array([i for i in range(mol.nbas)]) + + shls_slice = (0, mol.nbas) + v0_cpu = pyscf_numint2c_file._mcol_mgga_vxc_mat(mol, ao.transpose(0,2,1).get(), weight, rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) + v0_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 0) + v1_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 1) + v1_gpu = v1_gpu + v1_gpu.conj().T + + self.assertAlmostEqual(abs(v0_gpu.get() - v1_gpu.get()).max(), 0, 13) + self.assertAlmostEqual(abs(v0_gpu.get() - v0_cpu).max(), 0, 13) + + if __name__ == "__main__": print("Full Tests for dft numint2c") unittest.main() From fce5bafa5511bf18c02e93932bab7cdd3d49bdfb Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Wed, 5 Nov 2025 14:01:07 +0800 Subject: [PATCH 11/21] change some codes of GKS --- gpu4pyscf/dft/gks.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/gpu4pyscf/dft/gks.py b/gpu4pyscf/dft/gks.py index 36b3f1ae6..018cf6293 100644 --- a/gpu4pyscf/dft/gks.py +++ b/gpu4pyscf/dft/gks.py @@ -15,7 +15,7 @@ import cupy as cp from pyscf import lib from pyscf.dft import gks -from gpu4pyscf.dft import numint +from gpu4pyscf.dft import numint2c from gpu4pyscf.dft import rks from gpu4pyscf.scf.ghf import GHF from gpu4pyscf.lib import logger @@ -129,16 +129,42 @@ def get_veff(ks, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): return vxc -class GKS(gks.GKS, GHF): +class GKS(rks.KohnShamDFT, GHF): from gpu4pyscf.lib.utils import to_cpu, to_gpu, device def __init__(self, mol, xc='LDA,VWN'): + raise NotImplementedError("GKS is not implemented") GHF.__init__(self, mol) rks.KohnShamDFT.__init__(self, xc) - # self._numint = NumInt2C() + self._numint = numint2c.NumInt2C() + + def dump_flags(self, verbose=None): + GHF.dump_flags(self, verbose) + rks.KohnShamDFT.dump_flags(self, verbose) + logger.info(self, 'collinear = %s', self._numint.collinear) + if self._numint.collinear[0] == 'm': + logger.info(self, 'mcfun spin_samples = %s', self._numint.spin_samples) + logger.info(self, 'mcfun collinear_thrd = %s', self._numint.collinear_thrd) + logger.info(self, 'mcfun collinear_samples = %s', self._numint.collinear_samples) + return self + + @property + def collinear(self): + return self._numint.collinear + @collinear.setter + def collinear(self, val): + self._numint.collinear = val + + @property + def spin_samples(self): + return self._numint.spin_samples + @spin_samples.setter + def spin_samples(self, val): + self._numint.spin_samples = val get_veff = get_veff reset = rks.RKS.reset energy_elec = rks.RKS.energy_elec nuc_grad_method = NotImplemented to_hf = NotImplemented + From a402df08e3dac6d6a4cbd3ed3654e93b733f0276 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Wed, 5 Nov 2025 14:02:08 +0800 Subject: [PATCH 12/21] change some typos --- gpu4pyscf/dft/numint2c.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu4pyscf/dft/numint2c.py b/gpu4pyscf/dft/numint2c.py index 00c86dfae..fceca2286 100644 --- a/gpu4pyscf/dft/numint2c.py +++ b/gpu4pyscf/dft/numint2c.py @@ -21,7 +21,7 @@ import cupy as cp from pyscf import lib from gpu4pyscf.dft import numint, mcfun_gpu -from gpu4pyscf.dft.numint import _dot_ao_dm, _dot_ao_ao, _scale_ao, _tau_dot +from gpu4pyscf.dft.numint import _dot_ao_dm, _dot_ao_ao, _scale_ao from gpu4pyscf.dft import xc_deriv from pyscf import __config__ From 739f594e92a32e27387266695a5c027b4556311e Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Wed, 5 Nov 2025 14:30:14 +0800 Subject: [PATCH 13/21] fix a bug --- gpu4pyscf/dft/tests/test_numint2c.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu4pyscf/dft/tests/test_numint2c.py b/gpu4pyscf/dft/tests/test_numint2c.py index 7a1e11656..25e5514b1 100644 --- a/gpu4pyscf/dft/tests/test_numint2c.py +++ b/gpu4pyscf/dft/tests/test_numint2c.py @@ -38,7 +38,7 @@ def setUpModule(): basis = 'ccpvdz', charge = 1, spin = 1, # = 2S = spin_up - spin_down - # output = '/dev/null' + output = '/dev/null' ) np.random.seed(2) From 97bb2cf461bd2ef1ae6af1dc410acb84736ce26a Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Wed, 5 Nov 2025 16:50:07 +0800 Subject: [PATCH 14/21] add ghf support for complex --- gpu4pyscf/scf/ghf.py | 34 ++++++++---- gpu4pyscf/scf/hf.py | 4 ++ gpu4pyscf/scf/tests/test_ghf.py | 92 +++++++++++++++++++++++++++++++-- requirements.txt | 3 +- setup.py | 1 + 5 files changed, 119 insertions(+), 15 deletions(-) diff --git a/gpu4pyscf/scf/ghf.py b/gpu4pyscf/scf/ghf.py index cd8336d7f..8e8b3efee 100644 --- a/gpu4pyscf/scf/ghf.py +++ b/gpu4pyscf/scf/ghf.py @@ -85,6 +85,7 @@ def get_jk(self, mol=None, dm=None, hermi=0, with_j=True, with_k=True, return vj, vk def get_j(self, mol=None, dm=None, hermi=1, omega=None): + assert hermi == 1, 'hermi must be 1' dm = asarray(dm) dm_shape = dm.shape nso = dm.shape[-1] @@ -92,8 +93,8 @@ def get_j(self, mol=None, dm=None, hermi=1, omega=None): dm = dm.reshape(-1,nso,nso) n_dm = dm.shape[0] dm = dm[:,:nao,:nao] + dm[:,nao:,nao:] - jtmp = hf.SCF.get_j(self, mol, dm, hermi, omega) - vj = cp.zeros((n_dm,nso,nso)) + jtmp = hf.SCF.get_j(self, mol, dm.real, hermi, omega) + vj = cp.zeros((n_dm,nso,nso), dtype=dm.dtype) vj[:,:nao,:nao] = vj[:,nao:,nao:] = jtmp return vj.reshape(dm_shape) @@ -108,14 +109,27 @@ def get_k(self, mol=None, dm=None, hermi=1, omega=None): dmbb = dm[:,nao:,nao:] dmab = dm[:,:nao,nao:] dmba = dm[:,nao:,:nao] - dm = cp.vstack((dmaa, dmbb, dmab, dmba)) - ktmp = super().get_k(mol, dm, hermi=0, omega=omega) - ktmp = ktmp.reshape(4,n_dm,nao,nao) - vk = cp.zeros((n_dm,nso,nso), dm.dtype) - vk[:,:nao,:nao] = ktmp[0] - vk[:,nao:,nao:] = ktmp[1] - vk[:,:nao,nao:] = ktmp[2] - vk[:,nao:,:nao] = ktmp[3] + if dm.dtype == cp.complex128: + dm_real = cp.vstack((dmaa.real, dmbb.real, dmab.real, dmba.real)) + ktmp_real = super().get_k(mol, dm_real, hermi=0, omega=omega) + ktmp_real = ktmp_real.reshape(4,n_dm,nao,nao) + dm_imag = cp.vstack((dmaa.imag, dmbb.imag, dmab.imag, dmba.imag)) + ktmp_imag = super().get_k(mol, dm_imag, hermi=0, omega=omega) + ktmp_imag = ktmp_imag.reshape(4,n_dm,nao,nao) + vk = cp.zeros((n_dm,nso,nso), dm.dtype) + vk[:,:nao,:nao] = ktmp_real[0] + 1j*ktmp_imag[0] + vk[:,nao:,nao:] = ktmp_real[1] + 1j*ktmp_imag[1] + vk[:,:nao,nao:] = ktmp_real[2] + 1j*ktmp_imag[2] + vk[:,nao:,:nao] = ktmp_real[3] + 1j*ktmp_imag[3] + else: + dm = cp.vstack((dmaa, dmbb, dmab, dmba)) + ktmp = super().get_k(mol, dm, hermi=0, omega=omega) + ktmp = ktmp.reshape(4,n_dm,nao,nao) + vk = cp.zeros((n_dm,nso,nso), dm.dtype) + vk[:,:nao,:nao] = ktmp[0] + vk[:,nao:,nao:] = ktmp[1] + vk[:,:nao,nao:] = ktmp[2] + vk[:,nao:,:nao] = ktmp[3] return vk.reshape(dm_shape) def get_veff(mf, mol=None, dm=None, dm_last=None, vhf_last=None, hermi=1): diff --git a/gpu4pyscf/scf/hf.py b/gpu4pyscf/scf/hf.py index f384f78dd..fbce65008 100644 --- a/gpu4pyscf/scf/hf.py +++ b/gpu4pyscf/scf/hf.py @@ -145,6 +145,8 @@ def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, if dm is None: dm = mf.make_rdm1() s1e = cupy.asarray(s1e) dm = cupy.asarray(dm) + if dm.dtype == cupy.complex128: + s1e = s1e.astype(cupy.complex128) if diis_start_cycle is None: diis_start_cycle = mf.diis_start_cycle if level_shift_factor is None: @@ -661,6 +663,8 @@ def eig(self, fock, s): x = None if hasattr(self, 'overlap_canonical_decomposed_x') and self.overlap_canonical_decomposed_x is not None: x = cupy.asarray(self.overlap_canonical_decomposed_x) + if fock.dtype == cupy.complex128: + s = s.astype(cupy.complex128) if x is None: mo_energy, mo_coeff = eigh(fock, s) return mo_energy, mo_coeff diff --git a/gpu4pyscf/scf/tests/test_ghf.py b/gpu4pyscf/scf/tests/test_ghf.py index 3bea9800a..b8dda3305 100644 --- a/gpu4pyscf/scf/tests/test_ghf.py +++ b/gpu4pyscf/scf/tests/test_ghf.py @@ -14,19 +14,103 @@ import unittest import pyscf +import numpy +import cupy as cp + +def setUpModule(): + global mol, mol1 + mol = pyscf.gto.Mole() + mol.atom = ''' + O 0 0 0 + H 0. -0.757 0.587 + H 0. 0.757 0.587''' + mol.spin = None + mol.basis = 'cc-pvdz' + mol.verbose = 7 + mol.output = '/dev/null' + mol.build() + + mol1 = pyscf.gto.M( + verbose = 0, + atom = ''' + O 0 0 0 + H 0. -0.757 0.587 + H 0. 0.757 0.587''', + charge = 1, + spin = 1, + basis = 'cc-pvdz', + output = '/dev/null') + +def tearDownModule(): + global mol, mol1 + mol.stdout.close() + mol1.stdout.close() + del mol, mol1 class KnownValues(unittest.TestCase): def test_ghf_scf(self): - mol = pyscf.M(atom=''' -O 0 0 0 -H 0 -0.757 0.587 -H 0 0.757 0.587''', basis = 'cc-pvdz') mf = mol.GHF().to_gpu() assert mf.device == 'gpu' e_tot = mf.kernel() e_ref = mf.to_cpu().kernel() assert abs(e_tot - e_ref) < 1e-5 + def test_ghf_scf(self): + mf = mol.GHF().to_gpu() + assert mf.device == 'gpu' + e_tot = mf.kernel() + dm1 = mf.make_rdm1() + numpy.random.seed(1) + n2c = mol.nao_nr() * 2 + dm_perturb = numpy.random.random((n2c,n2c)) + 1j*numpy.random.random((n2c,n2c)) + dm_perturb = dm_perturb + dm_perturb.T.conj() + dm_perturb = cp.asarray(dm_perturb) + dm_1 = dm1 + dm_perturb*0.001 + e_ref = mf.to_cpu().kernel(dm_1.get()) + e_tot = mf.kernel(dm_1) + assert abs(e_tot - e_ref) < 1e-5 + + def test_get_jk_complex(self): + mf = mol.GHF().to_gpu() + nao = mol.nao_nr()*2 + numpy.random.seed(1) + d1 = numpy.random.random((nao,nao)) + 1j*numpy.random.random((nao,nao)) + d = d1 + d1.T.conj() + d_real = d.real + d_imag = d.imag + vj_gpu = mf.get_j(mol, d_real) + vk_gpu = mf.get_k(mol, d) + + mf_cpu = mf.to_cpu() + vj_cpu = mf_cpu.get_j(mol, d_real) + vk_cpu = mf_cpu.get_k(mol, d) + + assert numpy.allclose(vj_gpu, vj_cpu) + assert numpy.allclose(vk_gpu, vk_cpu) + + def test_get_jk_real(self): + mf = mol.GHF().to_gpu() + + nao = mol.nao_nr()*2 + numpy.random.seed(1) + d1 = numpy.random.random((nao,nao)) + d = d1 + d1.T.conj() + d_real = d.real + d_imag = d.imag + vj_gpu = mf.get_j(mol, d_real) + vk_gpu = mf.get_k(mol, d) + print("vj_gpu", vj_gpu) + print("vk_gpu", vk_gpu) + + mf_cpu = mf.to_cpu() + vj_cpu = mf_cpu.get_j(mol, d_real) + vk_cpu = mf_cpu.get_k(mol, d) + print("vj_cpu", vj_cpu) + print("vk_cpu", vk_cpu) + + assert numpy.allclose(vj_gpu, vj_cpu) + assert numpy.allclose(vk_gpu, vk_cpu) + #def test_ghf_x2c(self): # pass diff --git a/requirements.txt b/requirements.txt index 70660747d..6ef95eab3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ cupy-cuda12x==13.4.1 pyscf==2.8.0 basis-set-exchange==0.11 pyscf-dispersion==1.3.0 -geometric==1.1.0 \ No newline at end of file +geometric==1.1.0 +MCfun==0.2.5 \ No newline at end of file diff --git a/setup.py b/setup.py index 111e4f3c2..2070dfd4b 100755 --- a/setup.py +++ b/setup.py @@ -139,5 +139,6 @@ def initialize_with_default_plat_name(self): f'cupy-cuda{CUDA_VERSION}>=13.0,!=13.4.0', # Due to expm in cupyx.scipy.linalg and cutensor 2.0 'geometric', f'gpu4pyscf-libxc-cuda{CUDA_VERSION}==0.5', + 'MCfun==0.2.5', ] ) From dab3064b88e7efcda440a4babc583aa288ef93d8 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Wed, 5 Nov 2025 16:53:23 +0800 Subject: [PATCH 15/21] fix some typos --- gpu4pyscf/scf/tests/test_ghf.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gpu4pyscf/scf/tests/test_ghf.py b/gpu4pyscf/scf/tests/test_ghf.py index b8dda3305..2a9fce3b6 100644 --- a/gpu4pyscf/scf/tests/test_ghf.py +++ b/gpu4pyscf/scf/tests/test_ghf.py @@ -55,7 +55,7 @@ def test_ghf_scf(self): e_ref = mf.to_cpu().kernel() assert abs(e_tot - e_ref) < 1e-5 - def test_ghf_scf(self): + def test_ghf_scf_complex_dm(self): mf = mol.GHF().to_gpu() assert mf.device == 'gpu' e_tot = mf.kernel() @@ -77,7 +77,6 @@ def test_get_jk_complex(self): d1 = numpy.random.random((nao,nao)) + 1j*numpy.random.random((nao,nao)) d = d1 + d1.T.conj() d_real = d.real - d_imag = d.imag vj_gpu = mf.get_j(mol, d_real) vk_gpu = mf.get_k(mol, d) @@ -96,7 +95,6 @@ def test_get_jk_real(self): d1 = numpy.random.random((nao,nao)) d = d1 + d1.T.conj() d_real = d.real - d_imag = d.imag vj_gpu = mf.get_j(mol, d_real) vk_gpu = mf.get_k(mol, d) print("vj_gpu", vj_gpu) From eb660f9611cca50015fdb750e2b182398e5f1de5 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Thu, 6 Nov 2025 16:44:26 +0800 Subject: [PATCH 16/21] finish GKS tests --- gpu4pyscf/dft/gks.py | 13 ++- gpu4pyscf/dft/numint2c.py | 33 +++--- gpu4pyscf/dft/tests/test_gks.py | 162 ++++++++++++++++++++++++--- gpu4pyscf/dft/tests/test_numint2c.py | 4 +- gpu4pyscf/scf/ghf.py | 11 +- gpu4pyscf/scf/tests/test_ghf.py | 17 ++- 6 files changed, 198 insertions(+), 42 deletions(-) diff --git a/gpu4pyscf/dft/gks.py b/gpu4pyscf/dft/gks.py index 018cf6293..bd9259a6f 100644 --- a/gpu4pyscf/dft/gks.py +++ b/gpu4pyscf/dft/gks.py @@ -19,6 +19,7 @@ from gpu4pyscf.dft import rks from gpu4pyscf.scf.ghf import GHF from gpu4pyscf.lib import logger +from gpu4pyscf.lib import utils from gpu4pyscf.lib.cupy_helper import tag_array @@ -65,6 +66,8 @@ def get_veff(ks, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): else: max_memory = ks.max_memory - lib.current_memory()[0] ni = ks._numint + if ni.collinear[0].lower() != 'm': + raise NotImplementedError('Only multi-colinear GKS is implemented') n, exc, vxc = ni.get_vxc(mol, ks.grids, ks.xc, dm, hermi=hermi, max_memory=max_memory) logger.debug(ks, 'nelec by numeric integration = %s', n) @@ -130,10 +133,10 @@ def get_veff(ks, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): class GKS(rks.KohnShamDFT, GHF): - from gpu4pyscf.lib.utils import to_cpu, to_gpu, device + to_gpu = utils.to_gpu + device = utils.device def __init__(self, mol, xc='LDA,VWN'): - raise NotImplementedError("GKS is not implemented") GHF.__init__(self, mol) rks.KohnShamDFT.__init__(self, xc) self._numint = numint2c.NumInt2C() @@ -167,4 +170,8 @@ def spin_samples(self, val): energy_elec = rks.RKS.energy_elec nuc_grad_method = NotImplemented to_hf = NotImplemented - + + def to_cpu(self): + mf = gks.GKS(self.mol) + utils.to_cpu(self, out=mf) + return mf diff --git a/gpu4pyscf/dft/numint2c.py b/gpu4pyscf/dft/numint2c.py index fceca2286..cdc78e087 100644 --- a/gpu4pyscf/dft/numint2c.py +++ b/gpu4pyscf/dft/numint2c.py @@ -20,9 +20,11 @@ import numpy as np import cupy as cp from pyscf import lib +from pyscf.dft import numint2c from gpu4pyscf.dft import numint, mcfun_gpu from gpu4pyscf.dft.numint import _dot_ao_dm, _dot_ao_ao, _scale_ao from gpu4pyscf.dft import xc_deriv +from gpu4pyscf.lib import utils from pyscf import __config__ @@ -382,6 +384,9 @@ def _contract_rho_m(bra, ket, hermi=0, bra_eq_ket=False): class NumInt2C(lib.StreamObject, numint.LibXCMixin): '''Numerical integration methods for 2-component basis (used by GKS)''' _keys = {'gdftopt'} + to_gpu = utils.to_gpu + device = utils.device + gdftopt = None # collinear schemes: @@ -446,33 +451,23 @@ def get_rho(self, mol, dm, grids, max_memory=2000): _gks_mcol_fxc = _gks_mcol_fxc @lib.with_doc(numint.nr_rks.__doc__) - def nr_vxc(self, mol, grids, xc_code, dms, spin=0, relativity=0, hermi=1, + def nr_vxc(self, mol, grids, xc_code, dms, relativity=0, hermi=1, max_memory=2000, verbose=None): if not isinstance(dms, cp.ndarray): dms = cp.asarray(dms) - if self.collinear[0] in ('m', 'n'): # mcol or ncol + if self.collinear[0] in ('m',): # mcol or ncol opt = getattr(self, 'gdftopt', None) if opt is None: self.build(mol, grids.coords) opt = self.gdftopt - assert dms.ndim == 2 - dms = cp.asarray(dms) - dms = opt.sort_orbitals_2c(dms, axis=[0,1]) + assert dms.ndim == 2 + dms = cp.asarray(dms) + dms = opt.sort_orbitals_2c(dms, axis=[0,1]) n, exc, vmat = self._gks_mcol_vxc(mol, grids, xc_code, dms, relativity, hermi, max_memory, verbose) vmat = opt.unsort_orbitals_2c(vmat, axis=[0,1]) else: - nao = dms.shape[-1] // 2 - # ground state density is always real - dm_a = dms[...,:nao,:nao].real.copy('C') - dm_b = dms[...,nao:,nao:].real.copy('C') - dm1 = (dm_a, dm_b) - ni = self._to_numint1c() - n, exc, v = ni.nr_uks(mol, grids, xc_code, dm1, relativity, - hermi, max_memory, verbose) - vmat = cp.zeros_like(dms) - vmat[...,:nao,:nao] = v[0] - vmat[...,nao:,nao:] = v[1] + raise NotImplementedError("Locally collinear and collinear is not implemented") return n.sum(), exc, vmat get_vxc = nr_gks_vxc = nr_vxc @@ -497,4 +492,8 @@ def _init_xcfuns(self, xc_code, spin=0): def _to_numint1c(self): '''Converts to the associated class to handle collinear systems''' - return self.view(numint.NumInt) \ No newline at end of file + return self.view(numint.NumInt) + + def to_cpu(self): + ni = numint2c.NumInt2C() + return ni \ No newline at end of file diff --git a/gpu4pyscf/dft/tests/test_gks.py b/gpu4pyscf/dft/tests/test_gks.py index 6a5e12d83..d9e420b45 100644 --- a/gpu4pyscf/dft/tests/test_gks.py +++ b/gpu4pyscf/dft/tests/test_gks.py @@ -12,17 +12,153 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pickle -import numpy as np import unittest -import pyscf -from gpu4pyscf.dft import rks - -atom = ''' -O 0.0000000000 -0.0000000000 0.1174000000 -H -0.7570000000 -0.0000000000 -0.4696000000 -H 0.7570000000 0.0000000000 -0.4696000000 -''' -bas='def2-tzvpp' -grids_level = 5 -nlcgrids_level = 2 \ No newline at end of file +from pyscf import gto +from pyscf import lib +from gpu4pyscf.dft import gks +from pyscf.dft import gks as gks_cpu + + +def setUpModule(): + global mol, mol1 + mol = gto.Mole() + mol.atom = ''' + O 0 0 0 + H 0. -0.757 0.587 + H 0. 0.757 0.587''' + mol.spin = None + mol.basis = 'sto3g' + mol.output = '/dev/null' + mol.build() + + mol1 = gto.M( + atom = ''' + O 0 0 0 + H 0. -0.757 0.587 + H 0. 0.757 0.587''', + charge = 1, + spin = 1, + basis = 'sto3g', + output = '/dev/null' + ) + + +def tearDownModule(): + global mol, mol1 + mol.stdout.close() + mol1.stdout.close() + del mol, mol1 + + +class KnownValues(unittest.TestCase): + def test_mcol_gks_lda(self): + mf_cpu = gks_cpu.GKS(mol) + mf_cpu.xc = 'lda,' + mf_cpu.collinear = 'mcol' + mf_cpu._numint.spin_samples = 6 + eks4_cpu = mf_cpu.kernel() + + mf_gpu = gks.GKS(mol) + mf_gpu.xc = 'lda,' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 6 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, eks4_cpu) + self.assertAlmostEqual(eks4_gpu, -74.0600297733097, 6) + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), lib.fp(mf_cpu.mo_energy), 5) + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -26.421986983504258, 5) + + mf_cpu = gks_cpu.GKS(mol1) + mf_cpu.xc = 'lda,vwn' + mf_cpu.collinear = 'mcol' + mf_cpu._numint.spin_samples = 50 + eks4_cpu = mf_cpu.kernel() + + mf_gpu = gks.GKS(mol1) + mf_gpu.xc = 'lda,vwn' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 50 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, eks4_cpu, 6) + self.assertAlmostEqual(eks4_gpu, -74.3741809222222, 6) + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), lib.fp(mf_cpu.mo_energy), 5) + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -27.63368769213053, 5) + + def test_mcol_gks_gga(self): + mf_cpu = gks_cpu.GKS(mol) + mf_cpu.xc = 'pbe' + mf_cpu.collinear = 'mcol' + mf_cpu._numint.spin_samples = 6 + eks4_cpu = mf_cpu.kernel() + + mf_gpu = gks.GKS(mol) + mf_gpu.xc = 'pbe' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 6 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, eks4_cpu) + self.assertAlmostEqual(eks4_gpu, -75.2256398121708, 6) + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), lib.fp(mf_cpu.mo_energy), 5) + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -26.81184613393452, 5) + + mf_gpu = gks.GKS(mol1) + mf_gpu.xc = 'pbe' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 50 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, -74.869954771937, 6) # pyscf result + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -27.92164954706164, 5) # pyscf result + + def test_mcol_gks_hyb(self): + mf_gpu = gks.GKS(mol) + mf_gpu.xc = 'b3lyp' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 6 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, -75.312587317089, 6) # pyscf result + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -27.2469582128507, 5) # pyscf result + + mf_gpu = gks.GKS(mol1) + mf_gpu.xc = 'b3lyp' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 50 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, -74.9528036305753, 6) # pyscf result + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -28.49145406025193, 5) # pyscf result + + def test_mcol_gks_mgga(self): + mf_gpu = gks.GKS(mol) + mf_gpu.xc = 'm06l' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 6 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, -75.3053691716776, 6) # pyscf result + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -27.03099891671804, 5) # pyscf result + + mf_gpu = gks.GKS(mol1) + mf_gpu.xc = 'm06l' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 50 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, -74.9468853267496, 6) # pyscf result + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -28.188215296679516, 5) # pyscf result + + def test_to_cpu(self): + mf_gpu = gks.GKS(mol1) + mf_gpu.xc = 'lda,vwn' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 50 + eks4_gpu = mf_gpu.kernel() + + mf_cpu = mf_gpu.to_cpu() + eks4_cpu = mf_cpu.kernel() + + self.assertAlmostEqual(eks4_gpu, eks4_cpu, 6) + self.assertAlmostEqual(eks4_gpu, -74.3741809222222, 6) + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), lib.fp(mf_cpu.mo_energy), 5) + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -27.63368769213053, 5) + + +if __name__ == "__main__": + print("Test GKS") + unittest.main() diff --git a/gpu4pyscf/dft/tests/test_numint2c.py b/gpu4pyscf/dft/tests/test_numint2c.py index 25e5514b1..e9a69b337 100644 --- a/gpu4pyscf/dft/tests/test_numint2c.py +++ b/gpu4pyscf/dft/tests/test_numint2c.py @@ -124,14 +124,14 @@ def test_get_rho(self): rho_1c_gpu = ni_gpu_1c.get_rho(mol, dm_1c_test.real, grids_gpu) self.assertAlmostEqual(abs(rho_gpu.get() - rho_1c_gpu.get()).max(), 0, 10) - def test_eval_xc_eff(self): ni_gpu = NumInt2C() ni_gpu.collinear='m' ni_cpu = pyscf_numint2c() ni_cpu.collinear='m' np.random.seed(1) - dm = dm0*1.0 + 0.0j + dm = dm0*1.0 + dm0 * 0.1j + dm = dm + dm.T.conj() for xc_code in (LDA, ): ni_gpu.gdftopt = None n_gpu, exc_gpu, vmat_gpu = ni_gpu.nr_vxc(mol, grids_gpu, xc_code, dm) diff --git a/gpu4pyscf/scf/ghf.py b/gpu4pyscf/scf/ghf.py index 8e8b3efee..a2fcc5613 100644 --- a/gpu4pyscf/scf/ghf.py +++ b/gpu4pyscf/scf/ghf.py @@ -22,6 +22,15 @@ from gpu4pyscf.lib.cupy_helper import asarray, return_cupy_array from gpu4pyscf.lib import utils +def _from_rhf_init_dm(dma, breaksym=True): + dma = dma * .5 + dm = block_diag(dma, dma) + if breaksym: + nao = dma.shape[0] + idx, idy = cp.diag_indices(nao) + dm[idx+nao,idy] = dm[idx,idy+nao] = dma.diagonal() * .05 + return dm + class GHF(hf.SCF): to_gpu = utils.to_gpu device = utils.device @@ -55,7 +64,7 @@ class GHF(hf.SCF): def get_init_guess(self, mol=None, key='minao', **kwargs): dma = hf.RHF.get_init_guess(self, mol, key, **kwargs) - return block_diag(dma, dma) + return _from_rhf_init_dm(dma) def get_hcore(self, mol=None): if mol is None: mol = self.mol diff --git a/gpu4pyscf/scf/tests/test_ghf.py b/gpu4pyscf/scf/tests/test_ghf.py index 2a9fce3b6..f65fe15bc 100644 --- a/gpu4pyscf/scf/tests/test_ghf.py +++ b/gpu4pyscf/scf/tests/test_ghf.py @@ -17,6 +17,7 @@ import numpy import cupy as cp + def setUpModule(): global mol, mol1 mol = pyscf.gto.Mole() @@ -41,6 +42,7 @@ def setUpModule(): basis = 'cc-pvdz', output = '/dev/null') + def tearDownModule(): global mol, mol1 mol.stdout.close() @@ -97,20 +99,23 @@ def test_get_jk_real(self): d_real = d.real vj_gpu = mf.get_j(mol, d_real) vk_gpu = mf.get_k(mol, d) - print("vj_gpu", vj_gpu) - print("vk_gpu", vk_gpu) mf_cpu = mf.to_cpu() vj_cpu = mf_cpu.get_j(mol, d_real) vk_cpu = mf_cpu.get_k(mol, d) - print("vj_cpu", vj_cpu) - print("vk_cpu", vk_cpu) assert numpy.allclose(vj_gpu, vj_cpu) assert numpy.allclose(vk_gpu, vk_cpu) - #def test_ghf_x2c(self): - # pass + def test_to_cpu(self): + mf = mol.GHF().to_gpu() + assert mf.device == 'gpu' + e_tot = mf.kernel() + mf = mf.to_cpu() + assert mf.device == 'cpu' + e_ref = mf.kernel() + assert abs(e_tot - e_ref) < 1e-5 + if __name__ == "__main__": print("Full Tests for ghf") From 0d37aee245b68b2f8b87803b2de8184d7a4bac2a Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 7 Nov 2025 10:28:29 +0800 Subject: [PATCH 17/21] fix a bug --- gpu4pyscf/scf/tests/test_ghf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu4pyscf/scf/tests/test_ghf.py b/gpu4pyscf/scf/tests/test_ghf.py index f65fe15bc..d82b77e2e 100644 --- a/gpu4pyscf/scf/tests/test_ghf.py +++ b/gpu4pyscf/scf/tests/test_ghf.py @@ -112,7 +112,7 @@ def test_to_cpu(self): assert mf.device == 'gpu' e_tot = mf.kernel() mf = mf.to_cpu() - assert mf.device == 'cpu' + assert getattr(mf, 'device', None) is None e_ref = mf.kernel() assert abs(e_tot - e_ref) < 1e-5 From 5c36b0539be32c778b5e398e5eb5f54d926642c0 Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 7 Nov 2025 11:27:03 +0800 Subject: [PATCH 18/21] change the tests --- .github/workflows/unittest.yml | 2 +- gpu4pyscf/dft/tests/test_gks.py | 29 +---- gpu4pyscf/dft/tests/test_numint2c.py | 168 ++++++++++++++++++++++++--- requirements.txt | 3 +- setup.py | 1 - 5 files changed, 162 insertions(+), 41 deletions(-) diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index bae786e4b..3f93144da 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -33,7 +33,7 @@ jobs: -v $GITHUB_WORKSPACE:/workspace \ -v ~/.cache/pip:/root/.cache/pip \ pyscf/gpu4pyscf-devel:pyscf-2.8 \ - /bin/bash -c "cd /workspace && pip3 install -r requirements.txt && source build.sh && pytest -m 'not slow and not benchmark' --cov=/workspace --durations=50 && rm -rf .pytest_cache" + /bin/bash -c "cd /workspace && pip3 install -r requirements.txt && pip3 install MCfun && source build.sh && pytest -m 'not slow and not benchmark' --cov=/workspace --durations=50 && rm -rf .pytest_cache" multi-gpu: runs-on: [self-hosted, Linux, X64, 2T4] diff --git a/gpu4pyscf/dft/tests/test_gks.py b/gpu4pyscf/dft/tests/test_gks.py index d9e420b45..74a999f93 100644 --- a/gpu4pyscf/dft/tests/test_gks.py +++ b/gpu4pyscf/dft/tests/test_gks.py @@ -17,6 +17,10 @@ from pyscf import lib from gpu4pyscf.dft import gks from pyscf.dft import gks as gks_cpu +try: + import mcfun +except ImportError: + mcfun = None def setUpModule(): @@ -52,53 +56,31 @@ def tearDownModule(): class KnownValues(unittest.TestCase): def test_mcol_gks_lda(self): - mf_cpu = gks_cpu.GKS(mol) - mf_cpu.xc = 'lda,' - mf_cpu.collinear = 'mcol' - mf_cpu._numint.spin_samples = 6 - eks4_cpu = mf_cpu.kernel() - + mf_gpu = gks.GKS(mol) mf_gpu.xc = 'lda,' mf_gpu.collinear = 'mcol' mf_gpu._numint.spin_samples = 6 eks4_gpu = mf_gpu.kernel() - self.assertAlmostEqual(eks4_gpu, eks4_cpu) self.assertAlmostEqual(eks4_gpu, -74.0600297733097, 6) - self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), lib.fp(mf_cpu.mo_energy), 5) self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -26.421986983504258, 5) - mf_cpu = gks_cpu.GKS(mol1) - mf_cpu.xc = 'lda,vwn' - mf_cpu.collinear = 'mcol' - mf_cpu._numint.spin_samples = 50 - eks4_cpu = mf_cpu.kernel() - mf_gpu = gks.GKS(mol1) mf_gpu.xc = 'lda,vwn' mf_gpu.collinear = 'mcol' mf_gpu._numint.spin_samples = 50 eks4_gpu = mf_gpu.kernel() - self.assertAlmostEqual(eks4_gpu, eks4_cpu, 6) self.assertAlmostEqual(eks4_gpu, -74.3741809222222, 6) - self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), lib.fp(mf_cpu.mo_energy), 5) self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -27.63368769213053, 5) def test_mcol_gks_gga(self): - mf_cpu = gks_cpu.GKS(mol) - mf_cpu.xc = 'pbe' - mf_cpu.collinear = 'mcol' - mf_cpu._numint.spin_samples = 6 - eks4_cpu = mf_cpu.kernel() mf_gpu = gks.GKS(mol) mf_gpu.xc = 'pbe' mf_gpu.collinear = 'mcol' mf_gpu._numint.spin_samples = 6 eks4_gpu = mf_gpu.kernel() - self.assertAlmostEqual(eks4_gpu, eks4_cpu) self.assertAlmostEqual(eks4_gpu, -75.2256398121708, 6) - self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), lib.fp(mf_cpu.mo_energy), 5) self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -26.81184613393452, 5) mf_gpu = gks.GKS(mol1) @@ -143,6 +125,7 @@ def test_mcol_gks_mgga(self): self.assertAlmostEqual(eks4_gpu, -74.9468853267496, 6) # pyscf result self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -28.188215296679516, 5) # pyscf result + @unittest.skipIf(mcfun is None, "mcfun library not found.") def test_to_cpu(self): mf_gpu = gks.GKS(mol1) mf_gpu.xc = 'lda,vwn' diff --git a/gpu4pyscf/dft/tests/test_numint2c.py b/gpu4pyscf/dft/tests/test_numint2c.py index e9a69b337..c73cedbd5 100644 --- a/gpu4pyscf/dft/tests/test_numint2c.py +++ b/gpu4pyscf/dft/tests/test_numint2c.py @@ -27,6 +27,10 @@ from gpu4pyscf.dft.numint import NumInt from gpu4pyscf import dft from gpu4pyscf.dft import gen_grid +try: + import mcfun +except ImportError: + mcfun = None def setUpModule(): global mol, grids_cpu, grids_gpu, dm, dm0, dm1, mo_occ, mo_coeff @@ -124,6 +128,7 @@ def test_get_rho(self): rho_1c_gpu = ni_gpu_1c.get_rho(mol, dm_1c_test.real, grids_gpu) self.assertAlmostEqual(abs(rho_gpu.get() - rho_1c_gpu.get()).max(), 0, 10) + @unittest.skipIf(mcfun is None, "mcfun library not found.") def test_eval_xc_eff(self): ni_gpu = NumInt2C() ni_gpu.collinear='m' @@ -132,14 +137,39 @@ def test_eval_xc_eff(self): np.random.seed(1) dm = dm0*1.0 + dm0 * 0.1j dm = dm + dm.T.conj() - for xc_code in (LDA, ): - ni_gpu.gdftopt = None + for xc_code in (LDA, GGA_PBE, MGGA_M06): n_gpu, exc_gpu, vmat_gpu = ni_gpu.nr_vxc(mol, grids_gpu, xc_code, dm) n_cpu, exc_cpu, vmat_cpu = ni_cpu.nr_vxc(mol, grids_cpu, xc_code, dm) self.assertAlmostEqual(abs(n_gpu.get() - n_cpu).max(), 0, 10) self.assertAlmostEqual(abs(exc_gpu.get() - exc_cpu).max(), 0, 10) self.assertAlmostEqual(abs(vmat_gpu.get() - vmat_cpu).max(), 0, 10) + def test_eval_xc_eff_fp(self): + ni_gpu = NumInt2C() + ni_gpu.collinear='m' + np.random.seed(1) + dm = dm0*1.0 + dm0 * 0.1j + dm = dm + dm.T.conj() + + n_gpu, exc_gpu, vmat_gpu = ni_gpu.nr_vxc(mol, grids_gpu, LDA, dm) + self.assertAlmostEqual(abs(n_gpu.get() - 17.9999262659497).max(), 0, 10) + self.assertAlmostEqual(abs(exc_gpu.get() - -1.310501342423071).max(), 0, 10) + self.assertAlmostEqual(abs(lib.fp(vmat_gpu.get()) + - (-0.20448306536588537-6.75460139752253e-21j)).max(), 0, 10) + + n_gpu, exc_gpu, vmat_gpu = ni_gpu.nr_vxc(mol, grids_gpu, GGA_PBE, dm) + self.assertAlmostEqual(abs(n_gpu.get() - 17.9999262659497).max(), 0, 10) + self.assertAlmostEqual(abs(exc_gpu.get() - -0.7237150857425112).max(), 0, 10) + self.assertAlmostEqual(abs(lib.fp(vmat_gpu.get()) + - (-0.05446425800187435-4.486282070082083e-21j)).max(), 0, 10) + + n_gpu, exc_gpu, vmat_gpu = ni_gpu.nr_vxc(mol, grids_gpu, MGGA_M06, dm) + self.assertAlmostEqual(abs(n_gpu.get() - 17.9999262659497).max(), 0, 10) + self.assertAlmostEqual(abs(exc_gpu.get() - -0.7703982586705045).max(), 0, 10) + self.assertAlmostEqual(abs(lib.fp(vmat_gpu.get()) + - (-0.18688247306409317+7.50400133342109e-20j)).max(), 0, 10) + + @unittest.skipIf(mcfun is None, "mcfun library not found.") def test_mcol_lda_vxc_mat(self): xc_code = 'lda,' @@ -168,14 +198,51 @@ def test_mcol_lda_vxc_mat(self): mask_gpu = cupy.array([i for i in range(mol.nbas)]) shls_slice = (0, mol.nbas) - v0_cpu = pyscf_numint2c_file._mcol_lda_vxc_mat(mol, ao.transpose(1,0).get(), weight, rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) - v0_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 0) - v1_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 1) + v0_cpu = pyscf_numint2c_file._mcol_lda_vxc_mat(mol, ao.transpose(1,0).get(), weight, + rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) + v0_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 0) + v1_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 1) v1_gpu = v1_gpu + v1_gpu.conj().T - self.assertAlmostEqual(abs(v0_gpu.get() - v1_gpu.get()).max(), 0, 13) self.assertAlmostEqual(abs(v0_gpu.get() - v0_cpu).max(), 0, 13) + def test_mcol_lda_vxc_mat_fp(self): + xc_code = 'lda,' + + nao = mol.nao + n2c = nao * 2 + ao_loc = mol.ao_loc + np.random.seed(12) + dm = np.random.rand(n2c, n2c) * .001 + np.random.rand(n2c, n2c) * .0001j + dm += np.eye(n2c) + dm = dm + dm.T + ngrids = 8 + coords = np.random.rand(ngrids,3) + weight = np.random.rand(ngrids) + + ao = numint.eval_ao(mol, coords, deriv=0, transpose=False) + rho = numint2c.eval_rho(mol, ao, dm, xctype='LDA', hermi=1, with_lapl=False) + ni_GPU = NumInt2C() + ni_GPU.collinear = 'mcol' + eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) + vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='LDA')[1] + mask = np.ones((8, mol.nbas), dtype=np.uint8) + mask_gpu = cupy.array([i for i in range(mol.nbas)]) + + shls_slice = (0, mol.nbas) + v0_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 0) + v1_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 1) + v1_gpu = v1_gpu + v1_gpu.conj().T + self.assertAlmostEqual(abs(lib.fp(v0_gpu.get()) - + (-9.596802282363786+0.000531850990220307j)).max(), 0, 13) + self.assertAlmostEqual(abs(lib.fp(v1_gpu.get()) - + (-9.596802282363786+0.000531850990220307j)).max(), 0, 13) + + @unittest.skipIf(mcfun is None, "mcfun library not found.") def test_mcol_gga_vxc_mat(self): xc_code = 'pbe,' @@ -204,14 +271,51 @@ def test_mcol_gga_vxc_mat(self): mask_gpu = cupy.array([i for i in range(mol.nbas)]) shls_slice = (0, mol.nbas) - v0_cpu = pyscf_numint2c_file._mcol_gga_vxc_mat(mol, ao.transpose(0,2,1).get(), weight, rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) - v0_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 0) - v1_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 1) + v0_cpu = pyscf_numint2c_file._mcol_gga_vxc_mat(mol, ao.transpose(0,2,1).get(), weight, + rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) + v0_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 0) + v1_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 1) v1_gpu = v1_gpu + v1_gpu.conj().T - self.assertAlmostEqual(abs(v0_gpu.get() - v1_gpu.get()).max(), 0, 13) self.assertAlmostEqual(abs(v0_gpu.get() - v0_cpu).max(), 0, 13) + def test_mcol_gga_vxc_mat_fp(self): + xc_code = 'pbe,' + + nao = mol.nao + n2c = nao * 2 + ao_loc = mol.ao_loc + np.random.seed(12) + dm = np.random.rand(n2c, n2c) * .001 + np.random.rand(n2c, n2c) * .001j + dm += np.eye(n2c) + dm = dm + dm.T + ngrids = 8 + coords = np.random.rand(ngrids,3) + weight = np.random.rand(ngrids) + + ao = numint.eval_ao(mol, coords, deriv=1, transpose=False) + rho = numint2c.eval_rho(mol, ao, dm, xctype='GGA', hermi=1, with_lapl=False) + ni_GPU = NumInt2C() + ni_GPU.collinear = 'mcol' + eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) + vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='GGA')[1] + mask = np.ones((8, mol.nbas), dtype=np.uint8) + mask_gpu = cupy.array([i for i in range(mol.nbas)]) + + shls_slice = (0, mol.nbas) + v0_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 0) + v1_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 1) + v1_gpu = v1_gpu + v1_gpu.conj().T + self.assertAlmostEqual(abs(lib.fp(v1_gpu.get()) - + (-9.624252836509262+0.005273283360578891j)).max(), 0, 13) + self.assertAlmostEqual(abs(lib.fp(v0_gpu.get()) - + (-9.624252836509262+0.005273283360578891j)).max(), 0, 13) + + @unittest.skipIf(mcfun is None, "mcfun library not found.") def test_mcol_mgga_vxc_mat(self): xc_code = 'tpss' @@ -240,14 +344,50 @@ def test_mcol_mgga_vxc_mat(self): mask_gpu = cupy.array([i for i in range(mol.nbas)]) shls_slice = (0, mol.nbas) - v0_cpu = pyscf_numint2c_file._mcol_mgga_vxc_mat(mol, ao.transpose(0,2,1).get(), weight, rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) - v0_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 0) - v1_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 1) + v0_cpu = pyscf_numint2c_file._mcol_mgga_vxc_mat(mol, ao.transpose(0,2,1).get(), weight, + rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) + v0_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 0) + v1_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 1) v1_gpu = v1_gpu + v1_gpu.conj().T - self.assertAlmostEqual(abs(v0_gpu.get() - v1_gpu.get()).max(), 0, 13) self.assertAlmostEqual(abs(v0_gpu.get() - v0_cpu).max(), 0, 13) + def test_mcol_mgga_vxc_mat_fp(self): + xc_code = 'tpss' + + nao = mol.nao + n2c = nao * 2 + ao_loc = mol.ao_loc + np.random.seed(12) + dm = np.random.rand(n2c, n2c) * .001 + dm += np.eye(n2c) + dm = dm + dm.T + ngrids = 8 + coords = np.random.rand(ngrids,3) + weight = np.random.rand(ngrids) + + ao = numint.eval_ao(mol, coords, deriv=1, transpose=False) + rho = numint2c.eval_rho(mol, ao, dm, xctype='MGGA', hermi=1, with_lapl=False) + ni_GPU = NumInt2C() + ni_GPU.collinear = 'mcol' + eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) + vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='MGGA')[1] + mask = np.ones((8, mol.nbas), dtype=np.uint8) + mask_gpu = cupy.array([i for i in range(mol.nbas)]) + + shls_slice = (0, mol.nbas) + v0_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 0) + v1_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 1) + v1_gpu = v1_gpu + v1_gpu.conj().T + self.assertAlmostEqual(abs(lib.fp(v1_gpu.get()) + - (-11.359687631112195+6.13828986953566e-22j)).max(), 0, 13) + self.assertAlmostEqual(abs(lib.fp(v0_gpu.get()) + - (-11.359687631112195+6.13828986953566e-22j)).max(), 0, 13) + if __name__ == "__main__": print("Full Tests for dft numint2c") diff --git a/requirements.txt b/requirements.txt index 6ef95eab3..70660747d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,4 @@ cupy-cuda12x==13.4.1 pyscf==2.8.0 basis-set-exchange==0.11 pyscf-dispersion==1.3.0 -geometric==1.1.0 -MCfun==0.2.5 \ No newline at end of file +geometric==1.1.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 2070dfd4b..111e4f3c2 100755 --- a/setup.py +++ b/setup.py @@ -139,6 +139,5 @@ def initialize_with_default_plat_name(self): f'cupy-cuda{CUDA_VERSION}>=13.0,!=13.4.0', # Due to expm in cupyx.scipy.linalg and cutensor 2.0 'geometric', f'gpu4pyscf-libxc-cuda{CUDA_VERSION}==0.5', - 'MCfun==0.2.5', ] ) From 44e1ba8a3a186445d1461f7e9daeb9cd812a7e1d Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 7 Nov 2025 11:30:08 +0800 Subject: [PATCH 19/21] fix some typos --- gpu4pyscf/dft/tests/test_numint2c.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/gpu4pyscf/dft/tests/test_numint2c.py b/gpu4pyscf/dft/tests/test_numint2c.py index c73cedbd5..4eb5ff678 100644 --- a/gpu4pyscf/dft/tests/test_numint2c.py +++ b/gpu4pyscf/dft/tests/test_numint2c.py @@ -228,7 +228,6 @@ def test_mcol_lda_vxc_mat_fp(self): ni_GPU.collinear = 'mcol' eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='LDA')[1] - mask = np.ones((8, mol.nbas), dtype=np.uint8) mask_gpu = cupy.array([i for i in range(mol.nbas)]) shls_slice = (0, mol.nbas) @@ -301,7 +300,6 @@ def test_mcol_gga_vxc_mat_fp(self): ni_GPU.collinear = 'mcol' eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='GGA')[1] - mask = np.ones((8, mol.nbas), dtype=np.uint8) mask_gpu = cupy.array([i for i in range(mol.nbas)]) shls_slice = (0, mol.nbas) @@ -374,7 +372,6 @@ def test_mcol_mgga_vxc_mat_fp(self): ni_GPU.collinear = 'mcol' eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='MGGA')[1] - mask = np.ones((8, mol.nbas), dtype=np.uint8) mask_gpu = cupy.array([i for i in range(mol.nbas)]) shls_slice = (0, mol.nbas) From 2684db5e0bcb8f82a64f47bedad04f6d6048fbfc Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 14 Nov 2025 10:48:13 +0800 Subject: [PATCH 20/21] change the codes as review comments --- gpu4pyscf/dft/gks.py | 37 +++-- gpu4pyscf/dft/numint2c.py | 81 ++++++++++- gpu4pyscf/dft/tests/test_gks.py | 16 +++ gpu4pyscf/dft/tests/test_numint2c.py | 201 ++++++++++----------------- gpu4pyscf/scf/diis.py | 2 + gpu4pyscf/scf/hf.py | 2 - 6 files changed, 187 insertions(+), 152 deletions(-) diff --git a/gpu4pyscf/dft/gks.py b/gpu4pyscf/dft/gks.py index bd9259a6f..cb37c146b 100644 --- a/gpu4pyscf/dft/gks.py +++ b/gpu4pyscf/dft/gks.py @@ -84,47 +84,46 @@ def get_veff(ks, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): logger.debug(ks, 'nelec with nlc grids = %s', n) t0 = logger.timer(ks, 'vxc', *t0) - incremental_jk = (ks._eri is None and ks.direct_scf and - getattr(vhf_last, 'vj', None) is not None) - if incremental_jk: - _dm = cp.asarray(dm) - cp.asarray(dm_last) - else: - _dm = dm + + dm_orig = cp.asarray(dm) + vj_last = getattr(vhf_last, 'vj', None) + if vj_last is not None: + dm = cp.asarray(dm) - cp.asarray(dm_last) if not ni.libxc.is_hybrid_xc(ks.xc): vk = None - vj = ks.get_j(mol, _dm, hermi) - if incremental_jk: - vj += vhf_last.vj + vj = ks.get_j(mol, dm, hermi) + if vj_last is not None: + vj += vj_last vxc += vj else: omega, alpha, hyb = ni.rsh_and_hybrid_coeff(ks.xc, spin=mol.spin) if omega == 0: - vj, vk = ks.get_jk(mol, _dm, hermi) + vj, vk = ks.get_jk(mol, dm, hermi) vk *= hyb elif alpha == 0: # LR=0, only SR exchange - vj = ks.get_j(mol, _dm, hermi) - vk = ks.get_k(mol, _dm, hermi, omega=-omega) + vj = ks.get_j(mol, dm, hermi) + vk = ks.get_k(mol, dm, hermi, omega=-omega) vk *= hyb elif hyb == 0: # SR=0, only LR exchange - vj = ks.get_j(mol, _dm, hermi) - vk = ks.get_k(mol, _dm, hermi, omega=omega) + vj = ks.get_j(mol, dm, hermi) + vk = ks.get_k(mol, dm, hermi, omega=omega) vk *= alpha else: # SR and LR exchange with different ratios - vj, vk = ks.get_jk(mol, _dm, hermi) + vj, vk = ks.get_jk(mol, dm, hermi) vk *= hyb - vklr = ks.get_k(mol, _dm, hermi, omega=omega) + vklr = ks.get_k(mol, dm, hermi, omega=omega) vklr *= (alpha - hyb) vk += vklr - if incremental_jk: + if vj_last is not None: vj += vhf_last.vj vk += vhf_last.vk vxc += vj - vk if ground_state: - exc -= cp.einsum('ij,ji', dm, vk).real * .5 + exc -= cp.einsum('ij,ji', dm_orig, vk).real * .5 if ground_state: - ecoul = cp.einsum('ij,ji', dm, vj).real * .5 + ecoul = cp.einsum('ij,ji', dm_orig, vj).real * .5 else: ecoul = None diff --git a/gpu4pyscf/dft/numint2c.py b/gpu4pyscf/dft/numint2c.py index cdc78e087..9b5b5e9a3 100644 --- a/gpu4pyscf/dft/numint2c.py +++ b/gpu4pyscf/dft/numint2c.py @@ -402,7 +402,7 @@ class NumInt2C(lib.StreamObject, numint.LibXCMixin): eval_rho = staticmethod(eval_rho) def build(self, mol, coords): - self.gdftopt = numint._GDFTOpt.from_mol(mol) + self.gdftopt = _GDFTOpt2C.from_mol(mol) self.grid_blksize = None self.non0ao_idx = {} return self @@ -462,10 +462,10 @@ def nr_vxc(self, mol, grids, xc_code, dms, relativity=0, hermi=1, opt = self.gdftopt assert dms.ndim == 2 dms = cp.asarray(dms) - dms = opt.sort_orbitals_2c(dms, axis=[0,1]) + dms = opt.sort_orbitals(dms, axis=[0,1]) n, exc, vmat = self._gks_mcol_vxc(mol, grids, xc_code, dms, relativity, hermi, max_memory, verbose) - vmat = opt.unsort_orbitals_2c(vmat, axis=[0,1]) + vmat = opt.unsort_orbitals(vmat, axis=[0,1]) else: raise NotImplementedError("Locally collinear and collinear is not implemented") return n.sum(), exc, vmat @@ -496,4 +496,77 @@ def _to_numint1c(self): def to_cpu(self): ni = numint2c.NumInt2C() - return ni \ No newline at end of file + return ni + + +class _GDFTOpt2C(numint._GDFTOpt): + + def sort_orbitals(self, mat, axis=[]): + ''' Transform given axis of a 2-component matrix (GKS) into sorted AO + + This assumes the axes specified in 'axis' have a dimension of 2*nao, + representing alpha (:nao) and beta (nao:) components. Both components + are sorted using the same AO sorting index. + ''' + idx = self._ao_idx + nao = len(idx) + + # Create the 2-component sorting index: + # [sorted_alpha_indices, sorted_beta_indices] + # E.g., if idx = [0, 2, 1] (nao=3), + # idx_2c = [0, 2, 1, 3+0, 3+2, 3+1] = [0, 2, 1, 3, 5, 4] + # We use numpy to build the index, consistent with self._ao_idx + idx_2c = np.concatenate([idx, idx + nao]) + + shape_ones = (1,) * mat.ndim + fancy_index = [] + for dim, n in enumerate(mat.shape): + if dim in axis: + # Check if the dimension matches the 2-component size + if n != 2 * nao: + raise ValueError(f"Axis {dim} has dimension {n}, expected {2*nao} for 2-component sorting") + indices = idx_2c + else: + # Use cp.arange for non-sorted axes, as in the original sort_orbitals + indices = cp.arange(n) + + idx_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:] + fancy_index.append(indices.reshape(idx_shape)) + + # Perform the sorting using advanced indexing + return mat[tuple(fancy_index)] + + def unsort_orbitals(self, sorted_mat, axis=[], out=None): + ''' Transform given axis of a 2-component matrix from sorted AO to original AO + + This assumes the axes specified in 'axis' have a dimension of 2*nao. + This is the inverse operation of sort_orbitals_2c. + ''' + idx = self._ao_idx + nao = len(idx) + + # The 2-component index is created identically to sort_orbitals_2c + idx_2c = np.concatenate([idx, idx + nao]) + + shape_ones = (1,) * sorted_mat.ndim + fancy_index = [] + for dim, n in enumerate(sorted_mat.shape): + if dim in axis: + # Check if the dimension matches the 2-component size + if n != 2 * nao: + raise ValueError(f"Axis {dim} has dimension {n}, expected {2*nao} for 2-component unsorting") + indices = idx_2c + else: + indices = cp.arange(n) + + idx_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:] + fancy_index.append(indices.reshape(idx_shape)) + + if out is None: + out = cp.empty_like(sorted_mat) + + # Perform the unsorting assignment + out[tuple(fancy_index)] = sorted_mat + return out + + \ No newline at end of file diff --git a/gpu4pyscf/dft/tests/test_gks.py b/gpu4pyscf/dft/tests/test_gks.py index 74a999f93..9bce0f950 100644 --- a/gpu4pyscf/dft/tests/test_gks.py +++ b/gpu4pyscf/dft/tests/test_gks.py @@ -141,6 +141,22 @@ def test_to_cpu(self): self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), lib.fp(mf_cpu.mo_energy), 5) self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -27.63368769213053, 5) + @unittest.skipIf(mcfun is None, "mcfun library not found.") + def test_to_gpu(self): + mf_cpu = gks_cpu.GKS(mol1) + mf_cpu.xc = 'lda,vwn' + mf_cpu.collinear = 'mcol' + mf_cpu._numint.spin_samples = 50 + eks4_cpu = mf_cpu.kernel() + + mf_gpu = mf_cpu.to_gpu() + eks4_gpu = mf_cpu.kernel() + + self.assertAlmostEqual(eks4_gpu, eks4_cpu, 6) + self.assertAlmostEqual(eks4_gpu, -74.3741809222222, 6) + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), lib.fp(mf_cpu.mo_energy), 5) + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -27.63368769213053, 5) + if __name__ == "__main__": print("Test GKS") diff --git a/gpu4pyscf/dft/tests/test_numint2c.py b/gpu4pyscf/dft/tests/test_numint2c.py index 4eb5ff678..e2bb23bda 100644 --- a/gpu4pyscf/dft/tests/test_numint2c.py +++ b/gpu4pyscf/dft/tests/test_numint2c.py @@ -169,7 +169,6 @@ def test_eval_xc_eff_fp(self): self.assertAlmostEqual(abs(lib.fp(vmat_gpu.get()) - (-0.18688247306409317+7.50400133342109e-20j)).max(), 0, 10) - @unittest.skipIf(mcfun is None, "mcfun library not found.") def test_mcol_lda_vxc_mat(self): xc_code = 'lda,' @@ -179,69 +178,51 @@ def test_mcol_lda_vxc_mat(self): np.random.seed(12) dm = np.random.rand(n2c, n2c) * .001 + np.random.rand(n2c, n2c) * .0001j dm += np.eye(n2c) - dm = dm + dm.T + dm = dm + dm.T.conj() ngrids = 8 coords = np.random.rand(ngrids,3) weight = np.random.rand(ngrids) ao = numint.eval_ao(mol, coords, deriv=0, transpose=False) rho = numint2c.eval_rho(mol, ao, dm, xctype='LDA', hermi=1, with_lapl=False) - ni_CPU = pyscf_numint2c() + + # --- GPU Calculations (Always Run) --- ni_GPU = NumInt2C() ni_GPU.collinear = 'mcol' - ni_CPU.collinear = 'mcol' - eval_xc_cpu = ni_CPU.mcfun_eval_xc_adapter(xc_code) eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) - vxc_cpu = eval_xc_cpu(xc_code, rho.get(), deriv=1)[1] vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='LDA')[1] - mask = np.ones((8, mol.nbas), dtype=np.uint8) - mask_gpu = cupy.array([i for i in range(mol.nbas)]) - + mask_gpu = cupy.arange(mol.nbas) shls_slice = (0, mol.nbas) - v0_cpu = pyscf_numint2c_file._mcol_lda_vxc_mat(mol, ao.transpose(1,0).get(), weight, - rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) + v0_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 0) v1_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 1) v1_gpu = v1_gpu + v1_gpu.conj().T - self.assertAlmostEqual(abs(v0_gpu.get() - v1_gpu.get()).max(), 0, 13) - self.assertAlmostEqual(abs(v0_gpu.get() - v0_cpu).max(), 0, 13) - def test_mcol_lda_vxc_mat_fp(self): - xc_code = 'lda,' - - nao = mol.nao - n2c = nao * 2 - ao_loc = mol.ao_loc - np.random.seed(12) - dm = np.random.rand(n2c, n2c) * .001 + np.random.rand(n2c, n2c) * .0001j - dm += np.eye(n2c) - dm = dm + dm.T - ngrids = 8 - coords = np.random.rand(ngrids,3) - weight = np.random.rand(ngrids) - - ao = numint.eval_ao(mol, coords, deriv=0, transpose=False) - rho = numint2c.eval_rho(mol, ao, dm, xctype='LDA', hermi=1, with_lapl=False) - ni_GPU = NumInt2C() - ni_GPU.collinear = 'mcol' - eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) - vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='LDA')[1] - mask_gpu = cupy.array([i for i in range(mol.nbas)]) - - shls_slice = (0, mol.nbas) - v0_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), - mask_gpu, shls_slice, ao_loc, 0) - v1_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), - mask_gpu, shls_slice, ao_loc, 1) - v1_gpu = v1_gpu + v1_gpu.conj().T + # --- Assertions (Always Run) --- + # Fingerprint checks (from _fp test) self.assertAlmostEqual(abs(lib.fp(v0_gpu.get()) - - (-9.596802282363786+0.000531850990220307j)).max(), 0, 13) + (-9.596802359283691+2.969010922568367e-05j)).max(), 0, 13) self.assertAlmostEqual(abs(lib.fp(v1_gpu.get()) - - (-9.596802282363786+0.000531850990220307j)).max(), 0, 13) + (-9.596802359283691+2.969010922568367e-05j)).max(), 0, 13) + # Internal consistency check + self.assertAlmostEqual(abs(v0_gpu.get() - v1_gpu.get()).max(), 0, 13) + + # --- CPU Comparison (Conditional) --- + if mcfun is not None: + ni_CPU = pyscf_numint2c() + ni_CPU.collinear = 'mcol' + eval_xc_cpu = ni_CPU.mcfun_eval_xc_adapter(xc_code) + vxc_cpu = eval_xc_cpu(xc_code, rho.get(), deriv=1)[1] + mask = np.ones((8, mol.nbas), dtype=np.uint8) + + v0_cpu = pyscf_numint2c_file._mcol_lda_vxc_mat(mol, ao.transpose(1,0).get(), weight, + rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) + + # CPU vs GPU check + self.assertAlmostEqual(abs(v0_gpu.get() - v0_cpu).max(), 0, 13) - @unittest.skipIf(mcfun is None, "mcfun library not found.") def test_mcol_gga_vxc_mat(self): xc_code = 'pbe,' @@ -251,108 +232,52 @@ def test_mcol_gga_vxc_mat(self): np.random.seed(12) dm = np.random.rand(n2c, n2c) * .001 + np.random.rand(n2c, n2c) * .001j dm += np.eye(n2c) - dm = dm + dm.T + dm = dm + dm.T.conj() ngrids = 8 coords = np.random.rand(ngrids,3) weight = np.random.rand(ngrids) ao = numint.eval_ao(mol, coords, deriv=1, transpose=False) rho = numint2c.eval_rho(mol, ao, dm, xctype='GGA', hermi=1, with_lapl=False) - ni_CPU = pyscf_numint2c() - ni_GPU = NumInt2C() - ni_GPU.collinear = 'mcol' - ni_CPU.collinear = 'mcol' - eval_xc_cpu = ni_CPU.mcfun_eval_xc_adapter(xc_code) - eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) - vxc_cpu = eval_xc_cpu(xc_code, rho.get(), deriv=1)[1] - vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='GGA')[1] - mask = np.ones((8, mol.nbas), dtype=np.uint8) - mask_gpu = cupy.array([i for i in range(mol.nbas)]) - - shls_slice = (0, mol.nbas) - v0_cpu = pyscf_numint2c_file._mcol_gga_vxc_mat(mol, ao.transpose(0,2,1).get(), weight, - rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) - v0_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), - mask_gpu, shls_slice, ao_loc, 0) - v1_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), - mask_gpu, shls_slice, ao_loc, 1) - v1_gpu = v1_gpu + v1_gpu.conj().T - self.assertAlmostEqual(abs(v0_gpu.get() - v1_gpu.get()).max(), 0, 13) - self.assertAlmostEqual(abs(v0_gpu.get() - v0_cpu).max(), 0, 13) - def test_mcol_gga_vxc_mat_fp(self): - xc_code = 'pbe,' - - nao = mol.nao - n2c = nao * 2 - ao_loc = mol.ao_loc - np.random.seed(12) - dm = np.random.rand(n2c, n2c) * .001 + np.random.rand(n2c, n2c) * .001j - dm += np.eye(n2c) - dm = dm + dm.T - ngrids = 8 - coords = np.random.rand(ngrids,3) - weight = np.random.rand(ngrids) - - ao = numint.eval_ao(mol, coords, deriv=1, transpose=False) - rho = numint2c.eval_rho(mol, ao, dm, xctype='GGA', hermi=1, with_lapl=False) + # --- GPU Calculations (Always Run) --- ni_GPU = NumInt2C() ni_GPU.collinear = 'mcol' eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='GGA')[1] - mask_gpu = cupy.array([i for i in range(mol.nbas)]) - + mask_gpu = cupy.arange(mol.nbas) shls_slice = (0, mol.nbas) + v0_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 0) v1_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 1) v1_gpu = v1_gpu + v1_gpu.conj().T + + # --- Assertions (Always Run) --- + # Fingerprint checks (from _fp test) self.assertAlmostEqual(abs(lib.fp(v1_gpu.get()) - - (-9.624252836509262+0.005273283360578891j)).max(), 0, 13) + (-9.624260408900755+0.0003122100947141664j)).max(), 0, 13) self.assertAlmostEqual(abs(lib.fp(v0_gpu.get()) - - (-9.624252836509262+0.005273283360578891j)).max(), 0, 13) - - @unittest.skipIf(mcfun is None, "mcfun library not found.") - def test_mcol_mgga_vxc_mat(self): - xc_code = 'tpss' - - nao = mol.nao - n2c = nao * 2 - ao_loc = mol.ao_loc - np.random.seed(12) - dm = np.random.rand(n2c, n2c) * .001 - dm += np.eye(n2c) - dm = dm + dm.T - ngrids = 8 - coords = np.random.rand(ngrids,3) - weight = np.random.rand(ngrids) + (-9.624260408900755+0.0003122100947141664j)).max(), 0, 13) + # Internal consistency check + self.assertAlmostEqual(abs(v0_gpu.get() - v1_gpu.get()).max(), 0, 13) - ao = numint.eval_ao(mol, coords, deriv=1, transpose=False) - rho = numint2c.eval_rho(mol, ao, dm, xctype='MGGA', hermi=1, with_lapl=False) - ni_CPU = pyscf_numint2c() - ni_GPU = NumInt2C() - ni_GPU.collinear = 'mcol' - ni_CPU.collinear = 'mcol' - eval_xc_cpu = ni_CPU.mcfun_eval_xc_adapter(xc_code) - eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) - vxc_cpu = eval_xc_cpu(xc_code, rho.get(), deriv=1)[1] - vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='MGGA')[1] - mask = np.ones((8, mol.nbas), dtype=np.uint8) - mask_gpu = cupy.array([i for i in range(mol.nbas)]) + # --- CPU Comparison (Conditional) --- + if mcfun is not None: + ni_CPU = pyscf_numint2c() + ni_CPU.collinear = 'mcol' + eval_xc_cpu = ni_CPU.mcfun_eval_xc_adapter(xc_code) + vxc_cpu = eval_xc_cpu(xc_code, rho.get(), deriv=1)[1] + mask = np.ones((8, mol.nbas), dtype=np.uint8) - shls_slice = (0, mol.nbas) - v0_cpu = pyscf_numint2c_file._mcol_mgga_vxc_mat(mol, ao.transpose(0,2,1).get(), weight, - rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) - v0_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), - mask_gpu, shls_slice, ao_loc, 0) - v1_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), - mask_gpu, shls_slice, ao_loc, 1) - v1_gpu = v1_gpu + v1_gpu.conj().T - self.assertAlmostEqual(abs(v0_gpu.get() - v1_gpu.get()).max(), 0, 13) - self.assertAlmostEqual(abs(v0_gpu.get() - v0_cpu).max(), 0, 13) + v0_cpu = pyscf_numint2c_file._mcol_gga_vxc_mat(mol, ao.transpose(0,2,1).get(), weight, + rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) + + # CPU vs GPU check + self.assertAlmostEqual(abs(v0_gpu.get() - v0_cpu).max(), 0, 13) - def test_mcol_mgga_vxc_mat_fp(self): + def test_mcol_mgga_vxc_mat(self): xc_code = 'tpss' nao = mol.nao @@ -361,29 +286,51 @@ def test_mcol_mgga_vxc_mat_fp(self): np.random.seed(12) dm = np.random.rand(n2c, n2c) * .001 dm += np.eye(n2c) - dm = dm + dm.T + dm = dm + dm.T.conj() ngrids = 8 coords = np.random.rand(ngrids,3) weight = np.random.rand(ngrids) ao = numint.eval_ao(mol, coords, deriv=1, transpose=False) rho = numint2c.eval_rho(mol, ao, dm, xctype='MGGA', hermi=1, with_lapl=False) + + # --- GPU Calculations (Always Run) --- ni_GPU = NumInt2C() ni_GPU.collinear = 'mcol' eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='MGGA')[1] - mask_gpu = cupy.array([i for i in range(mol.nbas)]) - + mask_gpu = cupy.arange(mol.nbas) shls_slice = (0, mol.nbas) + v0_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 0) v1_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), mask_gpu, shls_slice, ao_loc, 1) v1_gpu = v1_gpu + v1_gpu.conj().T + + # --- Assertions (Always Run) --- + # Fingerprint checks (from _fp test) <-- UPDATED self.assertAlmostEqual(abs(lib.fp(v1_gpu.get()) - (-11.359687631112195+6.13828986953566e-22j)).max(), 0, 13) self.assertAlmostEqual(abs(lib.fp(v0_gpu.get()) - (-11.359687631112195+6.13828986953566e-22j)).max(), 0, 13) + + # Internal consistency check + self.assertAlmostEqual(abs(v0_gpu.get() - v1_gpu.get()).max(), 0, 13) + + # --- CPU Comparison (Conditional) --- + if mcfun is not None: + ni_CPU = pyscf_numint2c() + ni_CPU.collinear = 'mcol' + eval_xc_cpu = ni_CPU.mcfun_eval_xc_adapter(xc_code) + vxc_cpu = eval_xc_cpu(xc_code, rho.get(), deriv=1)[1] + mask = np.ones((8, mol.nbas), dtype=np.uint8) + + v0_cpu = pyscf_numint2c_file._mcol_mgga_vxc_mat(mol, ao.transpose(0,2,1).get(), weight, + rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) + + # CPU vs GPU check + self.assertAlmostEqual(abs(v0_gpu.get() - v0_cpu).max(), 0, 13) if __name__ == "__main__": diff --git a/gpu4pyscf/scf/diis.py b/gpu4pyscf/scf/diis.py index 570690600..091cd01d1 100644 --- a/gpu4pyscf/scf/diis.py +++ b/gpu4pyscf/scf/diis.py @@ -48,6 +48,8 @@ def __init__(self, mf=None, filename=None): self.space = 8 def update(self, s, d, f, *args, **kwargs): + if d.dtype == cp.complex128: + s = s.astype(cp.complex128) errvec = self._sdf_err_vec(s, d, f) if self.incore is None: mem_avail = get_avail_mem() diff --git a/gpu4pyscf/scf/hf.py b/gpu4pyscf/scf/hf.py index fbce65008..ab49acfa5 100644 --- a/gpu4pyscf/scf/hf.py +++ b/gpu4pyscf/scf/hf.py @@ -145,8 +145,6 @@ def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, if dm is None: dm = mf.make_rdm1() s1e = cupy.asarray(s1e) dm = cupy.asarray(dm) - if dm.dtype == cupy.complex128: - s1e = s1e.astype(cupy.complex128) if diis_start_cycle is None: diis_start_cycle = mf.diis_start_cycle if level_shift_factor is None: From 856eca1a90c1fe79ce51cb0c76e957ad3ab2f6bb Mon Sep 17 00:00:00 2001 From: "puzhichen.996" Date: Fri, 14 Nov 2025 15:35:11 +0800 Subject: [PATCH 21/21] remove the to_gpu test --- gpu4pyscf/dft/numint.py | 69 +-------------------------------- gpu4pyscf/dft/tests/test_gks.py | 2 +- 2 files changed, 2 insertions(+), 69 deletions(-) diff --git a/gpu4pyscf/dft/numint.py b/gpu4pyscf/dft/numint.py index 4e0345294..9bdc1e935 100644 --- a/gpu4pyscf/dft/numint.py +++ b/gpu4pyscf/dft/numint.py @@ -2428,74 +2428,7 @@ def unsort_orbitals(self, sorted_mat, axis=[], out=None): out = cupy.empty_like(sorted_mat) out[tuple(fancy_index)] = sorted_mat return out - - def sort_orbitals_2c(self, mat, axis=[]): - ''' Transform given axis of a 2-component matrix (GKS) into sorted AO - - This assumes the axes specified in 'axis' have a dimension of 2*nao, - representing alpha (:nao) and beta (nao:) components. Both components - are sorted using the same AO sorting index. - ''' - idx = self._ao_idx - nao = len(idx) - - # Create the 2-component sorting index: - # [sorted_alpha_indices, sorted_beta_indices] - # E.g., if idx = [0, 2, 1] (nao=3), - # idx_2c = [0, 2, 1, 3+0, 3+2, 3+1] = [0, 2, 1, 3, 5, 4] - # We use numpy to build the index, consistent with self._ao_idx - idx_2c = np.concatenate([idx, idx + nao]) - - shape_ones = (1,) * mat.ndim - fancy_index = [] - for dim, n in enumerate(mat.shape): - if dim in axis: - # Check if the dimension matches the 2-component size - if n != 2 * nao: - raise ValueError(f"Axis {dim} has dimension {n}, expected {2*nao} for 2-component sorting") - indices = idx_2c - else: - # Use cupy.arange for non-sorted axes, as in the original sort_orbitals - indices = cupy.arange(n) - - idx_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:] - fancy_index.append(indices.reshape(idx_shape)) - - # Perform the sorting using advanced indexing - return mat[tuple(fancy_index)] - - def unsort_orbitals_2c(self, sorted_mat, axis=[], out=None): - ''' Transform given axis of a 2-component matrix from sorted AO to original AO - - This assumes the axes specified in 'axis' have a dimension of 2*nao. - This is the inverse operation of sort_orbitals_2c. - ''' - idx = self._ao_idx - nao = len(idx) - - # The 2-component index is created identically to sort_orbitals_2c - idx_2c = np.concatenate([idx, idx + nao]) - - shape_ones = (1,) * sorted_mat.ndim - fancy_index = [] - for dim, n in enumerate(sorted_mat.shape): - if dim in axis: - # Check if the dimension matches the 2-component size - if n != 2 * nao: - raise ValueError(f"Axis {dim} has dimension {n}, expected {2*nao} for 2-component unsorting") - indices = idx_2c - else: - indices = cupy.arange(n) - - idx_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:] - fancy_index.append(indices.reshape(idx_shape)) - - if out is None: - out = cupy.empty_like(sorted_mat) - - # Perform the unsorting assignment - out[tuple(fancy_index)] = sorted_mat - return out + class GTOValEnvVars(ctypes.Structure): _fields_ = [ diff --git a/gpu4pyscf/dft/tests/test_gks.py b/gpu4pyscf/dft/tests/test_gks.py index 9bce0f950..d64b1f13a 100644 --- a/gpu4pyscf/dft/tests/test_gks.py +++ b/gpu4pyscf/dft/tests/test_gks.py @@ -141,7 +141,7 @@ def test_to_cpu(self): self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), lib.fp(mf_cpu.mo_energy), 5) self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -27.63368769213053, 5) - @unittest.skipIf(mcfun is None, "mcfun library not found.") + @unittest.skip("NumInt2C in PySCF has no to_gpu method.") def test_to_gpu(self): mf_cpu = gks_cpu.GKS(mol1) mf_cpu.xc = 'lda,vwn'