Skip to content

Commit 183106a

Browse files
authored
Multi-collinear spin-flip tddft excitation energy calculations using gpu4pyscf functions (#515)
* 1. test the uhf 2. add the get_ab for sf-uhf * Finish the get_ab and test the collinear and multi-collinear sf-tddft * move numint2c to gpu * debugging * finish excitation part, begin to write gradients * copy from the pyscf-forge * add the gradient * remove gradient : * add some comments * fix the codes under reviews * review the codes * change the batch size
1 parent 6af29a4 commit 183106a

File tree

5 files changed

+621
-43
lines changed

5 files changed

+621
-43
lines changed

gpu4pyscf/dft/xc_deriv.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,4 +358,25 @@ def _stack_fggg(fggg, axis=0, rho=None):
358358
fggg = fggg[tuple(slices)]
359359
fggg = _stack_fg(fggg, axis=axis+2, rho=rho)
360360
fggg = _stack_fg(fggg, axis=axis+1, rho=rho)
361-
return _stack_fg(fggg, axis=axis, rho=rho)
361+
return _stack_fg(fggg, axis=axis, rho=rho)
362+
363+
364+
def ud2ts(v_ud):
365+
v_ts = cupy.asarray(v_ud)
366+
order = v_ud.ndim // 2
367+
368+
if order == 0 and v_ts.shape[0] != 2:
369+
raise ValueError("No spin axis found in the input array.")
370+
371+
matrix = cupy.array([[0.5, 0.5],
372+
[0.5, -0.5]])
373+
if order == 1:
374+
v_ts = contract('ra,axg->rxg', matrix, v_ud)
375+
elif order == 2:
376+
v_ts = cupy.einsum('ra,tb,axbyg->rxtyg', matrix, matrix, v_ud)
377+
elif order == 3:
378+
v_ts = cupy.einsum('ra,tb,sc,axbyczg->rxtyszg', matrix, matrix, matrix, v_ud)
379+
else:
380+
raise NotImplementedError(f"Order {order} not implemented.")
381+
382+
return v_ts

gpu4pyscf/tdscf/_uhf_resp_sf.py

Lines changed: 110 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,93 @@
2121
from pyscf import lib
2222
from pyscf.lib import logger
2323
from pyscf.dft import numint2c, xc_deriv
24+
from gpu4pyscf.dft import xc_deriv as xc_deriv_gpu
2425
from gpu4pyscf.scf import hf, uhf
2526
from gpu4pyscf.dft.numint import _scale_ao, _tau_dot, eval_rho, eval_rho2
2627
from gpu4pyscf.lib.cupy_helper import transpose_sum, add_sparse, contract
28+
from concurrent.futures import ThreadPoolExecutor
29+
30+
31+
MAX_GRIDS_PER_TASK = 8192 # Approximately (2,4,2,4,200,8192) ~ 800MB
32+
33+
def _prange(start, end, step):
34+
'''Partitions range into segments: i0:i1, i1:i2, i2:i3, ...'''
35+
if start < end:
36+
for i in range(start, end, step):
37+
yield i, min(i+step, end)
38+
39+
40+
def _make_paxis_samples(spin_samples):
41+
'''Samples on principal axis between [0, 1]'''
42+
rt, wt = np.polynomial.legendre.leggauss(spin_samples)
43+
rt = cp.array(rt)
44+
wt = cp.array(wt)
45+
rt = rt * .5 + .5
46+
wt *= .5 # normalized to 1
47+
return rt, wt
48+
49+
50+
def eval_xc_eff_sf(func, rho_tmz, deriv=1, collinear_samples=200):
51+
assert deriv < 5
52+
if rho_tmz.dtype != cp.double:
53+
raise RuntimeError('rho and mz must be real')
54+
ngrids = rho_tmz.shape[-1]
55+
grids_per_task = MAX_GRIDS_PER_TASK
56+
57+
results = []
58+
for p0, p1 in _prange(0, ngrids, grids_per_task):
59+
r = _eval_xc_sf(func, rho_tmz[...,p0:p1], deriv, collinear_samples)
60+
results.append(r)
61+
62+
return [None if x[0] is None else cp.concatenate(x, axis=-1) for x in zip(*results)]
63+
64+
65+
def _eval_xc_sf(func, rho_tmz, deriv, collinear_samples):
66+
ngrids = rho_tmz.shape[-1]
67+
# samples on z=cos(theta) and their weights between [0, 1]
68+
sgridz, weights = _make_paxis_samples(collinear_samples)
69+
70+
if rho_tmz.ndim == 2:
71+
nvar = 1
72+
else:
73+
nvar = rho_tmz.shape[1]
74+
# spin-flip part
75+
fxc_sf = 0.0
76+
rho = _project_spin_paxis2(rho_tmz, sgridz)
77+
fxc = func(rho, deriv)[2]
78+
fxc = fxc.reshape(2, nvar, 2, nvar, ngrids, weights.size)
79+
if not isinstance(fxc, cp.ndarray):
80+
fxc = cp.array(fxc)
81+
fxc_sf += fxc[1,:,1].dot(weights)
82+
83+
return None,None,fxc_sf
84+
85+
86+
def _project_spin_paxis2(rho_tm, sgridz=None):
87+
# ToDo: be written into the function _project_spin_paxis().
88+
# Because use mz rather than |mz| here
89+
'''Projects spins onto the principal axis'''
90+
rho = rho_tm[0]
91+
mz = rho_tm[1]
92+
93+
if sgridz is None:
94+
rho_ts = cp.stack([rho, mz])
95+
else:
96+
ngrids = rho.shape[-1]
97+
nsg = sgridz.shape[0]
98+
if rho_tm.ndim == 2:
99+
rho_ts = cp.empty((2, ngrids, nsg))
100+
rho_ts[0] = rho[:,cp.newaxis]
101+
rho_ts[1] = mz[:,cp.newaxis] * sgridz
102+
rho_ts = rho_ts.reshape(2, ngrids * nsg)
103+
else:
104+
nvar = rho_tm.shape[1]
105+
rho_ts = cp.empty((2, nvar, ngrids, nsg))
106+
rho_ts[0] = rho[:,:,cp.newaxis]
107+
rho_ts[1] = mz[:,:,cp.newaxis] * sgridz
108+
rho_ts = rho_ts.reshape(2, nvar, ngrids * nsg)
109+
return rho_ts
110+
27111

28112
def gen_uhf_response_sf(mf, mo_coeff=None, mo_occ=None, hermi=0,
29113
collinear='mcol', collinear_samples=200):
@@ -86,33 +170,38 @@ def __mcfun_fn_eval_xc(ni, xc_code, xctype, rho, deriv):
86170
evfk[order] = xc_deriv.ud2ts(evfk[order])
87171
return evfk
88172

173+
def __mcfun_fn_eval_xc2(ni, xc_code, xctype, rho, deriv):
174+
t, s = rho
175+
if not isinstance(t, cp.ndarray):
176+
t = cp.asarray(t)
177+
if not isinstance(s, cp.ndarray):
178+
s = cp.asarray(s)
179+
rho = cp.stack([(t + s) * .5, (t - s) * .5])
180+
spin = 1
181+
evfk = ni.eval_xc_eff(xc_code, rho, deriv=deriv, xctype=xctype, spin=spin)
182+
evfk = list(evfk)
183+
for order in range(1, deriv+1):
184+
if evfk[order] is not None:
185+
evfk[order] = xc_deriv_gpu.ud2ts(evfk[order])
186+
return evfk
187+
89188
# Edited based on pyscf.dft.numint2c.mcfun_eval_xc_adapter
90189
def mcfun_eval_xc_adapter_sf(ni, xc_code, collinear_samples):
91190
'''Wrapper to generate the eval_xc function required by mcfun
92191
'''
93192

94-
try:
95-
import mcfun
96-
except ImportError:
97-
raise ImportError('This feature requires mcfun library.\n'
98-
'Try install mcfun with `pip install mcfun`')
99-
100-
ni = numint2c.NumInt2C()
101-
ni.collinear = 'mcol'
102-
ni.collinear_samples = collinear_samples
103193
xctype = ni._xc_type(xc_code)
104-
fn_eval_xc = functools.partial(__mcfun_fn_eval_xc, ni, xc_code, xctype)
105-
nproc = lib.num_threads()
194+
fn_eval_xc = functools.partial(__mcfun_fn_eval_xc2, ni, xc_code, xctype)
106195

107196
def eval_xc_eff(xc_code, rho, deriv=1, omega=None, xctype=None, verbose=None):
108-
res = mcfun.eval_xc_eff_sf(
109-
fn_eval_xc, rho.get(), deriv,
110-
collinear_samples=collinear_samples, workers=nproc)
197+
res = eval_xc_eff_sf(
198+
fn_eval_xc, rho, deriv,
199+
collinear_samples=collinear_samples)
111200
return [x if x is None else cp.asarray(x) for x in res]
112201
return eval_xc_eff
113202

114203
def cache_xc_kernel_sf(ni, mol, grids, xc_code, mo_coeff, mo_occ,
115-
collinear_samples):
204+
collinear_samples, deriv=2):
116205
'''Compute the fxc_sf, which can be used in SF-TDDFT/TDA
117206
'''
118207
xctype = ni._xc_type(xc_code)
@@ -148,8 +237,12 @@ def cache_xc_kernel_sf(ni, mol, grids, xc_code, mo_coeff, mo_occ,
148237
rho_z = cp.array([rho_ab[0]+rho_ab[1],
149238
rho_ab[0]-rho_ab[1]])
150239
eval_xc_eff = mcfun_eval_xc_adapter_sf(ni, xc_code, collinear_samples)
151-
vxc, fxc = eval_xc_eff(xc_code, rho_z, deriv=2, xctype=xctype)[1:3]
152-
return rho_ab, vxc, fxc
240+
if deriv == 2:
241+
vxc, fxc = eval_xc_eff(xc_code, rho_z, deriv=2, xctype=xctype)[1:3]
242+
return rho_ab, vxc, fxc
243+
elif deriv == 3:
244+
vxc, fxc, kxc = eval_xc_eff(xc_code, rho_z, deriv=3, xctype=xctype)[1:4]
245+
return rho_ab, vxc, fxc, kxc
153246

154247
def nr_uks_fxc_sf(ni, mol, grids, xc_code, dm0, dms, relativity=0, hermi=0,
155248
rho0=None, vxc=None, fxc=None):

gpu4pyscf/tdscf/tests/test_sftddft.py

Lines changed: 107 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,20 @@
1717
import cupy as cp
1818
from pyscf import lib, gto, scf
1919
from gpu4pyscf import tdscf
20-
try:
21-
import mcfun
22-
except ImportError:
23-
mcfun = None
20+
21+
22+
def diagonalize_tda(a, nroots=5):
23+
nocc, nvir = a.shape[:2]
24+
nov = nocc * nvir
25+
a = a.reshape(nov, nov)
26+
e, xy = np.linalg.eig(np.asarray(a))
27+
sorted_indices = np.argsort(e)
28+
29+
e_sorted = e[sorted_indices]
30+
xy_sorted = xy[:, sorted_indices]
31+
32+
return e_sorted[:nroots], xy_sorted[:, :nroots]
33+
2434

2535
class KnownValues(unittest.TestCase):
2636
@classmethod
@@ -35,48 +45,126 @@ def setUpClass(cls):
3545
mol.spin = 2
3646
mol.basis = '631g'
3747
cls.mol = mol.build()
38-
cls.mf = mol.UHF().to_gpu().run()
3948

4049
@classmethod
4150
def tearDownClass(cls):
4251
cls.mol.stdout.close()
4352

44-
def test_tda(self):
45-
mf = self.mf
53+
def test_hf_tda(self):
54+
mf = self.mol.UHF().to_gpu().run()
4655
# sftddft not available in pyscf main branch. References are created
4756
# using the sftda module from pyscf-forge
4857
ref = [ 0.46644071, 0.55755649, 1.05310518]
49-
td = mf.SFTDA().run(extype=0, conv_tol=1e-7)
58+
td = mf.SFTDA().run(extype=0, conv_tol=1e-5)
5059
self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6)
60+
a, b = td.get_ab()
61+
e = diagonalize_tda(a[0], nroots=3)[0]
62+
self.assertAlmostEqual(abs(e - td.e).max(), 0, 6)
5163

5264
ref = [-0.21574567, 0.00270390, 0.03143914]
53-
td = mf.SFTDA().run(extype=1, conv_tol=1e-7)
65+
td = mf.SFTDA().run(extype=1, conv_tol=1e-5)
66+
self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6)
67+
e = diagonalize_tda(a[1], nroots=3)[0]
68+
self.assertAlmostEqual(abs(e - td.e).max(), 0, 6)
69+
70+
def test_mcol_svwn_tda(self):
71+
mf = self.mol.UKS(xc='svwn').to_gpu().run()
72+
# sftddft not available in pyscf main branch. References are created
73+
# using the sftda module from pyscf-forge
74+
ref = [0.45022394, 0.57917576, 1.04475443]
75+
td = mf.SFTDA()
76+
td.collinear = 'mcol'
77+
td.extype = 0
78+
td.collinear_samples=200
79+
td.conv_tol = 1e-5
80+
td.kernel()
81+
a, b = td.get_ab()
82+
e = diagonalize_tda(a[0], nroots=3)[0]
83+
84+
self.assertAlmostEqual(abs(e - td.e).max(), 0, 6)
85+
self.assertAlmostEqual(abs(td.e - ref).max(), 0, 5)
86+
87+
ref = [-0.32642984, 0.0003752 , 0.02156706]
88+
td = mf.SFTDA()
89+
td.collinear = 'mcol'
90+
td.extype = 1
91+
td.collinear_samples=200
92+
td.conv_tol = 1e-5
93+
td.kernel()
94+
e = diagonalize_tda(a[1], nroots=3)[0]
95+
96+
self.assertAlmostEqual(abs(e - td.e).max(), 0, 6)
5497
self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6)
5598

56-
@unittest.skipIf(mcfun is None, 'MCfun not available')
5799
def test_mcol_b3lyp_tda(self):
58-
mf = self.mf
100+
mf = self.mol.UKS(xc='b3lyp').to_gpu().run()
59101
# sftddft not available in pyscf main branch. References are created
60102
# using the sftda module from pyscf-forge
61-
ref = [ 0.45941171, 0.57799552, 1.06629265]
62-
td = mf.SFTDA().run(collinear='mcol', extype=0, conv_tol=1e-7)
103+
ref = [0.45941163, 0.57799537, 1.06629197]
104+
td = mf.SFTDA()
105+
td.collinear = 'mcol'
106+
td.extype = 0
107+
td.collinear_samples=200
108+
td.conv_tol = 1e-5
109+
td.kernel()
110+
a, b = td.get_ab()
111+
e = diagonalize_tda(a[0], nroots=3)[0]
112+
113+
self.assertAlmostEqual(abs(e - td.e).max(), 0, 6)
114+
self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6)
115+
116+
ref = [-0.29629126, 0.00067001, 0.0195629 ]
117+
td = mf.SFTDA()
118+
td.collinear = 'mcol'
119+
td.extype = 1
120+
td.collinear_samples=200
121+
td.conv_tol = 1e-5
122+
td.kernel()
123+
e = diagonalize_tda(a[1], nroots=3)[0]
124+
125+
self.assertAlmostEqual(abs(e - td.e).max(), 0, 6)
63126
self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6)
64127

65-
ref = [-0.29629139, 0.00067017, 0.01956306]
66-
td = mf.SFTDA().run(collinear='mcol', extype=1, conv_tol=1e-7)
128+
def test_mcol_tpss_tda(self):
129+
mf = self.mol.UKS(xc='tpss').to_gpu().run()
130+
# sftddft not available in pyscf main branch. References are created
131+
# using the sftda module from pyscf-forge
132+
ref = [0.4498647 , 0.57071842, 1.0544106 ]
133+
td = mf.SFTDA()
134+
td.collinear = 'mcol'
135+
td.extype = 0
136+
td.collinear_samples=200
137+
td.conv_tol = 1e-5
138+
td.kernel()
139+
a, b = td.get_ab()
140+
e = diagonalize_tda(a[0], nroots=3)[0]
141+
142+
self.assertAlmostEqual(abs(e - td.e).max(), 0, 6)
143+
self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6)
144+
145+
ref = [-0.28699899, 0.00063662, 0.0232923 ]
146+
td = mf.SFTDA()
147+
td.collinear = 'mcol'
148+
td.extype = 1
149+
td.collinear_samples=200
150+
td.conv_tol = 1e-5
151+
td.kernel()
152+
e = diagonalize_tda(a[1], nroots=3)[0]
153+
154+
self.assertAlmostEqual(abs(e - td.e).max(), 0, 6)
67155
self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6)
68156

69157
@unittest.skip('Numerical issues encountered in non-hermitian diagonalization')
70158
def test_tdhf(self):
71-
mf = self.mf
159+
mf = self.mol.UHF().to_gpu().run()
72160
ref = [1.74385401, 9.38227395, 14.90168875]
73-
td = mf.SFTDHF().run(extype=0, conv_tol=1e-7)
161+
td = mf.SFTDHF().run(extype=0, conv_tol=1e-5)
74162
self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6)
75163

76164
ref = [0.41701647, 9.59644331, 22.99972711]
77-
td = mf.SFTDHF().run(extype=1, conv_tol=1e-7)
165+
td = mf.SFTDHF().run(extype=1, conv_tol=1e-5)
78166
self.assertAlmostEqual(abs(td.e - ref).max(), 0, 6)
79167

80168
if __name__ == "__main__":
81-
print("Full Tests for spin-flip-TDA and spin-flip-TDDFT")
169+
print("Full Tests for spin-flip-TDA and spin-flip-TDDFT using multi-collinear functionals")
82170
unittest.main()

0 commit comments

Comments
 (0)