|
21 | 21 | from pyscf import lib |
22 | 22 | from pyscf.lib import logger |
23 | 23 | from pyscf.dft import numint2c, xc_deriv |
| 24 | +from gpu4pyscf.dft import xc_deriv as xc_deriv_gpu |
24 | 25 | from gpu4pyscf.scf import hf, uhf |
25 | 26 | from gpu4pyscf.dft.numint import _scale_ao, _tau_dot, eval_rho, eval_rho2 |
26 | 27 | from gpu4pyscf.lib.cupy_helper import transpose_sum, add_sparse, contract |
| 28 | +from concurrent.futures import ThreadPoolExecutor |
| 29 | + |
| 30 | + |
# Largest number of grid points evaluated in one batch.  A buffer with
# 2*4*2*4*200*8192 entries (200 being the default collinear_samples) takes
# roughly 800MB at 8 bytes per entry.
MAX_GRIDS_PER_TASK = 8192
| 32 | + |
| 33 | +def _prange(start, end, step): |
| 34 | + '''Partitions range into segments: i0:i1, i1:i2, i2:i3, ...''' |
| 35 | + if start < end: |
| 36 | + for i in range(start, end, step): |
| 37 | + yield i, min(i+step, end) |
| 38 | + |
| 39 | + |
def _make_paxis_samples(spin_samples):
    '''Samples on principal axis between [0, 1]

    Builds ``spin_samples`` Gauss-Legendre nodes and weights with numpy,
    moves them to the GPU and maps them from [-1, 1] onto [0, 1].

    Returns:
        (nodes, weights) as two cupy arrays of length ``spin_samples``.
    '''
    nodes, weights = np.polynomial.legendre.leggauss(spin_samples)
    # Affine map x -> (x + 1) / 2 takes the nodes from [-1, 1] to [0, 1].
    rt = cp.array(nodes) * .5 + .5
    wt = cp.array(weights)
    # Halving the interval length halves the weights (normalized to 1).
    wt *= .5
    return rt, wt
| 48 | + |
| 49 | + |
def eval_xc_eff_sf(func, rho_tmz, deriv=1, collinear_samples=200):
    '''Evaluate the spin-flip XC terms batch by batch over the grid.

    The last axis of ``rho_tmz`` is split into slices of at most
    MAX_GRIDS_PER_TASK points; each slice is handed to _eval_xc_sf and the
    per-slice outputs are joined again along the last axis.

    Args:
        func: callable forwarded unchanged to _eval_xc_sf.
        rho_tmz: cupy array of dtype double; index 0 / 1 of the leading
            axis hold the density and mz.
        deriv: derivative order forwarded to ``func``; must be below 5.
        collinear_samples: number of samples on the principal axis.

    Returns:
        A list with one entry per output of _eval_xc_sf.  An entry is None
        when the first batch produced None for it, otherwise the
        concatenation over all batches.  The list is empty when the grid
        has no points.

    Raises:
        RuntimeError: if ``rho_tmz`` is not of dtype double.
    '''
    assert deriv < 5
    if rho_tmz.dtype != cp.double:
        raise RuntimeError('rho and mz must be real')

    ngrids = rho_tmz.shape[-1]
    batches = [
        _eval_xc_sf(func, rho_tmz[..., lo:hi], deriv, collinear_samples)
        for lo, hi in _prange(0, ngrids, MAX_GRIDS_PER_TASK)
    ]

    # Transpose "per batch" into "per output" and stitch the grid axis.
    merged = []
    for pieces in zip(*batches):
        if pieces[0] is None:
            merged.append(None)
        else:
            merged.append(cp.concatenate(pieces, axis=-1))
    return merged
| 63 | + |
| 64 | + |
def _eval_xc_sf(func, rho_tmz, deriv, collinear_samples):
    '''Spin-flip XC kernel for one batch of grid points.

    The density/mz pair is projected onto ``collinear_samples`` points of
    the principal axis, ``func`` is evaluated on all of them at once, and
    the [1, :, 1] block of its second-derivative output is integrated over
    the samples with the quadrature weights.

    Args:
        func: callable ``func(rho, deriv)`` whose result at index 2 is the
            second-derivative kernel, reshapeable to
            (2, nvar, 2, nvar, ngrids, collinear_samples).
        rho_tmz: array whose leading axis holds density and mz; either
            (2, ngrids) or (2, nvar, ngrids).
        deriv: derivative order forwarded to ``func``.
        collinear_samples: number of samples on the principal axis.

    Returns:
        ``(None, None, fxc_sf)``.  Only the second-order kernel is
        produced; no further element is returned for any ``deriv``.
        NOTE(review): a caller expecting a third-order term will not find
        one here.

    Raises:
        ValueError: if ``func`` yields None in the second-derivative slot
            (presumably when deriv < 2 -- confirm against ``func``).
    '''
    ngrids = rho_tmz.shape[-1]
    nvar = 1 if rho_tmz.ndim == 2 else rho_tmz.shape[1]

    # samples on z=cos(theta) and their weights between [0, 1]
    sgridz, weights = _make_paxis_samples(collinear_samples)

    # spin-flip part
    rho = _project_spin_paxis2(rho_tmz, sgridz)
    fxc = func(rho, deriv)[2]
    if fxc is None:
        # Fail with a clear message instead of an AttributeError on reshape.
        raise ValueError('spin-flip kernel requires the second derivative '
                         'of the functional, but func returned None for it')

    # Move to the GPU first (no-op for cupy input), then expose the axes.
    fxc = cp.asarray(fxc).reshape(2, nvar, 2, nvar, ngrids, weights.size)
    # Quadrature over the principal-axis samples (last axis).
    fxc_sf = fxc[1,:,1].dot(weights)

    return None, None, fxc_sf
| 84 | + |
| 85 | + |
def _project_spin_paxis2(rho_tm, sgridz=None):
    '''Projects spins onto the principal axis

    Args:
        rho_tm: array whose entries 0 and 1 along the leading axis are the
            density and mz; either 2-dimensional (no variable axis) or
            with the variable axis in position 1.
        sgridz: optional 1D array of sample positions on the axis.

    Returns:
        Without ``sgridz``: the stacked pair [rho, mz].
        With ``sgridz``: an array of shape (2, ngrids*nsg) or
        (2, nvar, ngrids*nsg).  Component 0 repeats the density for every
        sample, component 1 is mz multiplied by each sample; for a given
        grid point the sample index runs fastest.
    '''
    # TODO: fold this into _project_spin_paxis().  It is kept separate
    # because the signed mz is used here rather than |mz|.
    density, mz = rho_tm[0], rho_tm[1]

    if sgridz is None:
        return cp.stack([density, mz])

    ngrids = density.shape[-1]
    nsg = sgridz.shape[0]

    if rho_tm.ndim == 2:
        projected = cp.empty((2, ngrids, nsg))
        projected[0] = density[:, cp.newaxis]
        projected[1] = mz[:, cp.newaxis] * sgridz
        return projected.reshape(2, ngrids * nsg)

    nvar = rho_tm.shape[1]
    projected = cp.empty((2, nvar, ngrids, nsg))
    projected[0] = density[:, :, cp.newaxis]
    projected[1] = mz[:, :, cp.newaxis] * sgridz
    return projected.reshape(2, nvar, ngrids * nsg)
| 110 | + |
27 | 111 |
|
28 | 112 | def gen_uhf_response_sf(mf, mo_coeff=None, mo_occ=None, hermi=0, |
29 | 113 | collinear='mcol', collinear_samples=200): |
@@ -86,33 +170,38 @@ def __mcfun_fn_eval_xc(ni, xc_code, xctype, rho, deriv): |
86 | 170 | evfk[order] = xc_deriv.ud2ts(evfk[order]) |
87 | 171 | return evfk |
88 | 172 |
|
def __mcfun_fn_eval_xc2(ni, xc_code, xctype, rho, deriv):
    '''Evaluate the functional for a (t, s) density pair on the GPU.

    ``rho`` unpacks into two arrays t and s, which are combined into
    ((t + s)/2, (t - s)/2) and passed to ``ni.eval_xc_eff`` with spin=1.
    Every derivative order from 1 to ``deriv`` that is not None is then
    passed through ``xc_deriv_gpu.ud2ts`` (presumably the conversion back
    to the t/s representation -- confirm against xc_deriv).

    Returns:
        The outputs of ``ni.eval_xc_eff`` as a list, with the converted
        derivative entries replaced in place.
    '''
    total, spin_part = rho
    # cp.asarray leaves arrays that already live on the device untouched.
    total = cp.asarray(total)
    spin_part = cp.asarray(spin_part)

    half_sum = (total + spin_part) * .5
    half_diff = (total - spin_part) * .5
    rho_pair = cp.stack([half_sum, half_diff])

    evfk = list(ni.eval_xc_eff(xc_code, rho_pair, deriv=deriv,
                               xctype=xctype, spin=1))
    for order in range(1, deriv + 1):
        term = evfk[order]
        if term is None:
            continue
        evfk[order] = xc_deriv_gpu.ud2ts(term)
    return evfk
| 187 | + |
89 | 188 | # Edited based on pyscf.dft.numint2c.mcfun_eval_xc_adapter |
def mcfun_eval_xc_adapter_sf(ni, xc_code, collinear_samples):
    '''Build the eval_xc_eff callable used for the spin-flip kernel.

    The functional type is looked up once from ``ni`` and bound, together
    with ``ni`` and ``xc_code``, into the evaluator that eval_xc_eff_sf
    calls for the sampled densities.

    Returns:
        A function ``eval_xc_eff(xc_code, rho, deriv=1, omega=None,
        xctype=None, verbose=None)``.  Only ``rho`` and ``deriv`` are
        used; the other arguments are accepted and ignored.  It returns
        the list from eval_xc_eff_sf with every non-None entry passed
        through ``cp.asarray``.
    '''
    bound_xctype = ni._xc_type(xc_code)
    bound_xc_code = xc_code

    def fn_eval_xc(rho, deriv):
        return __mcfun_fn_eval_xc2(ni, bound_xc_code, bound_xctype, rho, deriv)

    def eval_xc_eff(xc_code, rho, deriv=1, omega=None, xctype=None, verbose=None):
        terms = eval_xc_eff_sf(fn_eval_xc, rho, deriv,
                               collinear_samples=collinear_samples)
        return [None if term is None else cp.asarray(term) for term in terms]

    return eval_xc_eff
113 | 202 |
|
114 | 203 | def cache_xc_kernel_sf(ni, mol, grids, xc_code, mo_coeff, mo_occ, |
115 | | - collinear_samples): |
| 204 | + collinear_samples, deriv=2): |
116 | 205 | '''Compute the fxc_sf, which can be used in SF-TDDFT/TDA |
117 | 206 | ''' |
118 | 207 | xctype = ni._xc_type(xc_code) |
@@ -148,8 +237,12 @@ def cache_xc_kernel_sf(ni, mol, grids, xc_code, mo_coeff, mo_occ, |
148 | 237 | rho_z = cp.array([rho_ab[0]+rho_ab[1], |
149 | 238 | rho_ab[0]-rho_ab[1]]) |
150 | 239 | eval_xc_eff = mcfun_eval_xc_adapter_sf(ni, xc_code, collinear_samples) |
151 | | - vxc, fxc = eval_xc_eff(xc_code, rho_z, deriv=2, xctype=xctype)[1:3] |
152 | | - return rho_ab, vxc, fxc |
| 240 | + if deriv == 2: |
| 241 | + vxc, fxc = eval_xc_eff(xc_code, rho_z, deriv=2, xctype=xctype)[1:3] |
| 242 | + return rho_ab, vxc, fxc |
| 243 | + elif deriv == 3: |
| 244 | + vxc, fxc, kxc = eval_xc_eff(xc_code, rho_z, deriv=3, xctype=xctype)[1:4] |
| 245 | + return rho_ab, vxc, fxc, kxc |
153 | 246 |
|
154 | 247 | def nr_uks_fxc_sf(ni, mol, grids, xc_code, dm0, dms, relativity=0, hermi=0, |
155 | 248 | rho0=None, vxc=None, fxc=None): |
|
0 commit comments