Skip to content

Commit 6af29a4

Browse files
authored
PBC Coulomb matrix using MD J engine (#547)
* Reduce shared memory requirements in MD J engine * Bugfixes * Optimize md_j by caching Rt2 indices * Add pbc_md_contract_j cuda kernel * Update j_engine * Bugfix for multi-dm md-j * Off-diagonal elements were incorrectly scaled by 0.5 * Debug pbc md-j * Fix small errors in q_cond * almost work * dtype changes cause rsjk crash * Fix kpts Coulomb matrix; Add tests * Lint * j_engine_3c2e bug due to syncthreads * pickle serialization error
1 parent 99eed65 commit 6af29a4

25 files changed

+9803
-4939
lines changed

gpu4pyscf/df/j_engine_3c2e.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,13 @@ def build(self, cutoff=1e-14):
6666
else:
6767
q_cond = np.empty((nbas,nbas))
6868
intor = prim_mol._add_suffix('int2e')
69-
_vhf.libcvhf.CVHFnr_int2e_q_cond(
70-
getattr(_vhf.libcvhf, intor), lib.c_null_ptr(),
71-
q_cond.ctypes, ao_loc.ctypes,
72-
prim_mol._atm.ctypes, ctypes.c_int(prim_mol.natm),
73-
prim_mol._bas.ctypes, ctypes.c_int(prim_mol.nbas),
74-
prim_mol._env.ctypes)
69+
with prim_mol.with_integral_screen(1e-26):
70+
_vhf.libcvhf.CVHFnr_int2e_q_cond(
71+
getattr(_vhf.libcvhf, intor), lib.c_null_ptr(),
72+
q_cond.ctypes, ao_loc.ctypes,
73+
prim_mol._atm.ctypes, ctypes.c_int(prim_mol.natm),
74+
prim_mol._bas.ctypes, ctypes.c_int(prim_mol.nbas),
75+
prim_mol._env.ctypes)
7576
q_cond = np.log(q_cond + 1e-300).astype(np.float32)
7677
log.timer('Initialize q_cond', *cput0)
7778

gpu4pyscf/df/tests/test_df_int3c2e.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def test_contract_int3c2e():
159159
H 0.757 4. -0.4696
160160
C 1. 1. 0.
161161
''',
162-
basis=('ccpvtz', [[1, [3.7, 1]], [1, [2., 1]], [1, [.8, 1]]])
162+
basis=('ccpvtz', [[1, [3.7, 1, .1]], [1, [2., .5, .3]], [1, [.8, .5, .8]]])
163163
)
164164
auxmol = mol.copy()
165165
auxmol.basis = ('weigend', [[3, [2, 1, .5], [1, .2, 1]]])

gpu4pyscf/hessian/tests/test_rhf_hessian.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,6 @@ def test_jk_mix(self):
180180
C H
181181
0.4 1
182182
''',
183-
output = '/dev/null'
184183
)
185184
nao = mol1.nao
186185
mo_coeff = cupy.random.rand(nao, nao)
@@ -197,7 +196,6 @@ def test_jk_mix(self):
197196
vk_cpu = (mo_coeff.T @ vk @ mocc).reshape(1,-1)
198197
assert cupy.linalg.norm(vj_cpu - vj_mo) < 1e-5
199198
assert cupy.linalg.norm(vk_cpu - vk_mo) < 1e-5
200-
mol1.stdout.close()
201199

202200
@unittest.skipIf(num_devices > 1, '')
203201
def test_ecp_hess(self):

gpu4pyscf/lib/cupy_helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,7 @@ def takebak(out, a, indices, axis=-1):
520520

521521
def transpose_sum(a, stream=None):
522522
'''
523-
return a + a.transpose(0,2,1)
523+
perform a + a.transpose(0,2,1) inplace
524524
'''
525525
assert isinstance(a, cupy.ndarray)
526526
assert a.flags.c_contiguous

gpu4pyscf/lib/gvhf-md/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ add_library(gvhf_md SHARED
44
md_contract_j.cu unrolled_md_j.cu unrolled_md_j_4dm.cu
55
md_indices.cu md_j_driver.cu md_pairdata.c
66
contract_int3c2e.cu
7+
pbc_md_contract_j.cu
78
)
89

910
#option(BUILD_SHARED_LIBS "build shared libraries" 1)

gpu4pyscf/lib/gvhf-md/contract_int3c2e.cu

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -461,8 +461,6 @@ void unrolled_contract_int3c2e(Int3c2eEnvVars envs, JKMatrix jk, BDiv3c2eBounds
461461
double fac = ck/(aij*ak*sqrt(aij+ak));
462462
if (pair_ij >= shl_pair1) {
463463
fac = 0;
464-
} else if (ish == jsh) {
465-
fac *= .5;
466464
}
467465
boys_fn(gamma_inc, theta, rr, jk.omega, fac, order, sp_id, nsp_per_block);
468466
Rt[0] = gamma_inc[sp_id+order*nsp_per_block];
@@ -529,10 +527,11 @@ void unrolled_contract_int3c2e(Int3c2eEnvVars envs, JKMatrix jk, BDiv3c2eBounds
529527
__shared__ typename BlockReduceT::TempStorage temp_storage;
530528
#pragma unroll
531529
for (int k = 0; k < nfk; k++) {
532-
vj_aux[k] = BlockReduceT(temp_storage).Sum(vj_aux[k]);
530+
double sum_jaux = BlockReduceT(temp_storage).Sum(vj_aux[k]);
533531
if (thread_id == 0) {
534-
atomicAdd(vj+k, vj_aux[k]);
532+
atomicAdd(vj+k, sum_jaux);
535533
}
534+
__syncthreads();
536535
}
537536
}
538537
}

0 commit comments

Comments
 (0)