Skip to content

Commit 99eed65

Browse files
committed
For very auxiliary diffuse basis, small errors in pbc-rsdf_builder 2c2e integrals causes linear dependency issue
1 parent 52bd613 commit 99eed65

File tree

4 files changed

+49
-10
lines changed

4 files changed

+49
-10
lines changed

gpu4pyscf/lib/pbc/fill_int3c2e_v2.cu

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@
2626
#define GOUT_WIDTH 54
2727
#define PAGE_SIZE 30
2828
#define REMOTE_THRESHOLD 50
29-
#define PAGES_PER_BLOCK 524288
30-
// approximately, 4000 images in each ijk shell triplet for 256 threads
31-
#define KTASKS_PER_BLOCK 16
29+
#define PAGES_PER_BLOCK 1048576
30+
// approximately, 15000 images in each ijk shell triplet for 256 threads
31+
#define KTASKS_PER_BLOCK 8
3232

3333
typedef struct {
3434
int pair_ij;
@@ -44,8 +44,6 @@ __device__ __forceinline__ unsigned get_smid() {
4444
return smid;
4545
}
4646

47-
#define allocate_page() (page_pool + atomicAdd(&num_pages, 1))
48-
4947
__device__ __forceinline__
5048
void _filter_images(int& num_pages, // is stored in shm
5149
ImgIdxPage *page_pool, PBCIntEnvVars &envs,
@@ -163,7 +161,12 @@ void _filter_images(int& num_pages, // is stored in shm
163161
if (page != NULL) {
164162
page->nimgs = PAGE_SIZE;
165163
}
166-
page = allocate_page();
164+
int page_offset = atomicAdd(&num_pages, 1);
165+
if (page_offset >= PAGES_PER_BLOCK) {
166+
printf("Page overflow\n");
167+
__trap();
168+
}
169+
page = page_pool + page_offset;
167170
page->pair_ij = pair_ij;
168171
page->ksh = ksh;
169172
counts = 0;
@@ -204,7 +207,6 @@ void pbc_int3c2e_latsum23_kernel(double *out, PBCIntEnvVars envs, PBCInt3c2eBoun
204207
int nksh_per_block = k_per_block * KTASKS_PER_BLOCK;
205208
int kcount0 = ksh_block_id * nksh_per_block;
206209
int kcount1 = min(ncells*bounds.nksh, kcount0 + nksh_per_block);
207-
int nimgs = envs.nimgs;
208210

209211
int li = bounds.li;
210212
int lj = bounds.lj;
@@ -248,7 +250,6 @@ void pbc_int3c2e_latsum23_kernel(double *out, PBCIntEnvVars envs, PBCInt3c2eBoun
248250
kcount0, kcount1, log_cutoff);
249251
__syncthreads();
250252
if (num_pages >= PAGES_PER_BLOCK) {
251-
printf("Page overflow\n");
252253
__trap();
253254
}
254255
__shared__ int img_max;

gpu4pyscf/pbc/df/int3c2e.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
L_AUX_MAX = 6
5757
GOUT_WIDTH = 54
5858
THREADS = 256
59-
PAGES_PER_BLOCK = 524288
59+
PAGES_PER_BLOCK = 1048576
6060
PAGE_SIZE = 32 * 4 # Bytes
6161

6262
def sr_aux_e2(cell, auxcell, omega, kpts=None, bvk_kmesh=None, j_only=False):

gpu4pyscf/pbc/df/rsdf_builder.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,11 @@ def eigenvalue_decomposed_metric(j2c, linear_dep_threshold=LINEAR_DEP_THR):
332332
v1 = v[:,mask]
333333
v1 *= w[mask]**-.5
334334
j2c = v1
335-
idx = cp.where(w < -linear_dep_threshold)[0]
335+
# linear_dep_threshold for negative eigenvalues are too tight. Small errors
336+
# in 2c2e metric would lead to small negative eigenvalues. They can be
337+
# safely filtered.
338+
#idx = cp.where(w < -linear_dep_threshold)[0]
339+
idx = cp.where(w < -1e-4)[0]
336340
j2c_negative = None
337341
if len(idx) > 0:
338342
j2c_negative = (v[:,idx] * (-w[idx])**-.5)

gpu4pyscf/pbc/df/tests/test_rsdf_builder.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -638,3 +638,37 @@ def test_2c2e():
638638
dat = rsdf_builder._get_2c2e(cell, kpts, omega, with_long_range=True)
639639
ref = _get_2c2e_slow(cell, kpts, omega, with_long_range=True)
640640
assert abs(dat - cp.asarray(ref)).max() < 1e-10
641+
642+
def test_kpts_compressed_linear_dep():
643+
from pyscf.pbc.df import df as df_cpu
644+
cell = pyscf.M(
645+
atom='''
646+
C 0.0 0.0 0.0
647+
C 0.0 1.8 1.8
648+
C 1.8 0.0 1.8
649+
C 1.8 1.8 0.0''', a=np.eye(3) * 3.6,
650+
basis=[[0, [4., 1.]],
651+
[0, [.1, 1.]],
652+
[0, [.035, 1.]]])
653+
auxcell = df_cpu.make_auxcell(cell)
654+
nao = cell.nao
655+
kmesh = [2, 1, 1]
656+
kpts = cell.make_kpts(kmesh)
657+
dat, dat_neg, idx = rsdf_builder.compressed_cderi_kk(
658+
cell, auxcell, kpts=kpts)
659+
ref = build_cderi(cell, auxcell, kpts)[0]
660+
kk_conserv = k2gamma.double_translation_indices(kmesh)
661+
bvkmesh_Ls = k2gamma.translation_vectors_for_kmesh(cell, kmesh, True)
662+
expLk = cp.exp(1j*cp.asarray(bvkmesh_Ls.dot(kpts.T)))
663+
for kp in sorted(dat):
664+
out = rsdf_builder.unpack_cderi(dat[kp], idx, kp, kk_conserv, expLk, nao)
665+
ki_idx, kj_idx = np.where(kk_conserv == kp)
666+
for ki, kj in zip(ki_idx, kj_idx):
667+
if (ki, kj) in ref:
668+
_ref = ref[ki, kj]
669+
else:
670+
_ref = ref[kj, ki].conj().transpose(0,2,1)
671+
_ref = np.einsum('pij,plk->ijkl', _ref, _ref.conj(), optimize=True)
672+
_dat = np.einsum('pij,plk->ijkl', out[ki], out[ki].conj(), optimize=True)
673+
print(ki, kj)
674+
assert abs(_ref - _dat).max() < 3e-7

0 commit comments

Comments
 (0)