Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions gpu4pyscf/lib/gvhf-md/md_contract_j.cu
Original file line number Diff line number Diff line change
Expand Up @@ -882,7 +882,7 @@ int md_j_4dm_unrolled(RysIntEnvVars *envs, JKMatrix *jk, MDBoundsInfo *bounds, d

extern "C" {
int MD_build_j(double *vj, double *dm, int n_dm, int dm_size,
RysIntEnvVars envs, int *scheme, int *shls_slice,
RysIntEnvVars *envs, int *scheme, int *shls_slice,
int npairs_ij, int npairs_kl,
int *pair_ij_mapping, int *pair_kl_mapping,
int *pair_ij_loc, int *pair_kl_loc,
Expand Down Expand Up @@ -935,23 +935,23 @@ int MD_build_j(double *vj, double *dm, int n_dm, int dm_size,
pRt2_kl_ij += offset_for_Rt2_idx(lij, lkl);
efg_phase += offset_for_Rt2_idx(0, lkl);
if (n_dm == 1) {
if (!md_j_unrolled(&envs, &jk, &bounds, omega)) {
if (!md_j_unrolled(envs, &jk, &bounds, omega)) {
bounds.qd_ij_max = qd_ij_max + qd_offset_for_threads(npairs_ij, threads_ij);
bounds.qd_kl_max = qd_kl_max + qd_offset_for_threads(npairs_kl, threads_kl);
md_j_1dm_kernel<<<blocks, threads, buflen>>>(
envs, jk, bounds, threads_ij, threads_kl, tilex, tiley,
*envs, jk, bounds, threads_ij, threads_kl, tilex, tiley,
pRt2_kl_ij, efg_phase);
}
} else {
if (!md_j_4dm_unrolled(&envs, &jk, &bounds, omega, dm_size)) {
if (!md_j_4dm_unrolled(envs, &jk, &bounds, omega, dm_size)) {
bounds.qd_ij_max = qd_ij_max + qd_offset_for_threads(npairs_ij, threads_ij);
bounds.qd_kl_max = qd_kl_max + qd_offset_for_threads(npairs_kl, threads_kl);
for (int dm_offset = 0; dm_offset < n_dm; dm_offset+=4) {
jk.vj = vj + dm_offset * dm_size;
jk.dm = dm + dm_offset * dm_size;
jk.n_dm = n_dm - dm_offset;
md_j_4dm_kernel<<<blocks, threads, buflen>>>(
envs, jk, bounds, threads_ij, threads_kl, tilex, tiley, dm_size,
*envs, jk, bounds, threads_ij, threads_kl, tilex, tiley, dm_size,
pRt2_kl_ij, efg_phase);
}
}
Expand Down
6 changes: 3 additions & 3 deletions gpu4pyscf/lib/gvhf-md/pbc_md_contract_j.cu
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ void pbc_md_j_kernel(RysIntEnvVars envs, JKMatrix jmat, MDBoundsInfo bounds,
extern "C" {
int PBC_build_j(double *vj, double *dm, int n_dm,
int dm_xyz_size, int nimgs_uniq_pair,
RysIntEnvVars envs, int *scheme, int *shls_slice,
RysIntEnvVars *envs, int *scheme, int *shls_slice,
int npairs_ij, int npairs_kl,
int *pair_ij_mapping, int *pair_kl_mapping,
int *pair_ij_loc, int *pair_kl_loc,
Expand Down Expand Up @@ -386,11 +386,11 @@ int PBC_build_j(double *vj, double *dm, int n_dm,
int dm_size = dm_xyz_size * nimgs_uniq_pair;
for (int i_dm = 0; i_dm < n_dm; ++i_dm) {
JKMatrix jmat = {vj+i_dm*dm_size, NULL, dm+i_dm*dm_size, n_dm, 0, omega};
if (1){//!pbc_md_j_unrolled(&envs, &jmat, &bounds, omega)) {
if (1){//!pbc_md_j_unrolled(envs, &jmat, &bounds, omega)) {
bounds.qd_ij_max = qd_ij_max + qd_offset_for_threads(npairs_ij, threads_ij);
bounds.qd_kl_max = qd_kl_max + qd_offset_for_threads(npairs_kl, threads_kl);
pbc_md_j_kernel<<<blocks, threads, buflen>>>(
envs, jmat, bounds, threads_ij, threads_kl, tilex, tiley,
*envs, jmat, bounds, threads_ij, threads_kl, tilex, tiley,
pRt2_kl_ij, efg_phase);
}
}
Expand Down
8 changes: 4 additions & 4 deletions gpu4pyscf/lib/gvhf-rys/rys_contract_j.cu
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ extern int rys_j_unrolled(RysIntEnvVars *envs, JKMatrix *jk, BoundsInfo *bounds,

extern "C" {
int RYS_build_j(double *vj, double *dm, int n_dm, int nao,
RysIntEnvVars envs, int *scheme, int *shls_slice,
RysIntEnvVars *envs, int *scheme, int *shls_slice,
int npairs_ij, int npairs_kl, int *pair_ij_mapping, int *pair_kl_mapping,
float *q_cond, float *s_estimator, float *dm_cond, float cutoff,
int *pool, int *atm, int natm, int *bas, int nbas, double *env)
Expand Down Expand Up @@ -834,7 +834,7 @@ int RYS_build_j(double *vj, double *dm, int n_dm, int nao,

JKMatrix jk = {vj, NULL, dm, n_dm, 0, omega};

if (!rys_j_unrolled(&envs, &jk, &bounds, pool)) {
if (!rys_j_unrolled(envs, &jk, &bounds, pool)) {
int quartets_per_block = scheme[0];
int gout_stride = scheme[1];
int with_gout = scheme[2];
Expand All @@ -846,11 +846,11 @@ int RYS_build_j(double *vj, double *dm, int n_dm, int nao,
int buflen = (nroots*2 + g_size*3 + 6) * quartets_per_block + iprim*jprim;
if (with_gout) {
buflen += nf3_ij*nf3_kl * quartets_per_block;
rys_j_with_gout_kernel<<<npairs_ij, threads, buflen*sizeof(double)>>>(envs, jk, bounds, pool);
rys_j_with_gout_kernel<<<npairs_ij, threads, buflen*sizeof(double)>>>(*envs, jk, bounds, pool);
} else {
buflen += (nf3_ij+nf3_kl*2+(lij+1)*(lkl+1)*(nmax+2)) * quartets_per_block;
buflen += nf3_ij; // dm_ij_cache
rys_j_kernel<<<npairs_ij, threads, buflen*sizeof(double)>>>(envs, jk, bounds, pool);
rys_j_kernel<<<npairs_ij, threads, buflen*sizeof(double)>>>(*envs, jk, bounds, pool);
}
}
cudaError_t err = cudaGetLastError();
Expand Down
10 changes: 5 additions & 5 deletions gpu4pyscf/lib/gvhf-rys/rys_contract_jk.cu
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ extern int rys_jk_unrolled(RysIntEnvVars *envs, JKMatrix *jk, BoundsInfo *bounds

extern "C" {
int RYS_build_jk(double *vj, double *vk, double *dm, int n_dm, int nao,
RysIntEnvVars envs, int *shls_slice, int shm_size,
RysIntEnvVars *envs, int *shls_slice, int shm_size,
int npairs_ij, int npairs_kl,
uint32_t *pair_ij_mapping, uint32_t *pair_kl_mapping,
float *q_cond, float *s_estimator, float *dm_cond, float cutoff,
Expand Down Expand Up @@ -567,7 +567,7 @@ int RYS_build_jk(double *vj, double *vk, double *dm, int n_dm, int nao,
ntiles_i, ntiles_j, ntiles_k, ntiles_l};

JKMatrix jk = {vj, vk, dm, n_dm, 0, omega};
if (!rys_jk_unrolled(&envs, &jk, &bounds, pool)) {
if (!rys_jk_unrolled(envs, &jk, &bounds, pool)) {
GXYZOffset* p_gxyz_offset = RYS_make_gxyz_offset(bounds);
int gout_pattern = (((li == 0) >> 3) |
((lj == 0) >> 2) |
Expand All @@ -579,7 +579,7 @@ int RYS_build_jk(double *vj, double *vk, double *dm, int n_dm, int nao,
int reserved_shm_size = (buflen - cart_idx_size*4)/8;

rys_jk_kernel<<<npairs_ij, threads, buflen>>>(
envs, jk, bounds, pool, p_gxyz_offset,
*envs, jk, bounds, pool, p_gxyz_offset,
gout_pattern, reserved_shm_size);

int n_tiles = ntiles_i * ntiles_j * ntiles_k * ntiles_l;
Expand All @@ -588,7 +588,7 @@ int RYS_build_jk(double *vj, double *vk, double *dm, int n_dm, int nao,
min(256, n_tiles-256));
int reserved_shm_size = (buflen - cart_idx_size*4)/8;
rys_jk_kernel<<<npairs_ij, threads, buflen>>>(
envs, jk, bounds, pool, p_gxyz_offset+256,
*envs, jk, bounds, pool, p_gxyz_offset+256,
gout_pattern, reserved_shm_size);
}

Expand All @@ -597,7 +597,7 @@ int RYS_build_jk(double *vj, double *vk, double *dm, int n_dm, int nao,
min(256, n_tiles-512));
int reserved_shm_size = (buflen - cart_idx_size*4)/8;
rys_jk_kernel<<<npairs_ij, threads, buflen>>>(
envs, jk, bounds, pool, p_gxyz_offset+512,
*envs, jk, bounds, pool, p_gxyz_offset+512,
gout_pattern, reserved_shm_size);
}
}
Expand Down
10 changes: 5 additions & 5 deletions gpu4pyscf/lib/gvhf-rys/rys_contract_k.cu
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ extern int rys_k_unrolled(RysIntEnvVars *envs, JKMatrix *kmat, BoundsInfo *bound

extern "C" {
int RYS_build_k(double *vk, double *dm, int n_dm, int nao,
RysIntEnvVars envs, int *shls_slice, int shm_size,
RysIntEnvVars *envs, int *shls_slice, int shm_size,
int npairs_ij, int npairs_kl,
uint32_t *pair_ij_mapping, uint32_t *pair_kl_mapping,
float *q_cond, float *s_estimator, float *dm_cond, float cutoff,
Expand Down Expand Up @@ -664,7 +664,7 @@ int RYS_build_k(double *vk, double *dm, int n_dm, int nao,
kmat.sr_factor = 1;
}

if (!rys_k_unrolled(&envs, &kmat, &bounds, pool)) {
if (!rys_k_unrolled(envs, &kmat, &bounds, pool)) {
GXYZOffset* p_gxyz_offset = RYS_make_gxyz_offset(bounds);
int gout_pattern = (((li == 0) >> 3) |
((lj == 0) >> 2) |
Expand All @@ -676,7 +676,7 @@ int RYS_build_k(double *vk, double *dm, int n_dm, int nao,
int reserved_shm_size = (buflen - cart_idx_size*4)/8;

rys_k_kernel<<<npairs_ij, threads, buflen>>>(
envs, kmat, bounds, pool, p_gxyz_offset,
*envs, kmat, bounds, pool, p_gxyz_offset,
gout_pattern, reserved_shm_size);

int n_tiles = ntiles_i * ntiles_j * ntiles_k * ntiles_l;
Expand All @@ -685,7 +685,7 @@ int RYS_build_k(double *vk, double *dm, int n_dm, int nao,
min(256, n_tiles-256));
int reserved_shm_size = (buflen - cart_idx_size*4)/8;
rys_k_kernel<<<npairs_ij, threads, buflen>>>(
envs, kmat, bounds, pool, p_gxyz_offset+256,
*envs, kmat, bounds, pool, p_gxyz_offset+256,
gout_pattern, reserved_shm_size);
}

Expand All @@ -694,7 +694,7 @@ int RYS_build_k(double *vk, double *dm, int n_dm, int nao,
min(256, n_tiles-512));
int reserved_shm_size = (buflen - cart_idx_size*4)/8;
rys_k_kernel<<<npairs_ij, threads, buflen>>>(
envs, kmat, bounds, pool, p_gxyz_offset+512,
*envs, kmat, bounds, pool, p_gxyz_offset+512,
gout_pattern, reserved_shm_size);
}
}
Expand Down
Loading
Loading