diff --git a/pyscf/lib/CMakeLists.txt b/pyscf/lib/CMakeLists.txt index 3736e2d39..aa248821a 100644 --- a/pyscf/lib/CMakeLists.txt +++ b/pyscf/lib/CMakeLists.txt @@ -188,6 +188,15 @@ set_target_properties (clib_dsrg PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR} OUTPUT_NAME "dsrg") +# Build the LNO library +set (LNO_SOURCE_FILES "lno/ccsd_t.c") +add_library (clib_lno SHARED ${LNO_SOURCE_FILES}) +target_link_libraries (clib_lno cc ao2mo cvhf np_helper ${BLAS_LIBRARIES} ${OPENMP_C_PROPERTIES}) +set_target_properties (clib_lno PROPERTIES + CLEAN_DIRECT_OUTPUT 1 + LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR} + OUTPUT_NAME "lno") + # Build the CSFstring shared library set (CSF_SOURCE_FILES "csf/csfstring.c") add_library (clib_csf SHARED ${CSF_SOURCE_FILES}) diff --git a/pyscf/lib/lno/ccsd_t.c b/pyscf/lib/lno/ccsd_t.c new file mode 100644 index 000000000..1131fa0dd --- /dev/null +++ b/pyscf/lib/lno/ccsd_t.c @@ -0,0 +1,616 @@ +/* Copyright 2014-2018 The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + * + * Author: Hong-Zhou Ye + */ + +#include +#include +#include +#include +#include "config.h" +#include "np_helper/np_helper.h" +#include "vhf/fblas.h" + +typedef struct { + void *cache[6]; + short a; + short b; + short c; + short _padding; +} CacheJob; + +size_t _ccsd_t_gen_jobs(CacheJob *jobs, int nocc, int nvir, + int a0, int a1, int b0, int b1, + void *cache_row_a, void *cache_col_a, + void *cache_row_b, void *cache_col_b, size_t stride); + +void _make_permute_indices(int *idx, int n); + +/* FIXME: + reuse functions such as `_ccsd_t_gen_jobs` from pyscf/lib/cc/ccsd_t.c +*/ + + + +/* For LNO-CCSD(T) + + CCSD(T) energy normalized to a set of localized orbitals (LOs). + The canonical (T) energy + E = 1/3 * \sum_{ijk,abc} (4 W_{ijk,abc} + W_{ijk,bca} + W_{ijk,cab}) * + (V_{ijk,abc} - V_{ijk,cba}) / D_{ijk,abc} + is first rewritten using w = W / sqrt(D), v = V / sqrt(D) + E = 1/3 * \sum_{ijk,abc} (4 w_{ijk,abc} + w_{ijk,bca} + w_{ijk,cab}) * + (v_{ijk,abc} - v_{ijk,cba}) + and then rewritten using 6-fold permutational symmetry and ij being dummy variables + E = 1/3 * \sum_{a>=b>=c} \sum_{ijk} + w[ijk] * ( +8*v[ijk] -5*v[ikj] -2*v[jik] +2*v[jki] +2*v[kij] -5*v[kji] ) + + w[ikj] * ( -5*v[ijk] +8*v[ikj] +2*v[jik] -2*v[jki] -5*v[kij] +2*v[kji] ) + + w[kij] * ( +2*v[ijk] -5*v[ikj] -5*v[jik] +2*v[jki] +8*v[kij] -2*v[kji] ) + (we omitted abc indices above, which are "abc" for all of them). 
+ Finally, the local (T) energy for a localized orbital K is + E^K = 1/3 * \sum_{a>=b>=c} * \sum_{ij} + w[ijK] * ( +8*v[ijK] -5*v[iKj] -2*v[jiK] +2*v[jKi] +2*v[Kij] -5*v[Kji] ) + + w[iKj] * ( -5*v[ijK] +8*v[iKj] +2*v[jiK] -2*v[jKi] -5*v[Kij] +2*v[Kji] ) + + w[Kij] * ( +2*v[ijK] -5*v[iKj] -5*v[jiK] +2*v[jKi] +8*v[Kij] -2*v[Kji] ) + where + w_{ijK,abc} = \sum_{k} ulo_{Kk} * w_{ijk,abc} + v_{ijK,abc} = \sum_{k} ulo_{Kk} * v_{ijk,abc} +*/ + +// copied from pyscf/lib/cc/ccsd_t.c +static void get_wv(double *w, double *v, double *cache, + double *fvohalf, double *vooo, + double *vv_op, double *t1Thalf, double *t2T, + int nocc, int nvir, int a, int b, int c, int *idx) +{ + const double D0 = 0; + const double D1 = 1; + const double DN1 =-1; + const char TRANS_N = 'N'; + const int nmo = nocc + nvir; + const int noo = nocc * nocc; + const size_t nooo = nocc * noo; + const size_t nvoo = nvir * noo; + int i, j, k, n; + double *pt2T; + + dgemm_(&TRANS_N, &TRANS_N, &noo, &nocc, &nvir, + &D1, t2T+c*nvoo, &noo, vv_op+nocc, &nmo, + &D0, cache, &noo); + dgemm_(&TRANS_N, &TRANS_N, &nocc, &noo, &nocc, + &DN1, t2T+c*nvoo+b*noo, &nocc, vooo+a*nooo, &nocc, + &D1, cache, &nocc); + + pt2T = t2T + b * nvoo + a * noo; + for (n = 0, i = 0; i < nocc; i++) { + for (j = 0; j < nocc; j++) { + for (k = 0; k < nocc; k++, n++) { + w[idx[n]] += cache[n]; + v[idx[n]] +=(vv_op[i*nmo+j] * t1Thalf[c*nocc+k] + + pt2T[i*nocc+j] * fvohalf[c*nocc+k]); + } } } +} + +static void sym_wv(double *w, double *v, double *cache, + double *fvohalf, double *vooo, + double *vv_op, double *t1Thalf, double *t2T, + int nocc, int nvir, int a, int b, int c, int nirrep, + int *o_ir_loc, int *v_ir_loc, int *oo_ir_loc, int *orbsym, + int *idx) +{ + const double D0 = 0; + const double D1 = 1; + const char TRANS_N = 'N'; + const int nmo = nocc + nvir; + const int noo = nocc * nocc; + const size_t nooo = nocc * noo; + const size_t nvoo = nvir * noo; + int a_irrep = orbsym[nocc+a]; + int b_irrep = orbsym[nocc+b]; + int c_irrep = 
orbsym[nocc+c]; + int ab_irrep = a_irrep ^ b_irrep; + int bc_irrep = c_irrep ^ b_irrep; + int i, j, k, n; + int fr, f0, f1, df, mr, m0, m1, dm, mk0; + int ir, i0, i1, di, kr, k0, k1, dk, jr; + int ijr, ij0, ij1, dij, jkr, jk0, jk1, djk; + double *pt2T; + +/* symmetry adapted + * w = numpy.einsum('if,fjk->ijk', ov, t2T[c]) */ + pt2T = t2T + c * nvoo; + for (ir = 0; ir < nirrep; ir++) { + i0 = o_ir_loc[ir]; + i1 = o_ir_loc[ir+1]; + di = i1 - i0; + if (di > 0) { + fr = ir ^ ab_irrep; + f0 = v_ir_loc[fr]; + f1 = v_ir_loc[fr+1]; + df = f1 - f0; + if (df > 0) { + jkr = fr ^ c_irrep; + jk0 = oo_ir_loc[jkr]; + jk1 = oo_ir_loc[jkr+1]; + djk = jk1 - jk0; + if (djk > 0) { + + dgemm_(&TRANS_N, &TRANS_N, &djk, &di, &df, + &D1, pt2T+f0*noo+jk0, &noo, vv_op+i0*nmo+nocc+f0, &nmo, + &D0, cache, &djk); + for (n = 0, i = o_ir_loc[ir]; i < o_ir_loc[ir+1]; i++) { + for (jr = 0; jr < nirrep; jr++) { + kr = jkr ^ jr; + for (j = o_ir_loc[jr]; j < o_ir_loc[jr+1]; j++) { + for (k = o_ir_loc[kr]; k < o_ir_loc[kr+1]; k++, n++) { + w[idx[i*noo+j*nocc+k]] += cache[n]; + } } + } } + } + } + } + } + +/* symmetry adapted + * w-= numpy.einsum('ijm,mk->ijk', eris_vooo[a], t2T[c,b]) */ + pt2T = t2T + c * nvoo + b * noo; + vooo += a * nooo; + mk0 = oo_ir_loc[bc_irrep]; + for (mr = 0; mr < nirrep; mr++) { + m0 = o_ir_loc[mr]; + m1 = o_ir_loc[mr+1]; + dm = m1 - m0; + if (dm > 0) { + kr = mr ^ bc_irrep; + k0 = o_ir_loc[kr]; + k1 = o_ir_loc[kr+1]; + dk = k1 - k0; + if (dk > 0) { + ijr = mr ^ a_irrep; + ij0 = oo_ir_loc[ijr]; + ij1 = oo_ir_loc[ijr+1]; + dij = ij1 - ij0; + if (dij > 0) { + + dgemm_(&TRANS_N, &TRANS_N, &dk, &dij, &dm, + &D1, pt2T+mk0, &dk, vooo+ij0*nocc+m0, &nocc, + &D0, cache, &dk); + for (n = 0, ir = 0; ir < nirrep; ir++) { + jr = ijr ^ ir; + for (i = o_ir_loc[ir]; i < o_ir_loc[ir+1]; i++) { + for (j = o_ir_loc[jr]; j < o_ir_loc[jr+1]; j++) { + for (k = o_ir_loc[kr]; k < o_ir_loc[kr+1]; k++, n++) { + w[idx[i*noo+j*nocc+k]] -= cache[n]; + } } + } } + } + mk0 += dm * dk; + } + } + } + + pt2T 
= t2T + b * nvoo + a * noo; + for (n = 0, i = 0; i < nocc; i++) { + for (j = 0; j < nocc; j++) { + for (k = 0; k < nocc; k++, n++) { + v[idx[n]] +=(vv_op[i*nmo+j] * t1Thalf[c*nocc+k] + + pt2T[i*nocc+j] * fvohalf[c*nocc+k]); + } } } +} + +static void zget_wv(double complex *w, double complex *v, + double complex *cache, double complex *fvohalf, + double complex *vooo, double complex *vv_op, + double complex *t1Thalf, double complex *t2T, + int nocc, int nvir, int a, int b, int c, int *idx) +{ + const double complex D0 = 0; + const double complex D1 = 1; + const double complex DN1 =-1; + const char TRANS_N = 'N'; + const int nmo = nocc + nvir; + const int noo = nocc * nocc; + const size_t nooo = nocc * noo; + const size_t nvoo = nvir * noo; + int i, j, k, n; + double complex *pt2T; + + zgemm_(&TRANS_N, &TRANS_N, &noo, &nocc, &nvir, + &D1, t2T+c*nvoo, &noo, vv_op+nocc, &nmo, + &D0, cache, &noo); + zgemm_(&TRANS_N, &TRANS_N, &nocc, &noo, &nocc, + &DN1, t2T+c*nvoo+b*noo, &nocc, vooo+a*nooo, &nocc, + &D1, cache, &nocc); + + pt2T = t2T + b * nvoo + a * noo; + for (n = 0, i = 0; i < nocc; i++) { + for (j = 0; j < nocc; j++) { + for (k = 0; k < nocc; k++, n++) { + w[idx[n]] += cache[n]; + v[idx[n]] +=(vv_op[i*nmo+j] * t1Thalf[c*nocc+k] + + pt2T[i*nocc+j] * fvohalf[c*nocc+k]); + } } } +} +// end copy + +static double _ccsd_t_get_energy_lo(double *w, double *v, double *mo_energy, + double *cache, double *ulo, int nlo, // <--- extra args + int nocc, int a, int b, int c, double fac) +{ + int i, j, k, mu; + double abc = mo_energy[nocc+a] + mo_energy[nocc+b] + mo_energy[nocc+c]; + double *tij1 = cache; + double *tij2 = tij1 + nocc; + double *tij3 = tij2 + nocc; + double *vij1 = tij3 + nocc; + double *vij2 = vij1 + nocc; + double *vij3 = vij2 + nocc; + double t3lo, vlo; + double et = 0; + + int ijk, ikj, jik, jki, kij, kji; + int n = nocc; + int nn = n * n; + double denom; + double *ulo_mu; + + for (i = 0; i < nocc; i++) { + for (j = 0; j < nocc; j++) { + for (k = 0; k < nocc; 
k++) { + ijk = i*nn + j*n + k; + ikj = i*nn + k*n + j; + jik = j*nn + i*n + k; + jki = j*nn + k*n + i; + kij = k*nn + i*n + j; + kji = k*nn + j*n + i; + denom = abc - (mo_energy[i] + mo_energy[j] + mo_energy[k]); + denom = 1/sqrt(denom); + tij1[k] = w[ijk] * denom; + tij2[k] = w[ikj] * denom; + tij3[k] = w[kij] * denom; + vij1[k] = (+8*v[ijk] -5*v[ikj] -2*v[jik] +2*v[jki] +2*v[kij] -5*v[kji]) * denom; + vij2[k] = (-5*v[ijk] +8*v[ikj] +2*v[jik] -2*v[jki] -5*v[kij] +2*v[kji]) * denom; + vij3[k] = (+2*v[ijk] -5*v[ikj] -5*v[jik] +2*v[jki] +8*v[kij] -2*v[kji]) * denom; + } + for (mu = 0; mu < nlo; mu++) { + ulo_mu = ulo + mu*nocc; + + t3lo = vlo = 0.; + for (k = 0; k < nocc; k++) { + t3lo += tij1[k] * ulo_mu[k]; + vlo += vij1[k] * ulo_mu[k]; + } + et += t3lo * vlo; + + t3lo = vlo = 0.; + for (k = 0; k < nocc; k++) { + t3lo += tij2[k] * ulo_mu[k]; + vlo += vij2[k] * ulo_mu[k]; + } + et += t3lo * vlo; + + t3lo = vlo = 0.; + for (k = 0; k < nocc; k++) { + t3lo += tij3[k] * ulo_mu[k]; + vlo += vij3[k] * ulo_mu[k]; + } + et += t3lo * vlo; + } + } + } + et *= - fac / 6.; + + return et; +} +static double contract6_lo(int nocc, int nvir, int a, int b, int c, + double *mo_energy, double *t1T, double *t2T, + double *ulo, int nlo, // <--- extra args + int nirrep, int *o_ir_loc, int *v_ir_loc, + int *oo_ir_loc, int *orbsym, double *fvo, + double *vooo, double *cache1, void **cache, + int *permute_idx, double fac) +{ + int nooo = nocc * nocc * nocc; + int *idx0 = permute_idx; + int *idx1 = idx0 + nooo; + int *idx2 = idx1 + nooo; + int *idx3 = idx2 + nooo; + int *idx4 = idx3 + nooo; + int *idx5 = idx4 + nooo; + double *v0 = cache1; + double *w0 = v0 + nooo; + double *z0 = w0 + nooo; + double *cache2 = z0 + nooo; + double *wtmp = z0; + int i; + + for (i = 0; i < nooo; i++) { + w0[i] = 0; + v0[i] = 0; + } + + if (nirrep == 1) { + get_wv(w0, v0, wtmp, fvo, vooo, cache[0], t1T, t2T, nocc, nvir, a, b, c, idx0); + get_wv(w0, v0, wtmp, fvo, vooo, cache[1], t1T, t2T, nocc, nvir, a, c, b, 
idx1); + get_wv(w0, v0, wtmp, fvo, vooo, cache[2], t1T, t2T, nocc, nvir, b, a, c, idx2); + get_wv(w0, v0, wtmp, fvo, vooo, cache[3], t1T, t2T, nocc, nvir, b, c, a, idx3); + get_wv(w0, v0, wtmp, fvo, vooo, cache[4], t1T, t2T, nocc, nvir, c, a, b, idx4); + get_wv(w0, v0, wtmp, fvo, vooo, cache[5], t1T, t2T, nocc, nvir, c, b, a, idx5); + } else { + sym_wv(w0, v0, wtmp, fvo, vooo, cache[0], t1T, t2T, nocc, nvir, a, b, c, + nirrep, o_ir_loc, v_ir_loc, oo_ir_loc, orbsym, idx0); + sym_wv(w0, v0, wtmp, fvo, vooo, cache[1], t1T, t2T, nocc, nvir, a, c, b, + nirrep, o_ir_loc, v_ir_loc, oo_ir_loc, orbsym, idx1); + sym_wv(w0, v0, wtmp, fvo, vooo, cache[2], t1T, t2T, nocc, nvir, b, a, c, + nirrep, o_ir_loc, v_ir_loc, oo_ir_loc, orbsym, idx2); + sym_wv(w0, v0, wtmp, fvo, vooo, cache[3], t1T, t2T, nocc, nvir, b, c, a, + nirrep, o_ir_loc, v_ir_loc, oo_ir_loc, orbsym, idx3); + sym_wv(w0, v0, wtmp, fvo, vooo, cache[4], t1T, t2T, nocc, nvir, c, a, b, + nirrep, o_ir_loc, v_ir_loc, oo_ir_loc, orbsym, idx4); + sym_wv(w0, v0, wtmp, fvo, vooo, cache[5], t1T, t2T, nocc, nvir, c, b, a, + nirrep, o_ir_loc, v_ir_loc, oo_ir_loc, orbsym, idx5); + } + for (i = 0; i < nooo; i++) { + v0[i] += w0[i]; + } + + double et; + if (a == c) { + et = _ccsd_t_get_energy_lo(w0, v0, mo_energy, + cache2, ulo, nlo, // <--- extra args + nocc, a, b, c, 1./6); + } else if (a == b || b == c) { + et = _ccsd_t_get_energy_lo(w0, v0, mo_energy, + cache2, ulo, nlo, // <--- extra args + nocc, a, b, c, .5); + } else { + et = _ccsd_t_get_energy_lo(w0, v0, mo_energy, + cache2, ulo, nlo, // <--- extra args + nocc, a, b, c, 1.); + } + return et; +} +void CCsd_t_contract_lo(double *e_tot, + double *mo_energy, double *t1T, double *t2T, + double *vooo, double *fvo, + double *ulo, int nlo, // <--- extra args + int nocc, int nvir, int a0, int a1, int b0, int b1, + int nirrep, int *o_ir_loc, int *v_ir_loc, + int *oo_ir_loc, int *orbsym, + void *cache_row_a, void *cache_col_a, + void *cache_row_b, void *cache_col_b) +{ + int da = a1 - 
a0; + int db = b1 - b0; + CacheJob *jobs = malloc(sizeof(CacheJob) * da*db*b1); + size_t njobs = _ccsd_t_gen_jobs(jobs, nocc, nvir, a0, a1, b0, b1, + cache_row_a, cache_col_a, + cache_row_b, cache_col_b, sizeof(double)); + int *permute_idx = malloc(sizeof(int) * nocc*nocc*nocc * 6); + _make_permute_indices(permute_idx, nocc); +#pragma omp parallel default(none) \ + shared(njobs, nocc, nvir, nlo, ulo, mo_energy, t1T, t2T, nirrep, o_ir_loc, \ + v_ir_loc, oo_ir_loc, orbsym, vooo, fvo, jobs, e_tot, permute_idx) +{ + int a, b, c; + size_t k; + // extra 6*nocc for tij and vij in :func:`_ccsd_t_get_energy_lo` + double *cache1 = malloc(sizeof(double) * (nocc*nocc*nocc*3+2+6*nocc)); + double *t1Thalf = malloc(sizeof(double) * nvir*nocc * 2); + double *fvohalf = t1Thalf + nvir*nocc; + for (k = 0; k < nvir*nocc; k++) { + t1Thalf[k] = t1T[k] * .5; + fvohalf[k] = fvo[k] * .5; + } + double e = 0; +#pragma omp for schedule (dynamic, 4) + for (k = 0; k < njobs; k++) { + a = jobs[k].a; + b = jobs[k].b; + c = jobs[k].c; + e += contract6_lo(nocc, nvir, a, b, c, mo_energy, t1Thalf, t2T, + ulo, nlo, // <--- extra args + nirrep, o_ir_loc, v_ir_loc, oo_ir_loc, orbsym, + fvohalf, vooo, cache1, jobs[k].cache, permute_idx, + 1.0); + } + free(t1Thalf); + free(cache1); +#pragma omp critical + *e_tot += e; +} + free(permute_idx); + free(jobs); +} + + +static double _ccsd_t_zget_energy_lo(double complex *w, double complex *v, double *mo_energy, + double complex *cache, double complex *ulo, int nlo, + int nocc, int a, int b, int c, double fac) +{ + int i, j, k, mu; + double abc = mo_energy[nocc+a] + mo_energy[nocc+b] + mo_energy[nocc+c]; + double complex *tij1 = cache; + double complex *tij2 = tij1 + nocc; + double complex *tij3 = tij2 + nocc; + double complex *vij1 = tij3 + nocc; + double complex *vij2 = vij1 + nocc; + double complex *vij3 = vij2 + nocc; + double complex t3lo, vlo; + double et = 0; + + int ijk, ikj, jik, jki, kij, kji; + int n = nocc; + int nn = n * n; + double denom; + double 
complex *ulo_mu; + + for (i = 0; i < nocc; i++) { + for (j = 0; j < nocc; j++) { + for (k = 0; k < nocc; k++) { + ijk = i*nn + j*n + k; + ikj = i*nn + k*n + j; + jik = j*nn + i*n + k; + jki = j*nn + k*n + i; + kij = k*nn + i*n + j; + kji = k*nn + j*n + i; + denom = abc - (mo_energy[i] + mo_energy[j] + mo_energy[k]); + denom = 1/sqrt(denom); + tij1[k] = w[ijk] * denom; + tij2[k] = w[ikj] * denom; + tij3[k] = w[kij] * denom; + vij1[k] = (+8*v[ijk] -5*v[ikj] -2*v[jik] +2*v[jki] +2*v[kij] -5*v[kji]) * denom; + vij2[k] = (-5*v[ijk] +8*v[ikj] +2*v[jik] -2*v[jki] -5*v[kij] +2*v[kji]) * denom; + vij3[k] = (+2*v[ijk] -5*v[ikj] -5*v[jik] +2*v[jki] +8*v[kij] -2*v[kji]) * denom; + } + for (mu = 0; mu < nlo; mu++) { + ulo_mu = ulo + mu*nocc; + + t3lo = vlo = 0; + for (k = 0; k < nocc; k++) { + t3lo += tij1[k] * conj(ulo_mu[k]); + vlo += vij1[k] * conj(ulo_mu[k]); + } + et += t3lo * conj(vlo); + + t3lo = vlo = 0; + for (k = 0; k < nocc; k++) { + t3lo += tij2[k] * conj(ulo_mu[k]); + vlo += vij2[k] * conj(ulo_mu[k]); + } + et += t3lo * conj(vlo); + + t3lo = vlo = 0; + for (k = 0; k < nocc; k++) { + t3lo += tij3[k] * conj(ulo_mu[k]); + vlo += vij3[k] * conj(ulo_mu[k]); + } + et += t3lo * conj(vlo); + } + } + } + et *= - fac / 6.; + + return et; +} +static double complex +zcontract6_lo(int nocc, int nvir, int a, int b, int c, + double *mo_energy, double complex *t1T, double complex *t2T, + double complex *ulo, int nlo, // <--- extra args + int nirrep, int *o_ir_loc, int *v_ir_loc, + int *oo_ir_loc, int *orbsym, double complex *fvo, + double complex *vooo, double complex *cache1, void **cache, + int *permute_idx, double fac) +{ + int nooo = nocc * nocc * nocc; + int *idx0 = permute_idx; + int *idx1 = idx0 + nooo; + int *idx2 = idx1 + nooo; + int *idx3 = idx2 + nooo; + int *idx4 = idx3 + nooo; + int *idx5 = idx4 + nooo; + double complex *v0 = cache1; + double complex *w0 = v0 + nooo; + double complex *z0 = w0 + nooo; + double complex *cache2 = z0 + nooo; + double complex *wtmp = z0; + 
int i; + + for (i = 0; i < nooo; i++) { + w0[i] = 0; + v0[i] = 0; + } + + zget_wv(w0, v0, wtmp, fvo, vooo, cache[0], t1T, t2T, nocc, nvir, a, b, c, idx0); + zget_wv(w0, v0, wtmp, fvo, vooo, cache[1], t1T, t2T, nocc, nvir, a, c, b, idx1); + zget_wv(w0, v0, wtmp, fvo, vooo, cache[2], t1T, t2T, nocc, nvir, b, a, c, idx2); + zget_wv(w0, v0, wtmp, fvo, vooo, cache[3], t1T, t2T, nocc, nvir, b, c, a, idx3); + zget_wv(w0, v0, wtmp, fvo, vooo, cache[4], t1T, t2T, nocc, nvir, c, a, b, idx4); + zget_wv(w0, v0, wtmp, fvo, vooo, cache[5], t1T, t2T, nocc, nvir, c, b, a, idx5); + for (i = 0; i < nooo; i++) { + v0[i] += w0[i]; + } + + double complex et; + if (a == c) { + et = _ccsd_t_zget_energy_lo(w0, v0, mo_energy, + cache2, ulo, nlo, // <--- extra args + nocc, a, b, c, 1./6); + } else if (a == b || b == c) { + et = _ccsd_t_zget_energy_lo(w0, v0, mo_energy, + cache2, ulo, nlo, // <--- extra args + nocc, a, b, c, .5); + } else { + et = _ccsd_t_zget_energy_lo(w0, v0, mo_energy, + cache2, ulo, nlo, // <--- extra args + nocc, a, b, c, 1.); + } + return et; +} + +void CCsd_t_zcontract_lo(double complex *e_tot, + double *mo_energy, double complex *t1T, double complex *t2T, + double complex *vooo, double complex *fvo, + double complex *ulo, int nlo, // <--- extra args + int nocc, int nvir, int a0, int a1, int b0, int b1, + int nirrep, int *o_ir_loc, int *v_ir_loc, + int *oo_ir_loc, int *orbsym, + void *cache_row_a, void *cache_col_a, + void *cache_row_b, void *cache_col_b) +{ + int da = a1 - a0; + int db = b1 - b0; + CacheJob *jobs = malloc(sizeof(CacheJob) * da*db*b1); + size_t njobs = _ccsd_t_gen_jobs(jobs, nocc, nvir, a0, a1, b0, b1, + cache_row_a, cache_col_a, + cache_row_b, cache_col_b, + sizeof(double complex)); + int *permute_idx = malloc(sizeof(int) * nocc*nocc*nocc * 6); + _make_permute_indices(permute_idx, nocc); +#pragma omp parallel default(none) \ + shared(njobs, nocc, nvir, nlo, ulo, mo_energy, t1T, t2T, nirrep, o_ir_loc, \ + v_ir_loc, oo_ir_loc, orbsym, vooo, fvo, jobs, 
e_tot, permute_idx) +{ + int a, b, c; + size_t k; + // extra 6*nocc for tij and vij in :func:`_ccsd_t_zget_energy_lo` + double complex *cache1 = malloc(sizeof(double complex) * (nocc*nocc*nocc*3+2+6*nocc)); + double complex *t1Thalf = malloc(sizeof(double complex) * nvir*nocc * 2); + double complex *fvohalf = t1Thalf + nvir*nocc; + for (k = 0; k < nvir*nocc; k++) { + t1Thalf[k] = t1T[k] * .5; + fvohalf[k] = fvo[k] * .5; + } + double complex e = 0; +#pragma omp for schedule (dynamic, 4) + for (k = 0; k < njobs; k++) { + a = jobs[k].a; + b = jobs[k].b; + c = jobs[k].c; + e += zcontract6_lo(nocc, nvir, a, b, c, mo_energy, t1Thalf, t2T, + ulo, nlo, // <--- extra args + nirrep, o_ir_loc, v_ir_loc, oo_ir_loc, orbsym, + fvohalf, vooo, cache1, jobs[k].cache, permute_idx, + 1.0); + } + free(t1Thalf); + free(cache1); +#pragma omp critical + *e_tot += e; +} + free(permute_idx); + free(jobs); +} diff --git a/pyscf/lno/__init__.py b/pyscf/lno/__init__.py new file mode 100644 index 000000000..29de391e7 --- /dev/null +++ b/pyscf/lno/__init__.py @@ -0,0 +1,25 @@ +# Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Authors: Hong-Zhou Ye +# Chenghan Li +# Xing Zhang +# + +""" +Local Natural Orbital methods +""" +from .lno import LNO +from .lnoccsd import LNOCCSD, LNOCCSD_T +from .ulnoccsd import ULNOCCSD, ULNOCCSD_T diff --git a/pyscf/lno/lno.py b/pyscf/lno/lno.py new file mode 100644 index 000000000..feb9f96ee --- /dev/null +++ b/pyscf/lno/lno.py @@ -0,0 +1,749 @@ +#!/usr/bin/env python +# Copyright 2014-2021 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Hong-Zhou Ye +# + +''' Generic framework for spin-restricted local natural orbital (LNO)-based methods. + This code can be used to implement LNO-based local correlation approximation to + many correlated wavefunction methods. See `lnoccsd.py` for the implementation of + LNO-CCSD as an example. + + - Original publication by Kállay and co-workers: + Rolik and Kállay, J. Chem. Phys. 135, 104111 (2011) + + - Publication for this implementation by Ye and Berkelbach: + Ye and Berkelbach, J. Chem. Theory Comput. 
2024, 20, 20, 8948–8959 +''' + +import sys +import numbers +from collections.abc import Iterable +from functools import reduce + +import numpy as np +import h5py + +from pyscf import mp +from pyscf.lib import logger +from pyscf import lib +from pyscf import __config__ + +from pyscf.lno.make_lno_rdm1 import make_lo_rdm1_occ, make_lo_rdm1_vir + +einsum = lib.einsum + +DEBUG_BLKSIZE = getattr(__config__, 'lnocc_DEBUG_BLKSIZE', False) + + +r''' TODO's +[ ] chkfile / restart +''' + + +def kernel(mlno, lo_coeff, frag_lolist, lno_type, lno_thresh=None, lno_pct_occ=None, + lno_norb=None, eris=None): + r''' Kernel function for LNO-based methods. + + Args: + lo_coeff (np.ndarray): + Column vectors are the AO coefficients for a set of local(ized) orbitals. + These LOs must span at least the occupied space but can span none, part, or + all of the virtual space. Thus, lo_coeff.shape[1] >= nocc. + frag_lolist (list of list): + Fragment definition in terms of the LOs specified by 'lo_coeff'. E.g., + [[0,1,2],[3,5],[4],[6,7,8,9]...] + means + fragment 1 consists of LO 0, 1, and 2, + fragment 2 consists of LO 3 and 5, + fragment 3 consists of LO 4, + fragment 4 consists of LO 6, 7, 8, and 9, + ...
+ ''' + nfrag = len(frag_lolist) + if lno_pct_occ is None: + lno_pct_occ = [None, None] + if lno_norb is None: + lno_norb = [[None,None]] * nfrag + mf = mlno._scf + + log = logger.new_logger(mlno) + + cput0 = (logger.process_clock(), logger.perf_counter()) + + if eris is None: eris = mlno.ao2mo() + + cput2 = cput1 = (logger.process_clock(), logger.perf_counter()) + +# Loop over fragment + frag_res = [None] * nfrag + for ifrag,loidx in enumerate(frag_lolist): + if len(loidx) == 2 and isinstance(loidx[0], Iterable): # Unrestricted + orbloc = [lo_coeff[0][:,loidx[0]], lo_coeff[1][:,loidx[1]]] + lno_param = [ + [ + { + 'thresh': ( + lno_thresh[i][s] if isinstance(lno_thresh[i], Iterable) + else lno_thresh[i] + ), + 'pct_occ': ( + lno_pct_occ[i][s] if isinstance(lno_pct_occ[i], Iterable) + else lno_pct_occ[i] + ), + 'norb': ( + lno_norb[ifrag][i][s] if isinstance(lno_norb[ifrag][i], Iterable) + else lno_norb[ifrag][i] + ), + } for i in [0, 1] + ] for s in range(2) + ] + + else: + orbloc = lo_coeff[:,loidx] + lno_param = [{'thresh': lno_thresh[i], 'pct_occ': lno_pct_occ[i], + 'norb': lno_norb[ifrag][i]} for i in [0,1]] + + mo_coeff, frozen, uocc_loc, frag_msg = mlno.make_las(eris, orbloc, lno_type, lno_param) + cput2 = log.timer('Fragment %d make las'%(ifrag+1), *cput2) + log.info('Fragment %d/%d LAS: %s', ifrag+1, nfrag, frag_msg) + frag_res[ifrag], frag_msg = mlno.impurity_solve(mf, mo_coeff, uocc_loc, eris, frozen=frozen) + cput2 = log.timer('Fragment %d imp sol '%(ifrag+1), *cput2) + log.info('Fragment %d/%d Sol: %s', ifrag+1, nfrag, frag_msg) + cput1 = log.timer('Fragment %d'%(ifrag+1)+' '*(8-len(str(ifrag+1))), *cput1) + + classname = mlno.__class__.__name__ + cput0 = log.timer(classname+' '*(17-len(classname)), *cput0) + + return frag_res + + +def make_las(mlno, eris, orbloc, lno_type, lno_param): + log = logger.new_logger(mlno) + cput1 = (logger.process_clock(), logger.perf_counter()) + + s1e = mlno.s1e + + orboccfrz_core, orbocc, orbvir, orbvirfrz_core = 
mlno.split_mo_coeff() + moeocc, moevir = mlno.split_mo_energy()[1:3] + + ''' Projection of LO onto occ and vir + ''' + uocc_loc = reduce(np.dot, (orbloc.T.conj(), s1e, orbocc)) + uocc_loc, uocc_std, uocc_orth = \ + projection_construction(uocc_loc, mlno.lo_proj_thresh, mlno.lo_proj_thresh_active) + if uocc_loc.shape[1] == 0: + log.error('LOs do not overlap with occupied space. This could be caused ' + 'by either a bad fragment choice or too high of `lo_proj_thresh_active` ' + '(current value: %s).', mlno.lo_proj_thresh_active) + raise RuntimeError + log.info('LO occ proj: %d active | %d standby | %d orthogonal', + *[u.shape[1] for u in [uocc_loc,uocc_std,uocc_orth]]) + + uvir_loc = reduce(np.dot, (orbloc.T.conj(), s1e, orbvir)) + uvir_loc, uvir_std, uvir_orth = \ + projection_construction(uvir_loc, mlno.lo_proj_thresh, mlno.lo_proj_thresh_active) + log.info('LO vir proj: %d active | %d standby | %d orthogonal', + *[u.shape[1] for u in [uvir_loc,uvir_std,uvir_orth]]) + if uvir_loc.shape[1] == 0: + uvir_loc = uvir_std = uvir_orth = None + + ''' LNO construction + ''' + dmoo = mlno.make_lo_rdm1_occ(eris, moeocc, moevir, uocc_loc, uvir_loc, lno_type[0]) + if mlno._match_oldcode: dmoo *= 0.5 # TO MATCH OLD LNO CODE + dmoo = reduce(np.dot, (uocc_orth.T.conj(), dmoo, uocc_orth)) + if lno_param[0]['norb'] is not None: + lno_param[0]['norb'] -= uocc_loc.shape[1] + uocc_std.shape[1] + uoccact_orth, uoccfrz_orth = natorb_select(dmoo, uocc_orth, **lno_param[0]) + orboccfrz = np.hstack((orboccfrz_core, np.dot(orbocc, uoccfrz_orth))) + uoccact = subspace_eigh(np.diag(moeocc), np.hstack((uoccact_orth, uocc_std, uocc_loc)))[1] + orboccact = np.dot(orbocc, uoccact) + uoccact_loc = np.linalg.multi_dot((orboccact.T.conj(), s1e, orbloc)) + cput1 = log.timer_debug1('make_lo_rdm1_occ', *cput1) + + dmvv = mlno.make_lo_rdm1_vir(eris, moeocc, moevir, uocc_loc, uvir_loc, lno_type[1]) + if mlno._match_oldcode: dmvv *= 0.5 # TO MATCH OLD LNO CODE + if uvir_orth is not None: + dmvv = 
reduce(np.dot, (uvir_orth.T.conj(), dmvv, uvir_orth)) + if lno_param[1]['norb'] is not None: + lno_param[1]['norb'] -= uvir_loc.shape[1] + uvir_std.shape[1] + uviract_orth, uvirfrz_orth = natorb_select(dmvv, uvir_orth, **lno_param[1]) + orbvirfrz = np.hstack((np.dot(orbvir, uvirfrz_orth), orbvirfrz_core)) + uviract = subspace_eigh(np.diag(moevir), np.hstack((uviract_orth, uvir_std, uvir_loc)))[1] + orbviract = np.dot(orbvir, uviract) + else: + orbviract, orbvirfrz = natorb_select(dmvv, orbvir, **lno_param[1]) + orbvirfrz = np.hstack((orbvirfrz, orbvirfrz_core)) + uviract = reduce(np.dot, (orbvir.T.conj(), s1e, orbviract)) + uviract = subspace_eigh(np.diag(moevir), uviract)[1] + orbviract = np.dot(orbvir, uviract) + cput1 = log.timer_debug1('make_lo_rdm1_vir', *cput1) + + ''' LAS construction + ''' + orbfragall = [orboccfrz, orboccact, orbviract, orbvirfrz] + orbfrag = np.hstack(orbfragall) + norbfragall = np.asarray([x.shape[1] for x in orbfragall]) + locfragall = np.cumsum([0] + norbfragall.tolist()).astype(int) + frzfrag = np.concatenate(( + np.arange(locfragall[0], locfragall[1]), + np.arange(locfragall[3], locfragall[4]))).astype(int) + frag_msg = '%d/%d Occ | %d/%d Vir | %d/%d MOs' % ( + norbfragall[1], sum(norbfragall[:2]), + norbfragall[2], sum(norbfragall[2:4]), + sum(norbfragall[1:3]), sum(norbfragall) + ) + if len(frzfrag) == 0: + frzfrag = 0 + + return orbfrag, frzfrag, uoccact_loc, frag_msg + +def projection_construction(M, thresh, thresh_act=None): + r''' Given M_{mu,i} = the ovlp between two orthonormal basis, find + the unitary rotation |j'> = u_ij |i> so that {|j'>} significantly ovlp with + {|mu>}. 
+ + Three subsets will be returned, selected by abs(e) where e are the eigenvalues of M^H M (the squared singular values of M): + active : abs(e) > thresh_act + standby : abs(e) <= thresh_act but > thresh + frozen : abs(e) <= thresh + ''' + n, m = M.shape + e, u = np.linalg.eigh(np.dot(M.T.conj(), M)) + if thresh_act is None: thresh_act = thresh + assert( thresh_act >= thresh ) + mask_act = abs(e) > thresh_act + mask_std = np.logical_and(abs(e) > thresh, ~mask_act) + mask_frz = abs(e) <= thresh + return u[:,mask_act], u[:,mask_std], u[:,mask_frz] + +def subspace_eigh(fock, orb): + f = reduce(np.dot, (orb.T.conj(), fock, orb)) + if orb.shape[1] == 1: + moe = np.array([f[0,0]]) + else: + moe, u = np.linalg.eigh(f) + orb = np.dot(orb, u) + return moe, orb + +def natorb_select(dm, orb, thresh, pct_occ=None, norb=None): + e, u = np.linalg.eigh(dm) + e = abs(e) + order = np.argsort(e)[::-1] + e = e[order] + u = u[:,order] + if norb is None: + if pct_occ is None: + nkeep = np.count_nonzero(e > thresh) + else: + nkeep = np.count_nonzero(np.cumsum(e)/np.sum(e) <= pct_occ) + else: + nkeep = min(max(norb, 0), e.size) + + idx = np.arange(0, nkeep, dtype=int) + idxc = np.arange(nkeep, e.size, dtype=int) + orbx = np.dot(orb, u) + orb1x = sub_colspace(orbx, idx) + orb0x = sub_colspace(orbx, idxc) + return orb1x, orb0x + +def sub_colspace(A, idx): + if idx.size == 0: + return np.zeros([A.shape[0],0]) + else: + return A[:,idx] + +def get_fragment_energy(oovv, t2, uloc): + m = np.dot(uloc, uloc.T.conj()) + ed = einsum('ijab,kjab,ik->', t2, oovv, m) * 2 + ex = -einsum('ijab,kjba,ik->', t2, oovv, m) + ed = ed.real + ex = ex.real + ess = ed*0.5 + ex + eos = ed*0.5 + return lib.tag_array(ess+eos, spin_comp=np.array((ess, eos))) + + +class LNO(lib.StreamObject): + + r''' Base class for LNO-based methods + + This base class provides common functions for constructing LNO subspace. + Specific LNO-based methods (e.g., LNO-CCSD, LNO-CCSD(T)) can be implemented as + derived classes from this base class with appropriately defined method + `impurity_solve`.
+ + Input: + mf (PySCF SCF object): + Mean-field object. + lo_coeff (np.ndarray): + AO coefficient matrix of LOs. LOs must span the occupied space. + frag_lolist (nested list): + Fragment assignment in terms of LO index. E.g., [[0,2], [1], ...] means + frag 1 consists of LO 0 and 2, frag 2 consists of LO 1, etc. + lno_type (len-2 list): + lno_type = [occ_lno_type, vir_lno_type], where 'occ_lno_type' can be + '1h', '1p', or '2p' and 'vir_lno_type' can be '1p', '1h', '2h'. + Default is ['1h','1h']. + lno_thresh (float or len-2 list): + Thresholds for LNO truncation. Use a len-2 list to specify thresh for + occ and vir separately. Default is [1e-5,1e-6]. + frozen (int or list): + Same as the `frozen` attr in MP2/CCSD etc. modules. + ''' + + def __init__(self, mf, lo_coeff, frag_lolist, lno_type=None, lno_thresh=None, frozen=None): + + self.mol = mf.mol + self._scf = mf + if hasattr(self._scf, 'with_df'): + self.with_df = self._scf.with_df + else: + self.with_df = None + self.verbose = self.mol.verbose + self.stdout = self.mol.stdout + self.max_memory = mf.max_memory + + self.lo_coeff = lo_coeff + self.frag_lolist = frag_lolist + self.frozen = frozen + + # for LNO construction + self.lno_type = ['1h','1h'] if lno_type is None else lno_type + self.lno_thresh = [1e-5, 1e-6] if lno_thresh is None else lno_thresh + self.lno_pct_occ = None + self.lno_norb = None + self.lo_proj_thresh = 1e-10 + self.lo_proj_thresh_active = 0.1 + + # extra parameters + self.frag_wghtlist = None + self.verbose_imp = 0 # allow separate verbose level for `impurity_solve` + + # df eri + self._ovL = None + self._ovL_to_save = None + self.force_outcore_ao2mo = False + + # backward compatibility + self._match_oldcode = False # if True, MP2 dm for LNO generation is multiplied by 0.5 + + # Not input options + self._nmo = None + self._nocc = None + self._s1e = None + + self._mo_occ = None + self._mo_coeff = None + self._mo_energy = None + + @property + def nfrag(self): + return len(self.frag_lolist) + +
def dump_flags(self, verbose=None):
    '''Write the settings of this object to the log at INFO level.

    Args:
        verbose: passed to ``logger.new_logger`` together with ``self``.

    Returns:
        ``self``.
    '''
    out = logger.new_logger(self, verbose)
    out.info('')
    out.info('******** %s ********', self.__class__)
    out.info('nocc = %s, nmo = %s', self.nocc, self.nmo)
    if self.frozen is not None:
        out.info('frozen orbitals %s', self.frozen)
    out.info('max_memory %d MB (current use %d MB)',
             self.max_memory, lib.current_memory()[0])

    # ``lo_coeff`` either has a ``shape`` itself or is a pair of objects
    # that each have one.
    try:
        nlo = self.lo_coeff.shape[1]
    except AttributeError:
        nlo = [self.lo_coeff[k].shape[1] for k in (0, 1)]
    out.info('nfrag = %d nlo = %s', self.nfrag, nlo)

    # One ``(format, value)`` pair per reported option, in output order.
    report = (
        ('frag_lolist = %s', self.frag_lolist),
        ('frag_wghtlist = %s', self.frag_wghtlist),
        ('lno_type = %s', self.lno_type),
        ('lno_thresh = %s', self.lno_thresh),
        ('lno_pct_occ = %s', self.lno_pct_occ),
        ('lno_norb = %s', self.lno_norb),
        ('lo_proj_thresh = %s', self.lo_proj_thresh),
        ('lo_proj_thresh_active = %s', self.lo_proj_thresh_active),
        ('verbose_imp = %s', self.verbose_imp),
        ('_ovL = %s', self._ovL),
        ('_ovL_to_save = %s', self._ovL_to_save),
        ('force_outcore_ao2mo = %s', self.force_outcore_ao2mo),
        ('_match_oldcode = %s', self._match_oldcode),
    )
    for fmt, value in report:
        out.info(fmt, value)
    return self
def kernel(self, eris=None):
    '''The LNO calculation driver.

    Normalises ``self.frag_wghtlist`` into a 1D array with one weight per
    fragment, then calls ``self._precompute()``, the module-level ``kernel``
    function, ``self._post_proc(frag_res, frag_wghtlist)`` and
    ``self._finalize()`` in that order.

    Args:
        eris: forwarded unchanged to the module-level ``kernel`` function.

    Returns:
        ``self.e_corr``.
        NOTE(review): presumably set by ``_post_proc``/``_finalize`` of the
        derived class -- it is not assigned in this method.

    Raises:
        ValueError: if ``self.frag_wghtlist`` is not None, a number or an
            iterable convertible to an array of length ``self.nfrag``.
    '''
    self.dump_flags()

    log = logger.new_logger(self)
    cput0 = (logger.process_clock(), logger.perf_counter())

    frag_wghtlist = self.frag_wghtlist
    nfrag = self.nfrag

    # frag weights: None -> all ones; scalar -> broadcast; iterable -> 1D array
    if frag_wghtlist is None:
        frag_wghtlist = np.ones(nfrag)
    elif isinstance(frag_wghtlist, numbers.Number):
        frag_wghtlist = np.ones(nfrag) * frag_wghtlist
    elif isinstance(frag_wghtlist, Iterable):
        try:
            frag_wghtlist = np.asarray(frag_wghtlist).ravel()
        except Exception as err:
            # Keep raising ValueError as before, but preserve the cause.
            log.error('Input frag_wghtlist cannot be converted to an array (%s).', err)
            raise ValueError('frag_wghtlist cannot be converted to an array') from err
        if len(frag_wghtlist) != nfrag:
            log.error('Input frag_wghtlist has wrong length (expecting %d; '
                      'got %d).', nfrag, len(frag_wghtlist))
            raise ValueError('frag_wghtlist has wrong length (expecting %d; got %d)'
                             % (nfrag, len(frag_wghtlist)))
    else:
        log.error('Input frag_wghtlist has wrong data type (expecting '
                  'array-like; got %s)', type(frag_wghtlist))
        raise ValueError('frag_wghtlist has wrong data type (expecting array-like; '
                         'got %s)' % type(frag_wghtlist))

    # dump info
    log.info('Regularized frag_wghtlist = %s', frag_wghtlist)

    log.timer('LO and fragment  ', *cput0)

    self._precompute()

    frag_res = kernel(self, self.lo_coeff, self.frag_lolist,
                      self.lno_type, self.lno_thresh,
                      self.lno_pct_occ, self.lno_norb, eris=eris)

    self._post_proc(frag_res, frag_wghtlist)

    self._finalize()

    return self.e_corr
log.error('DF is not found. Rerun SCF with DF.') + raise NotImplementedError + else: + cput0 = (logger.process_clock(), logger.perf_counter()) + orbocc, orbvir = self.split_mo_coeff()[1:3] + dsize = orbocc.itemsize + nocc = orbocc.shape[1] + nvir = orbvir.shape[1] + # FIXME: more accurate mem estimate + mem_now = self.max_memory - lib.current_memory()[0] + naux = self.with_df.get_naoaux() + mem_df = nocc*nvir*naux*dsize/1024**2. + log.debug('ao2mo est mem= %.2f MB avail mem= %.2f MB', mem_df, mem_now) + if ( (self._ovL_to_save is not None) or (self._ovL is not None) or + self.force_outcore_ao2mo or (mem_df > mem_now*0.5) ): + eris = _LNODFOUTCOREERIS(self.with_df, orbocc, orbvir, self.max_memory, + ovL=self._ovL, ovL_to_save=self._ovL_to_save, + verbose=self.verbose, stdout=self.stdout) + else: + eris = _LNODFINCOREERIS(self.with_df, orbocc, orbvir, self.max_memory, + verbose=self.verbose, stdout=self.stdout) + eris.build() + log.timer('Integral xform ', *cput0) + + return eris + + def make_lo_rdm1_occ(self, eris, moeocc, moevir, uocc_loc, uvir_loc, occ_lno_type): + return make_lo_rdm1_occ(eris, moeocc, moevir, uocc_loc, uvir_loc, occ_lno_type) + + def make_lo_rdm1_vir(self, eris, moeocc, moevir, uocc_loc, uvir_loc, vir_lno_type): + return make_lo_rdm1_vir(eris, moeocc, moevir, uocc_loc, uvir_loc, vir_lno_type) + + def _precompute(self, *args, **kwargs): + pass + + get_frozen_mask = mp.mp2.get_frozen_mask + get_nocc = mp.mp2.get_nocc + get_nmo = mp.mp2.get_nmo + split_mo_coeff = mp.dfmp2.DFMP2.split_mo_coeff + split_mo_energy = mp.dfmp2.DFMP2.split_mo_energy + split_mo_occ = mp.dfmp2.DFMP2.split_mo_occ + make_las = make_las + + @property + def nocc(self): + return self.get_nocc() + + @property + def nmo(self): + return self.get_nmo() + + ''' The following methods need to be implemented for derived LNO classes. 
def xform_occ(self, u):
    '''Rotate the occupied index of the stored ``ovL`` tensor.

    Computes ``OvL[I,a,x] = sum_i ovL[i,a,x] * conj(u[i,I])`` by accumulating
    over blocks of the occupied index ``i`` obtained from ``get_occ_blk``.

    Args:
        u: 2D array; rows are indexed by the original occupied index and
            ``u.shape[1]`` gives the size of the new index ``I``.

    Returns:
        Array of shape ``(u.shape[1], nvir, naux)`` and dtype ``self.dtype``.
        It is all zeros when there is no occupied block to accumulate.
    '''
    # return lib.einsum('iax,iI->Iax', self.ovL, u.conj())
    nocc, nvir, naux = self.nocc, self.nvir, self.naux
    nOcc = u.shape[1]
    M = (self.max_memory - lib.current_memory()[0])*1e6 / self.dsize
    # Guard the divisor so an empty virtual/auxiliary dimension does not
    # raise ZeroDivisionError while sizing the blocks.
    occblksize = min(nocc, max(1, int(np.floor(M*0.5/max(1, nvir*naux) - nOcc))))
    if DEBUG_BLKSIZE: occblksize = max(1,nocc//2)
    OvL = np.empty((nOcc,nvir,naux), dtype=self.dtype)
    filled = False
    for i0,i1 in lib.prange(0,nocc,occblksize):
        blk = lib.einsum('iax,iI->Iax', self.get_occ_blk(i0,i1), u[i0:i1].conj())
        if filled:
            OvL[:] += blk
        else:
            # First block overwrites the uninitialised buffer.
            OvL[:] = blk
            filled = True
    if not filled:
        # No block was processed: the empty sum is zero, not stale memory.
        OvL[:] = 0
    return OvL
def _init_mp_df_eris(with_df, occ_coeff, vir_coeff, max_memory, ovL=None, log=None):
    '''Fill ``ovL[i,a,x]`` with the occ-vir block of the MO-transformed DF tensor.

    Args:
        with_df: DF object; ``get_naoaux()`` and ``loop(blksize=...)`` are used
            for float64 orbitals, ``kpts`` and ``sr_loop(...)`` otherwise.
        occ_coeff: (nao, nocc) occupied orbital coefficients. Its dtype and
            itemsize select the code path and size the work buffers.
        vir_coeff: (nao, nvir) virtual orbital coefficients.
        max_memory: memory limit compared against ``lib.current_memory()[0]``.
        ovL: output of shape (nocc, nvir, naux). A ``np.ndarray`` is filled one
            auxiliary batch at a time; any other object is filled one occupied
            block at a time via ``ovL[i0:i1] = ...``. Allocated as an ndarray
            when None.
        log: logger; a ``logger.Logger(sys.stdout, 3)`` is created when None.

    Returns:
        ``ovL`` (the object passed in, or the newly allocated array).
    '''
    from pyscf.ao2mo import _ao2mo

    if log is None: log = logger.Logger(sys.stdout, 3)

    nao,nocc = occ_coeff.shape
    nvir = vir_coeff.shape[1]
    nmo = nocc + nvir
    nao_pair = nao**2
    naux = with_df.get_naoaux()

    dtype = occ_coeff.dtype
    dsize = occ_coeff.itemsize

    mo = np.asarray(np.hstack((occ_coeff,vir_coeff)), order='F')

    if ovL is None:
        ovL = np.empty((nocc,nvir,naux), dtype=dtype)

    mem_avail = max_memory - lib.current_memory()[0]

    if dtype == np.float64:
        def loop_df(blksize):
            for Lpq in with_df.loop(blksize=blksize):
                yield Lpq
                Lpq = None
        def ao2mo_df(Lpq, mo, ijslice, out):
            return _ao2mo.nr_e2(Lpq, mo, ijslice, aosym='s2', out=out)
    else:
        def loop_df(blksize):
            kpti_kptj = [with_df.kpts[0]]*2
            # Use the requested block size rather than a variable captured
            # from the enclosing scope.
            for LpqR, LpqI, sign in with_df.sr_loop(blksize=blksize,
                                                    kpti_kptj=kpti_kptj):
                Lpq = LpqR + LpqI*1j
                LpqR = LpqI = None
                if Lpq.shape[1] != nao_pair:
                    Lpq = lib.unpack_tril(Lpq).astype(dtype)
                yield Lpq
                Lpq = None
        def ao2mo_df(Lpq, mo, ijslice, out):
            return _ao2mo.r_e2(Lpq, mo, ijslice, [], None, aosym='s1', out=out)

    if isinstance(ovL, np.ndarray):
        # incore: batching aux (OV + Nao_pair) * [X] = M
        mem_auxblk = (nao_pair+nocc*nvir) * dsize/1e6
        aux_blksize = min(naux, max(1, int(np.floor(mem_avail*0.5 / mem_auxblk))))
        if DEBUG_BLKSIZE: aux_blksize = max(1,naux//2)
        log.debug('aux blksize for incore ao2mo: %d/%d', aux_blksize, naux)
        buf = np.empty(aux_blksize*nocc*nvir, dtype=dtype)
        # all occupied rows, all virtual columns of the stacked ``mo``
        ijslice = (0,nocc,nocc,nmo)

        p1 = 0
        for Lpq in loop_df(aux_blksize):
            p0, p1 = p1, p1+Lpq.shape[0]
            out = ao2mo_df(Lpq, mo, ijslice, buf)
            ovL[:,:,p0:p1] = out.reshape(-1,nocc,nvir).transpose(1,2,0)
            Lpq = out = None
        buf = None
    else:
        # outcore: batching occ [O]XV and aux ([O]V + Nao_pair)*[X]
        mem_occblk = naux*nvir * dsize/1e6
        occ_blksize = min(nocc, max(1, int(np.floor(mem_avail*0.6 / mem_occblk))))
        if DEBUG_BLKSIZE: occ_blksize = max(1,nocc//2)
        mem_auxblk = (occ_blksize*nvir+nao_pair) * dsize/1e6
        aux_blksize = min(naux, max(1, int(np.floor(mem_avail*0.3 / mem_auxblk))))
        if DEBUG_BLKSIZE: aux_blksize = max(1,naux//2)
        log.debug('occ blksize for outcore ao2mo: %d/%d', occ_blksize, nocc)
        log.debug('aux blksize for outcore ao2mo: %d/%d', aux_blksize, naux)
        buf = np.empty(naux*occ_blksize*nvir, dtype=dtype)
        buf2 = np.empty(aux_blksize*occ_blksize*nvir, dtype=dtype)

        for i0,i1 in lib.prange(0,nocc,occ_blksize):
            nocci = i1-i0
            ijslice = (i0,i1,nocc,nmo)
            p1 = 0
            # View on ``buf`` holding the full aux range for this occ block.
            OvL = np.ndarray((nocci,nvir,naux), dtype=dtype, buffer=buf)
            for Lpq in loop_df(aux_blksize):
                p0, p1 = p1, p1+Lpq.shape[0]
                out = ao2mo_df(Lpq, mo, ijslice, buf2)
                OvL[:,:,p0:p1] = out.reshape(-1,nocci,nvir).transpose(1,2,0)
                Lpq = out = None
            ovL[i0:i1] = OvL    # this avoids slow operations like ovL[i0:i1,:,p0:p1] = ...
            OvL = None
        buf = buf2 = None

    return ovL
def CCSD(mf, frozen=None, mo_coeff=None, mo_occ=None):
    '''Build the modified CCSD solver appropriate for ``mf``.

    ``mf`` is converted with ``scf.addons.convert_to_rhf`` when it is a
    second-order SCF wrapper or not an ``scf.hf.RHF`` instance. It must carry a
    truthy ``with_df``. The solver storing ``vvvv`` is chosen when
    ``naux > nvir_pair`` or when the estimated ``vvvv`` size is below 70% of
    the available memory; otherwise the DF variant is chosen.

    Args:
        mf: SCF object.
        frozen: frozen-orbital specification, normalised by ``get_maskact``.
        mo_coeff: forwarded to the solver class.
        mo_occ: occupation numbers; ``mf.mo_occ`` when None.

    Returns:
        ``MODIFIED_CCSD``, ``MODIFIED_DFCCSD`` or ``MODIFIED_CCSD_complex``
        instance.

    Raises:
        NotImplementedError: if ``mf`` has no density fitting, or if the DF
            variant would be needed for complex orbitals.
    '''
    from pyscf.soscf import newton_ah
    from pyscf import scf

    log = logger.new_logger(mf)

    if isinstance(mf, newton_ah._CIAH_SOSCF) or not isinstance(mf, scf.hf.RHF):
        mf = scf.addons.convert_to_rhf(mf)

    if not getattr(mf, 'with_df', None):
        raise NotImplementedError('LNO-CCSD not implemented for 4c eris. Use DF-SCF instead.')

    # auto-choose between DFCCSD (storing Lvv) and CCSD (storing vvvv) by memory
    naux = mf.with_df.get_naoaux()
    if mo_occ is None: mo_occ = mf.mo_occ
    maskocc = mo_occ > 1e-10
    frozen, maskact = get_maskact(frozen, len(mo_occ))
    nvir = np.count_nonzero(~maskocc & maskact)
    nvir_pair = nvir*(nvir+1)//2
    mem_avail = mf.max_memory - lib.current_memory()[0]
    mem_need = nvir_pair**2*8/1024**2.
    log.debug1('naux= %d  nvir_pair= %d  mem_avail= %.1f  mem_vvvv= %.1f',
               naux, nvir_pair, mem_avail, mem_need)

    store_vvvv = naux > nvir_pair or mem_need < mem_avail * 0.7

    if np.iscomplexobj(mf.mo_coeff):
        if store_vvvv:
            log.debug1('Using complex CCSD')
            return MODIFIED_CCSD_complex(mf, frozen, mo_coeff, mo_occ)
        log.debug1('Using complex DFCCSD')
        raise NotImplementedError('LNO-DFCCSD not implemented for complex orbitals.')

    if store_vvvv:
        log.debug1('Using CCSD')
        return MODIFIED_CCSD(mf, frozen, mo_coeff, mo_occ)
    log.debug1('Using DFCCSD')
    return MODIFIED_DFCCSD(mf, frozen, mo_coeff, mo_occ)
class MODIFIED_CCSD(ccsd.CCSD):
    '''``ccsd.CCSD`` whose integrals come from this module's ERI builders.'''
    get_e_hf = get_e_hf

    def ao2mo(self, mo_coeff=None):
        '''Return the ERI container for this solver.

        Uses ``_make_eris_incore`` when ``self._scf._eri`` is available and
        either the memory estimate fits within ``self.max_memory`` or
        ``self.incore_complete`` is set; otherwise ``_make_df_eris_outcore``
        when the SCF object has ``with_df``.

        Raises:
            NotImplementedError: if neither route applies.
        '''
        # Layout of the returned object, in pseudo code:
        #     eri = ao2mo.restore(1, ao2mo.incore.full(self._scf._eri, mo_coeff), nmo)
        #     eris.oooo = eri[:nocc,:nocc,:nocc,:nocc]
        #     eris.ovoo = eri[:nocc,nocc:,:nocc,:nocc]
        #     eris.ovvo = eri[nocc:,:nocc,nocc:,:nocc]
        #     eris.ovov = eri[nocc:,:nocc,:nocc,nocc:]
        #     eris.oovv = eri[:nocc,:nocc,nocc:,nocc:]
        #     eris.ovvv = lib.pack_tril(eri[:nocc,nocc:,nocc:,nocc:].reshape(-1,nvir,nvir))
        #     eris.vvvv = ao2mo.restore(4, eri[nocc:,nocc:,nocc:,nocc:], nvir)
        #     eris.fock = np.diag(self._scf.mo_energy)
        norb = self.nmo
        nbas = self.mo_coeff.shape[0]
        norb_pair = norb * (norb+1) // 2
        nbas_pair = nbas * (nbas+1) // 2
        mem_incore = (max(nbas_pair**2, norb**4) + norb_pair**2) * 8/1e6
        mem_now = lib.current_memory()[0]

        if self._scf._eri is not None:
            if mem_incore+mem_now < self.max_memory or self.incore_complete:
                return _make_eris_incore(self, mo_coeff)

        if getattr(self._scf, 'with_df', None):
            return _make_df_eris_outcore(self, mo_coeff)

        raise NotImplementedError   # LNO without DF should never happen
def get_vhf(self, mymf, dm, h1e=None, s1e=None):
    ''' Build vhf from input dm.

    NOTE 1:
        When ``dm`` agrees with ``mymf.make_rdm1()`` to within 1e-6 (max abs
        difference), vhf is obtained as ``fock_from_mo(...) - h1e``;
        otherwise ``mymf.get_veff`` is called.
    NOTE 2:
        If ``mymf`` has an ``exxdiv`` attribute (PBC), it is temporarily set
        to None while ``get_veff`` runs.
    '''
    scf_dm = mymf.make_rdm1()
    if abs(scf_dm-dm).max() < 1e-6:
        if h1e is None:
            h1e = mymf.get_hcore()
        return fock_from_mo(mymf, s1e=s1e, force_exxdiv_none=True) - h1e

    if not hasattr(mymf, 'exxdiv'):
        return mymf.get_veff(mymf.mol, dm)

    # PBC CC requires exxdiv=None
    with lib.temporary_env(mymf, exxdiv=None):
        return mymf.get_veff(mymf.mol, dm)
def _make_df_eris_outcore(mycc, mo_coeff=None):
    '''Assemble the CCSD integral blocks from the DF tensor into an HDF5 temp file.

    The DF tensor from ``mycc._scf.with_df.loop()`` is transformed to the MO
    basis block by block and split into oo/ov/vo/vv parts held in memory. Each
    four-index block is then a product of two of these parts and is written to
    a dataset of ``eris.feri1``.

    Args:
        mycc: CC object; ``_scf.with_df``, ``stdout`` and ``verbose`` are read.
        mo_coeff: forwarded to ``eris._common_init_``.

    Returns:
        ``_ChemistsERIs`` instance with datasets ``oooo``, ``oovv``, ``ovoo``,
        ``ovvo``, ``ovov``, ``ovvv`` (last index packed) and ``vvvv`` (both
        index pairs packed).
    '''
    from pyscf.ao2mo import _ao2mo

    cput0 = (logger.process_clock(), logger.perf_counter())
    log = logger.Logger(mycc.stdout, mycc.verbose)
    eris = _ChemistsERIs()
    eris._common_init_(mycc, mo_coeff)

    mo_coeff = np.asarray(eris.mo_coeff, order='F')
    nocc = eris.nocc
    nao, nmo = mo_coeff.shape
    nvir = nmo - nocc
    nvir_pair = nvir*(nvir+1)//2

    naux = mycc._scf.with_df.get_naoaux()
    # In-memory pieces of the MO-basis DF tensor, first index = auxiliary.
    Loo = np.empty((naux,nocc,nocc))
    Lov = np.empty((naux,nocc,nvir))
    Lvo = np.empty((naux,nvir,nocc))
    Lvv = np.empty((naux,nvir_pair))
    ijslice = (0, nmo, 0, nmo)
    Lpq = None
    p1 = 0
    for eri1 in mycc._scf.with_df.loop():
        # The previous result is handed back as ``out`` on the next iteration.
        Lpq = _ao2mo.nr_e2(eri1, mo_coeff, ijslice, aosym='s2', out=Lpq).reshape(-1,nmo,nmo)
        p0, p1 = p1, p1 + Lpq.shape[0]
        Loo[p0:p1] = Lpq[:,:nocc,:nocc]
        Lov[p0:p1] = Lpq[:,:nocc,nocc:]
        Lvo[p0:p1] = Lpq[:,nocc:,:nocc]
        # vv block is stored with its two virtual indices packed (lower triangle)
        Lvv[p0:p1] = lib.pack_tril(Lpq[:,nocc:,nocc:].reshape(-1,nvir,nvir))
    # Flatten the orbital pair so each block becomes a (naux, npair) matrix.
    Loo = Loo.reshape(naux,nocc*nocc)
    Lov = Lov.reshape(naux,nocc*nvir)
    Lvo = Lvo.reshape(naux,nocc*nvir)

    eris.feri1 = lib.H5TmpFile()
    eris.oooo = eris.feri1.create_dataset('oooo', (nocc,nocc,nocc,nocc), 'f8')
    eris.oovv = eris.feri1.create_dataset('oovv', (nocc,nocc,nvir,nvir), 'f8', chunks=(nocc,nocc,1,nvir))
    eris.ovoo = eris.feri1.create_dataset('ovoo', (nocc,nvir,nocc,nocc), 'f8', chunks=(nocc,1,nocc,nocc))
    eris.ovvo = eris.feri1.create_dataset('ovvo', (nocc,nvir,nvir,nocc), 'f8', chunks=(nocc,1,nvir,nocc))
    eris.ovov = eris.feri1.create_dataset('ovov', (nocc,nvir,nocc,nvir), 'f8', chunks=(nocc,1,nocc,nvir))
    eris.ovvv = eris.feri1.create_dataset('ovvv', (nocc,nvir,nvir_pair), 'f8')
    eris.vvvv = eris.feri1.create_dataset('vvvv', (nvir_pair,nvir_pair), 'f8')
    # (pq|rs) = sum_L L[L,pq] * L[L,rs] for each pair of blocks
    eris.oooo[:] = lib.ddot(Loo.T, Loo).reshape(nocc,nocc,nocc,nocc)
    eris.ovoo[:] = lib.ddot(Lov.T, Loo).reshape(nocc,nvir,nocc,nocc)
    eris.oovv[:] = lib.unpack_tril(lib.ddot(Loo.T, Lvv)).reshape(nocc,nocc,nvir,nvir)
    eris.ovvo[:] = lib.ddot(Lov.T, Lvo).reshape(nocc,nvir,nvir,nocc)
    eris.ovov[:] = lib.ddot(Lov.T, Lov).reshape(nocc,nvir,nocc,nvir)
    eris.ovvv[:] = lib.ddot(Lov.T, Lvv).reshape(nocc,nvir,nvir_pair)
    eris.vvvv[:] = lib.ddot(Lvv.T, Lvv)
    log.timer('CCSD integral transformation', *cput0)
    return eris
def _make_eris_incore_complex(mycc, mo_coeff=None, ao2mofn=None):
    '''Build all CCSD integral blocks in memory from a full MO-basis ERI tensor.

    Args:
        mycc: CC object handed to ``eris._common_init_``; ``mycc._scf._eri`` is
            read only when ``ao2mofn`` is not callable.
        mo_coeff: forwarded to ``eris._common_init_``.
        ao2mofn: optional callable; when callable, its result for
            ``eris.mo_coeff`` is reshaped to ``[nmo]*4`` and used as the ERIs.

    Returns:
        ``_ChemistsERIs_complex`` with ``oooo``, ``ovoo``, ``ovov``, ``oovv``,
        ``ovvo``, ``ovvv`` and ``vvvv`` stored as copies of unpacked sub-blocks.
    '''
    t_start = (logger.process_clock(), logger.perf_counter())
    eris = _ChemistsERIs_complex()
    eris._common_init_(mycc, mo_coeff)
    nocc = eris.nocc
    nmo = eris.fock.shape[0]

    if not callable(ao2mofn):
        from pyscf import ao2mo
        full_eri = ao2mo.incore.full(mycc._scf._eri, eris.mo_coeff)
        full_eri = ao2mo.restore(1, full_eri, nmo)
    else:
        full_eri = ao2mofn(eris.mo_coeff).reshape([nmo]*4)

    # 'o' selects the first nocc orbitals, 'v' the remaining ones; each block
    # name spells out the slice taken along the four axes.
    part = {'o': slice(None, nocc), 'v': slice(nocc, None)}
    for label in ('oooo', 'ovoo', 'ovov', 'oovv', 'ovvo', 'ovvv', 'vvvv'):
        index = tuple(part[ch] for ch in label)
        setattr(eris, label, full_eri[index].copy())

    logger.timer(mycc, 'CCSD integral transformation', *t_start)
    return eris
(nocc,nvir,nocc,nocc), 'f8', chunks=(nocc,1,nocc,nocc)) + eris.ovov = eris.feri.create_dataset('ovov', (nocc,nvir,nocc,nvir), 'f8', chunks=(nocc,1,nocc,nvir)) + eris.ovvo = eris.feri.create_dataset('ovvo', (nocc,nvir,nvir,nocc), 'f8', chunks=(nocc,1,nvir,nocc)) + eris.oovv = eris.feri.create_dataset('oovv', (nocc,nocc,nvir,nvir), 'f8', chunks=(nocc,nocc,1,nvir)) + # nrow ~ 4e9/8/blockdim to ensure hdf5 chunk < 4GB + chunks = (min(nvir_pair,int(4e8/with_df.blockdim)), min(naux,with_df.blockdim)) + eris.vvL = eris.feri.create_dataset('vvL', (nvir_pair,naux), 'f8', chunks=chunks) + + Loo = np.empty((naux,nocc,nocc)) + Lov = np.empty((naux,nocc,nvir)) + mo = np.asarray(eris.mo_coeff, order='F') + ijslice = (0, nmo, 0, nmo) + p1 = 0 + Lpq = None + for k, eri1 in enumerate(with_df.loop()): + Lpq = _ao2mo.nr_e2(eri1, mo, ijslice, aosym='s2', mosym='s1', out=Lpq) + p0, p1 = p1, p1 + Lpq.shape[0] + Lpq = Lpq.reshape(p1-p0,nmo,nmo) + Loo[p0:p1] = Lpq[:,:nocc,:nocc] + Lov[p0:p1] = Lpq[:,:nocc,nocc:] + Lvv = lib.pack_tril(Lpq[:,nocc:,nocc:]) + eris.vvL[:,p0:p1] = Lvv.T + Lpq = Lvv = None + Loo = Loo.reshape(naux,nocc**2) + #Lvo = Lov.transpose(0,2,1).reshape(naux,nvir*nocc) + Lov = Lov.reshape(naux,nocc*nvir) + eris.oooo[:] = lib.ddot(Loo.T, Loo).reshape(nocc,nocc,nocc,nocc) + eris.ovoo[:] = lib.ddot(Lov.T, Loo).reshape(nocc,nvir,nocc,nocc) + ovov = lib.ddot(Lov.T, Lov).reshape(nocc,nvir,nocc,nvir) + eris.ovov[:] = ovov + eris.ovvo[:] = ovov.transpose(0,1,3,2) + ovov = None + + mem_now = lib.current_memory()[0] + max_memory = max(0, cc.max_memory - mem_now) + blksize = max(ccsd.BLKMIN, int((max_memory*.9e6/8-nocc**2*nvir_pair)/(nocc**2+naux))) + oovv_tril = np.empty((nocc*nocc,nvir_pair)) + for p0, p1 in lib.prange(0, nvir_pair, blksize): + oovv_tril[:,p0:p1] = lib.ddot(Loo.T, _cp(eris.vvL[p0:p1]).T) + eris.oovv[:] = lib.unpack_tril(oovv_tril).reshape(nocc,nocc,nvir,nvir) + oovv_tril = Loo = None + + Lov = Lov.reshape(naux,nocc,nvir) + vblk = max(nocc, 
int((max_memory*.15e6/8)/(nocc*nvir_pair))) + vvblk = int(min(nvir_pair, 4e8/nocc, max(4, (max_memory*.8e6/8)/(vblk*nocc+naux)))) + eris.ovvv = eris.feri.create_dataset('ovvv', (nocc,nvir,nvir_pair), 'f8', + chunks=(nocc,1,vvblk)) + for q0, q1 in lib.prange(0, nvir_pair, vvblk): + vvL = _cp(eris.vvL[q0:q1]) + for p0, p1 in lib.prange(0, nvir, vblk): + tmpLov = _cp(Lov[:,:,p0:p1]).reshape(naux,-1) + eris.ovvv[:,p0:p1,q0:q1] = lib.ddot(tmpLov.T, vvL.T).reshape(nocc,p1-p0,q1-q0) + vvL = None + return eris +def _cp(a): + return np.array(a, copy=False, order='C') + +class MODIFIED_DFCCSD_complex: + pass +''' End of modification of PySCF's CCSD class +''' + +''' impurity solver for LNO-based CCSD/CCSD_T +''' +def impurity_solve(mcc, mo_coeff, uocc_loc, mo_occ, maskact, eris, + ccsd_t=False, log=None, verbose_imp=None, + max_las_size_ccsd=1000, max_las_size_ccsd_t=1000): + r''' Solve impurity problem and calculate local correlation energy. + + Args: + mo_coeff (np.ndarray): + MOs where the impurity problem is solved. + uocc_loc (np.ndarray): + where i is semi-canonical occ LNOs and I is LO. + ccsd_t (bool): + If True, CCSD(T) energy is calculated and returned as the third + item (0 is returned otherwise). + frozen (int or list; optional): + Same syntax as `frozen` in MP2, CCSD, etc. + + Return: + e_loc_corr_pt2, e_loc_corr_ccsd, e_loc_corr_ccsd_t: + Local correlation energy at MP2, CCSD, and CCSD(T) level. Note that + the CCSD(T) energy is 0 unless 'ccsd_t' is set to True. 
+ ''' + log = logger.new_logger(mcc if log is None else log) + cput1 = (logger.process_clock(), logger.perf_counter()) + + maskocc = mo_occ>1e-10 + nmo = mo_occ.size + + orbfrzocc = mo_coeff[:,~maskact & maskocc] + orbactocc = mo_coeff[:, maskact & maskocc] + orbactvir = mo_coeff[:, maskact & ~maskocc] + orbfrzvir = mo_coeff[:,~maskact & ~maskocc] + nfrzocc, nactocc, nactvir, nfrzvir = [orb.shape[1] + for orb in [orbfrzocc,orbactocc, + orbactvir,orbfrzvir]] + nlo = uocc_loc.shape[1] + nactmo = nactocc + nactvir + log.debug(' impsol: %d LOs %d/%d MOs %d occ %d vir', + nlo, nactmo, nmo, nactocc, nactvir) + + if nactocc == 0 or nactvir == 0: + elcorr_pt2 = elcorr_cc = lib.tag_array(0., spin_comp=np.array((0., 0.))) + elcorr_cc_t = 0. + else: + + if nactmo > max_las_size_ccsd: + log.warn('Number of active space orbitals (%d) exceed ' + '`_max_las_size_ccsd` (%d). Impurity CCSD calculations ' + 'will NOT be performed.', nactmo, max_las_size_ccsd) + elcorr_pt2 = elcorr_cc = lib.tag_array(0., spin_comp=np.array((0.,0.))) + elcorr_cc_t = 0. 
+ else: + # solve impurity problem + imp_eris = mcc.ao2mo() + if isinstance(imp_eris.ovov, np.ndarray): + ovov = imp_eris.ovov + else: + ovov = imp_eris.ovov[()] + oovv = ovov.reshape(nactocc,nactvir,nactocc,nactvir).transpose(0,2,1,3) + ovov = None + cput1 = log.timer_debug1('imp sol - eri ', *cput1) + + # MP2 fragment energy + t1, t2 = mcc.init_amps(eris=imp_eris)[1:] + cput1 = log.timer_debug1('imp sol - mp2 amp', *cput1) + elcorr_pt2 = get_fragment_energy(oovv, t2, uocc_loc).real + cput1 = log.timer_debug1('imp sol - mp2 ene', *cput1) + + # CCSD fragment energy + t1, t2 = mcc.kernel(eris=imp_eris, t1=t1, t2=t2)[1:] + if not mcc.converged: + log.warn('Impurity CCSD did not converge, please be careful of the results.') + + cput1 = log.timer_debug1('imp sol - cc amp', *cput1) + t2 += einsum('ia,jb->ijab',t1,t1) + elcorr_cc = get_fragment_energy(oovv, t2, uocc_loc) + cput1 = log.timer_debug1('imp sol - cc ene', *cput1) + + # CCSD(T) fragment energy + if ccsd_t: + if nactmo > max_las_size_ccsd_t: + log.warn('Number of active space orbitals (%d) exceed ' + '`_max_las_size_ccsd_t` (%d). Impurity CCSD(T) calculations ' + 'will NOT be performed.', nactmo, max_las_size_ccsd_t) + elcorr_cc_t = 0. + else: + from pyscf.lno.lnoccsd_t import kernel as CCSD_T + t2 -= einsum('ia,jb->ijab',t1,t1) # restore t2 + elcorr_cc_t = CCSD_T(mcc, imp_eris, uocc_loc, t1=t1, t2=t2, verbose=verbose_imp) + cput1 = log.timer_debug1('imp sol - cc (T)', *cput1) + else: + elcorr_cc_t = 0. 
+ + t1 = t2 = oovv = imp_eris = mcc = None + + frag_msg = ' '.join([f'E_corr(MP2) = {elcorr_pt2:.15g}', + f'E_corr(CCSD) = {elcorr_cc:.15g}', + f'E_corr(CCSD(T)) = {elcorr_cc_t:.15g}']) + + return (elcorr_pt2, elcorr_cc, elcorr_cc_t), frag_msg + +def get_maskact(frozen, nmo): + # Convert frozen to 0 bc PySCF solvers do not support frozen=None or empty list + if frozen is None: + frozen = 0 + elif isinstance(frozen, (list,tuple,np.ndarray)) and len(frozen) == 0: + frozen = 0 + + if isinstance(frozen, (int,np.integer)): + maskact = np.hstack([np.zeros(frozen,dtype=bool), + np.ones(nmo-frozen,dtype=bool)]) + elif isinstance(frozen, (list,tuple,np.ndarray)): + maskact = np.array([i not in frozen for i in range(nmo)]) + else: + raise RuntimeError + + return frozen, maskact + +def get_fragment_energy(oovv, t2, uocc_loc): + m = fdot(uocc_loc, uocc_loc.T.conj()) + # return einsum('ijab,kjab,ik->',t2,2*oovv-oovv.transpose(0,1,3,2),m) + ed = einsum('ijab,kjab,ik->', t2, oovv, m) * 2 + ex = -einsum('ijab,kjba,ik->', t2, oovv, m) + ed = ed.real + ex = ex.real + ess = ed*0.5 + ex + eos = ed*0.5 + return lib.tag_array(ess+eos, spin_comp=np.array((ess, eos))) + + + +class LNOCCSD(LNO): + + ''' Use the following _max_las_size arguments to avoid calculations that have no + hope of finishing. This may ease scanning thresholds. 
+ ''' + _max_las_size_ccsd = 1000 + _max_las_size_ccsd_t = 1000 + + def __init__(self, mf, lo_coeff, frag_lolist, lno_type=None, lno_thresh=None, frozen=None): + + super().__init__(mf, lo_coeff, frag_lolist, lno_type=lno_type, + lno_thresh=lno_thresh, frozen=frozen) + + self.efrag_cc = None + self.efrag_pt2 = None + self.efrag_cc_t = None + self.efrag_cc_spin_comp = None + self.efrag_pt2_spin_comp = None + self.ccsd_t = False + + # args for impurity solver + self.kwargs_imp = None + self.verbose_imp = 2 # ERROR and WARNING + + # args for precompute + self._h1e = None + self._vhf = None + + @property + def h1e(self): + if self._h1e is None: + self._h1e = self._scf.get_hcore() + return self._h1e + + def dump_flags(self, verbose=None): + super().dump_flags(verbose=verbose) + log = logger.new_logger(self, verbose) + log.info('_max_las_size_ccsd = %s', self._max_las_size_ccsd) + log.info('_max_las_size_ccsd_t = %s', self._max_las_size_ccsd_t) + return self + + def _precompute(self): + log = logger.new_logger(self) + + mf = self._scf + s1e = self.s1e + h1e = self.h1e + if self._vhf is None: + log.warn('Input vhf is not found. Building vhf from SCF MO.') + self._vhf = fock_from_mo(mf, s1e=s1e, force_exxdiv_none=True) - h1e + elif hasattr(mf, 'exxdiv'): + log.warn('Input vhf is detected while using PBC HF. 
Make sure ' + 'that the input vhf was computed with exxdiv=None, or ' + 'the MP2 and CCSD energy can be both wrong when compared ' + 'to k-point MP2 and CCSD results.') + + def impurity_solve(self, mf, mo_coeff, uocc_loc, eris, frozen=None, log=None): + if log is None: log = logger.new_logger(self) + mo_occ = self.mo_occ + frozen, maskact = get_maskact(frozen, mo_occ.size) + mcc = CCSD(mf, mo_coeff=mo_coeff, frozen=frozen).set(verbose=self.verbose_imp) + mcc._s1e = self._s1e + mcc._h1e = self._h1e + mcc._vhf = self._vhf + + if self.kwargs_imp is not None: + mcc = mcc.set(**self.kwargs_imp) + + return impurity_solve(mcc, mo_coeff, uocc_loc, mo_occ, maskact, eris, log=log, + ccsd_t=self.ccsd_t, verbose_imp=self.verbose_imp, + max_las_size_ccsd=self._max_las_size_ccsd, + max_las_size_ccsd_t=self._max_las_size_ccsd_t) + + def _post_proc(self, frag_res, frag_wghtlist): + ''' Post processing results returned by `impurity_solve` collected in `frag_res`. + ''' + # TODO: add spin-component for CCSD(T) + nfrag = len(frag_res) + efrag_pt2 = np.zeros(nfrag) + efrag_cc = np.zeros(nfrag) + efrag_cc_t = np.zeros(nfrag) + efrag_pt2_spin_comp = np.zeros((nfrag,2)) + efrag_cc_spin_comp = np.zeros((nfrag,2)) + + for i in range(nfrag): + ept2, ecc, ecc_t = frag_res[i] + efrag_pt2[i] = float(ept2) + efrag_cc[i] = float(ecc) + efrag_cc_t[i] = float(ecc_t) + efrag_pt2_spin_comp[i] = ept2.spin_comp + efrag_cc_spin_comp[i] = ecc.spin_comp + self.efrag_pt2 = efrag_pt2 * frag_wghtlist + self.efrag_cc = efrag_cc * frag_wghtlist + self.efrag_cc_t = efrag_cc_t * frag_wghtlist + self.efrag_pt2_spin_comp = efrag_pt2_spin_comp * frag_wghtlist[:,None] + self.efrag_cc_spin_comp = efrag_cc_spin_comp * frag_wghtlist[:,None] + + def _finalize(self): + r''' Hook for dumping results and clearing up the object.''' + logger.note(self, 'E(%s) = %.15g E_corr = %.15g', + 'LNOMP2', self.e_tot_pt2, self.e_corr_pt2) + logger.note(self, 'E(%s) = %.15g E_corr = %.15g', + 'LNOCCSD', self.e_tot, self.e_corr) + if 
self.ccsd_t: + logger.note(self, 'E(%s) = %.15g E_corr = %.15g', + 'LNOCCSD_T', self.e_tot_ccsd_t, self.e_corr_ccsd_t) + logger.note(self, 'Summary by spin components') + logger.note(self, 'LNOMP2 Ess = %.15g Eos = %.15g Escs = %.15g', + self.e_corr_pt2_ss, self.e_corr_pt2_os, self.e_corr_pt2_scs) + logger.note(self, 'LNOCCSD Ess = %.15g Eos = %.15g Escs = %.15g', + self.e_corr_ccsd_ss, self.e_corr_ccsd_os, self.e_corr_ccsd_scs) + return self + + @property + def e_tot_scf(self): + return self._scf.e_tot + + @property + def e_corr(self): + return self.e_corr_ccsd + + @property + def e_tot(self): + return self.e_corr + self.e_tot_scf + + @property + def e_corr_ccsd(self): + e_corr = np.sum(self.efrag_cc) + return e_corr + + @property + def e_corr_ccsd_ss(self): + e_corr = np.sum(self.efrag_cc_spin_comp[:,0]) + return e_corr + + @property + def e_corr_ccsd_os(self): + e_corr = np.sum(self.efrag_cc_spin_comp[:,1]) + return e_corr + + @property + def e_corr_ccsd_scs(self): + e_corr = 0.333*self.e_corr_ccsd_ss + 1.2*self.e_corr_ccsd_os + return e_corr + + @property + def e_corr_pt2(self): + e_corr = np.sum(self.efrag_pt2) + return e_corr + + @property + def e_corr_pt2_ss(self): + e_corr = np.sum(self.efrag_pt2_spin_comp[:,0]) + return e_corr + + @property + def e_corr_pt2_os(self): + e_corr = np.sum(self.efrag_pt2_spin_comp[:,1]) + return e_corr + + @property + def e_corr_pt2_scs(self): + e_corr = 0.333*self.e_corr_pt2_ss + 1.2*self.e_corr_pt2_os + return e_corr + + @property + def e_corr_ccsd_t(self): + e_corr = np.sum(self.efrag_cc_t) + return e_corr + self.e_corr_ccsd + + @property + def e_tot_ccsd(self): + return self.e_corr_ccsd + self.e_tot_scf + + @property + def e_tot_ccsd_t(self): + return self.e_corr_ccsd_t + self.e_tot_ccsd + + @property + def e_tot_pt2(self): + return self.e_corr_pt2 + self.e_tot_scf + + def e_corr_pt2corrected(self, ept2): + return self.e_corr - self.e_corr_pt2 + ept2 + + def e_tot_pt2corrected(self, ept2): + return self.e_tot_scf + 
self.e_corr_pt2corrected(ept2) + + def e_corr_ccsd_pt2corrected(self, ept2): + return self.e_corr_ccsd - self.e_corr_pt2 + ept2 + + def e_tot_ccsd_pt2corrected(self, ept2): + return self.e_tot_scf + self.e_corr_ccsd_pt2corrected(ept2) + + def e_corr_ccsd_t_pt2corrected(self, ept2): + return self.e_corr_ccsd_t - self.e_corr_pt2 + ept2 + + def e_tot_ccsd_t_pt2corrected(self, ept2): + return self.e_tot_ccsd + self.e_corr_ccsd_t_pt2corrected(ept2) + + +class LNOCCSD_T(LNOCCSD): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.ccsd_t = True + + +def fock_from_mo(mymf, s1e=None, force_exxdiv_none=True): + if s1e is None: s1e = mymf.get_ovlp() + mo0 = np.dot(s1e, mymf.mo_coeff) + moe0 = mymf.mo_energy + nocc0 = np.count_nonzero(mymf.mo_occ) + if force_exxdiv_none: + if hasattr(mymf, 'exxdiv') and mymf.exxdiv == 'ewald': # remove madelung + from pyscf.pbc.cc.ccsd import _adjust_occ + from pyscf.pbc import tools + madelung = tools.madelung(mymf.cell, mymf.kpt) + moe0 = _adjust_occ(moe0, nocc0, madelung) + fock = np.dot(mo0*moe0, mo0.T.conj()) + return fock + + +if __name__ == '__main__': + from pyscf import gto, scf, mp, cc, lo + from pyscf.cc.ccsd_t import kernel as CCSD_T + from pyscf.data.elements import chemcore + + log = logger.Logger(sys.stdout, 6) + + # S22-2: water dimer + atom = ''' + O -1.485163346097 -0.114724564047 0.000000000000 + H -1.868415346097 0.762298435953 0.000000000000 + H -0.533833346097 0.040507435953 0.000000000000 + O 1.416468653903 0.111264435953 0.000000000000 + H 1.746241653903 -0.373945564047 -0.758561000000 + H 1.746241653903 -0.373945564047 0.758561000000 + ''' + basis = 'cc-pvdz' + + mol = gto.M(atom=atom, basis=basis) + mol.verbose = 4 + frozen = chemcore(mol) + + mf = scf.RHF(mol).density_fit() + mf.kernel() + +# canonical + mmp = mp.MP2(mf, frozen=frozen) + mmp.kernel() + efull_mp2 = mmp.e_corr + + mcc = cc.CCSD(mf, frozen=frozen).set(verbose=5) + eris = mcc.ao2mo() + mcc.kernel(eris=eris) + efull_ccsd = 
mcc.e_corr + + efull_t = CCSD_T(mcc, eris=eris, verbose=mcc.verbose) + efull_ccsd_t = efull_ccsd + efull_t + +# LNO with PM localized orbitals + # PM localization + orbocc = mf.mo_coeff[:,frozen:np.count_nonzero(mf.mo_occ)] + mlo = lo.PipekMezey(mol, orbocc) + lo_coeff = mlo.kernel() + while True: # always performing jacobi sweep to avoid trapping in local minimum/saddle point + lo_coeff1 = mlo.stability_jacobi()[1] + if lo_coeff1 is lo_coeff: + break + mlo = lo.PipekMezey(mf.mol, lo_coeff1).set(verbose=4) + mlo.init_guess = None + lo_coeff = mlo.kernel() + + # Fragment list: for PM, every orbital corresponds to a fragment + frag_lolist = [[i] for i in range(lo_coeff.shape[1])] + + # LNO-CCSD(T) calculation: here we scan over a list of thresholds + mcc = LNOCCSD(mf, lo_coeff, frag_lolist, frozen=frozen).set(verbose=5) + gamma = 10 # thresh_occ / thresh_vir + threshs = np.asarray([1e-1,3e-5,1e-5,3e-6,1e-6,3e-7,1e-7]) + elno_ccsd_uncorr = np.zeros_like(threshs) + elno_ccsd_t_uncorr = np.zeros_like(threshs) + elno_mp2 = np.zeros_like(threshs) + for i,thresh in enumerate(threshs): + mcc.lno_thresh = [thresh*gamma, thresh] + mcc.kernel() + elno_ccsd_uncorr[i] = mcc.e_corr_ccsd + elno_ccsd_t_uncorr[i] = mcc.e_corr_ccsd_t + elno_mp2[i] = mcc.e_corr_pt2 + elno_ccsd = elno_ccsd_uncorr - elno_mp2 + efull_mp2 + elno_ccsd_t = elno_ccsd_t_uncorr - elno_mp2 + efull_mp2 + + log.info('') + log.info('Reference CCSD E_corr = %.15g', efull_ccsd) + for i,thresh in enumerate(threshs): + e0 = elno_ccsd_uncorr[i] + e1 = elno_ccsd[i] + log.info('thresh = %.3e E_corr(LNO-CCSD) = %.15g E_corr(LNO-CCSD+∆PT2) = %.15g', + thresh, e0, e1) + + # log.info('') + # log.info('Reference CCSD(T) E_corr = %.15g', efull_ccsd_t) + # for i,thresh in enumerate(threshs): + # e0 = elno_ccsd_t_uncorr[i] + # e1 = elno_ccsd_t[i] + # log.info('thresh = %.3e E_corr(LNO-CCSD(T)) = %.15g E_corr(LNO-CCSD(T)+∆PT2) = %.15g', + # thresh, e0, e1) diff --git a/pyscf/lno/lnoccsd_t.py b/pyscf/lno/lnoccsd_t.py new file mode 
100644 index 000000000..24a355219 --- /dev/null +++ b/pyscf/lno/lnoccsd_t.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python +# Copyright 2014-2021 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Hong-Zhou Ye +# + + +''' LNO-RCCSD(T) (both molecule and pbc w/ Gamma-point BZ sampling) + + - Original publication by Kállay and co-workers: + Rolik and Kállay, J. Chem. Phys. 135, 104111 (2011) + + - Publication for this implementation by Ye and Berkelbach: + Ye and Berkelbach, J. Chem. Theory Comput. 2024, 20, 20, 8948–8959 +''' + +import ctypes +import numpy + +from pyscf import lib +from pyscf import symm +from pyscf.lib import logger +from pyscf.cc.ccsd_t import _sort_eri, _sort_t2_vooo_, _irrep_argsort + +libcc = lib.load_library('liblno') +CCsd_t_zcontract_lo = libcc.CCsd_t_zcontract_lo +CCsd_t_contract_lo = libcc.CCsd_t_contract_lo + + +def has_t_code(): + return CCsd_t_contract_lo is not None + +def kernel(mycc, eris, ulo, t1=None, t2=None, verbose=logger.NOTE): + r''' (T) energy correction normalized to LOs. + + Args: + ulo[mu,i] = is the overlap between the mu-th LO and the i-th occ MO. 
+ ''' + cpu1 = cpu0 = (logger.process_clock(), logger.perf_counter()) + log = logger.new_logger(mycc, verbose) + if t1 is None: t1 = mycc.t1 + if t2 is None: t2 = mycc.t2 + + # symmetry cannot be used for LNO; turn off temporarily and turn back later + molsymm = mycc.mol.symmetry + if molsymm: + log.warn('mol.symmetry = True is found for LNO-CCSD(T). Turn it off temporarily.') + mycc.mol.symmetry = False + + nocc, nvir = t1.shape + nmo = nocc + nvir + nlo = ulo.shape[1] + assert(ulo.shape[0] == nocc) + uloT = numpy.asarray(ulo.T.conj(), order='C') + + dtype = numpy.result_type(t1, t2, eris.ovoo.dtype) + if mycc.incore_complete: + ftmp = None + eris_vvop = numpy.zeros((nvir,nvir,nocc,nmo), dtype) + else: + ftmp = lib.H5TmpFile() + eris_vvop = ftmp.create_dataset('vvop', (nvir,nvir,nocc,nmo), dtype) + + orbsym = _sort_eri(mycc, eris, nocc, nvir, eris_vvop, log) + + mo_energy, t1T, t2T, vooo, fvo, restore_t2_inplace = \ + _sort_t2_vooo_(mycc, orbsym, t1, t2, eris) + cpu1 = log.timer_debug1('CCSD(T) sort_eri', *cpu1) + + cpu2 = list(cpu1) + orbsym = numpy.hstack((numpy.sort(orbsym[:nocc]),numpy.sort(orbsym[nocc:]))) + o_ir_loc = numpy.append(0, numpy.cumsum(numpy.bincount(orbsym[:nocc], minlength=8))) + v_ir_loc = numpy.append(0, numpy.cumsum(numpy.bincount(orbsym[nocc:], minlength=8))) + o_sym = orbsym[:nocc] + oo_sym = (o_sym[:,None] ^ o_sym).ravel() + oo_ir_loc = numpy.append(0, numpy.cumsum(numpy.bincount(oo_sym, minlength=8))) + nirrep = max(oo_sym) + 1 + + orbsym = orbsym.astype(numpy.int32) + o_ir_loc = o_ir_loc.astype(numpy.int32) + v_ir_loc = v_ir_loc.astype(numpy.int32) + oo_ir_loc = oo_ir_loc.astype(numpy.int32) + if dtype == numpy.complex128: + drv = CCsd_t_zcontract_lo + else: + drv = CCsd_t_contract_lo + et_sum = numpy.zeros(1, dtype=dtype) + def contract(a0, a1, b0, b1, cache): + cache_row_a, cache_col_a, cache_row_b, cache_col_b = cache + drv(et_sum.ctypes.data_as(ctypes.c_void_p), + mo_energy.ctypes.data_as(ctypes.c_void_p), + 
t1T.ctypes.data_as(ctypes.c_void_p), + t2T.ctypes.data_as(ctypes.c_void_p), + vooo.ctypes.data_as(ctypes.c_void_p), + fvo.ctypes.data_as(ctypes.c_void_p), + uloT.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(nlo), + ctypes.c_int(nocc), ctypes.c_int(nvir), + ctypes.c_int(a0), ctypes.c_int(a1), + ctypes.c_int(b0), ctypes.c_int(b1), + ctypes.c_int(nirrep), + o_ir_loc.ctypes.data_as(ctypes.c_void_p), + v_ir_loc.ctypes.data_as(ctypes.c_void_p), + oo_ir_loc.ctypes.data_as(ctypes.c_void_p), + orbsym.ctypes.data_as(ctypes.c_void_p), + cache_row_a.ctypes.data_as(ctypes.c_void_p), + cache_col_a.ctypes.data_as(ctypes.c_void_p), + cache_row_b.ctypes.data_as(ctypes.c_void_p), + cache_col_b.ctypes.data_as(ctypes.c_void_p)) + cpu2[:] = log.timer_debug1('contract %d:%d,%d:%d'%(a0,a1,b0,b1), *cpu2) + + # The rest 20% memory for cache b + mem_now = lib.current_memory()[0] + max_memory = max(0, mycc.max_memory - mem_now) + bufsize = (max_memory*.5e6/8-nocc**3*3*lib.num_threads())/(nocc*nmo) #*.5 for async_io + bufsize *= .5 #*.5 upper triangular part is loaded + bufsize *= .8 #*.8 for [a0:a1]/[b0:b1] partition + bufsize = max(8, bufsize) + log.debug('max_memory %d MB (%d MB in use)', max_memory, mem_now) + with lib.call_in_background(contract, sync=not mycc.async_io) as async_contract: + for a0, a1 in reversed(list(lib.prange_tril(0, nvir, bufsize))): + cache_row_a = numpy.asarray(eris_vvop[a0:a1,:a1], order='C') + if a0 == 0: + cache_col_a = cache_row_a + else: + cache_col_a = numpy.asarray(eris_vvop[:a0,a0:a1], order='C') + async_contract(a0, a1, a0, a1, (cache_row_a,cache_col_a, + cache_row_a,cache_col_a)) + + for b0, b1 in lib.prange_tril(0, a0, bufsize/8): + cache_row_b = numpy.asarray(eris_vvop[b0:b1,:b1], order='C') + if b0 == 0: + cache_col_b = cache_row_b + else: + cache_col_b = numpy.asarray(eris_vvop[:b0,b0:b1], order='C') + async_contract(a0, a1, b0, b1, (cache_row_a,cache_col_a, + cache_row_b,cache_col_b)) + + t2 = restore_t2_inplace(t2T) + et_sum *= 2 + if 
abs(et_sum[0].imag) > 1e-4: + logger.warn(mycc, 'Non-zero imaginary part of CCSD(T) energy was found %s', + et_sum[0]) + et = et_sum[0].real + log.timer('CCSD(T)', *cpu0) + log.note('CCSD(T) correction = %.15g', et) + + mycc.mol.symmetry = molsymm + + return et diff --git a/pyscf/lno/make_lno_rdm1.py b/pyscf/lno/make_lno_rdm1.py new file mode 100644 index 000000000..48625622c --- /dev/null +++ b/pyscf/lno/make_lno_rdm1.py @@ -0,0 +1,598 @@ +#!/usr/bin/env python +# Copyright 2014-2021 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Hong-Zhou Ye +# + +''' Make MP2 rdm1 of different flavors + + Args: + eris : + Provide access to the MO basis DF integral `ovL` through the following methods: + get_occ_blk(i0,i1) -> ([i0:i1]v|L) + get_vir_blk(a0,a1) -> (o[a0:a1]|L) + xform_occ(u) -> einsum('iaL,iI->IaL', ovL, u) + xform_vir(u) -> einsum('iaL,aA->iAL', ovL, u) + orbact_data : tuple + moeocc, moevir, uoccact, uviract = orbact_data + where + - `moeocc` and `moevir` are the MO energy for the occupied and virtual MOs + used to obtain `ovL` + - `uoccact` and `uviract` are the overlap matrix between canonical and active + orbitals, i.e., uoccact[i,I] = and uviract[a,A] = . + dm_type : str + '1h'/'1p'/'2p' for occ and '1p'/'1h'/'2h' for vir. 
+''' +import sys +import numpy as np +from functools import reduce + +from pyscf import lib +from pyscf.lib import logger +from pyscf import __config__ + +DEBUG_BLKSIZE = getattr(__config__, 'lno_base_make_rdm1_DEBUG_BLKSIZE', False) + + +def make_lo_rdm1_occ(eris, moeocc, moevir, uocc, uvir, dm_type): + if dm_type == '1h': + dm = make_lo_rdm1_occ_1h(eris, moeocc, moevir, uocc) + elif dm_type == '1p': + dm = make_lo_rdm1_occ_1p(eris, moeocc, moevir, uvir) + elif dm_type == '2p': + dm = make_lo_rdm1_occ_2p(eris, moeocc, moevir, uvir) + else: + raise RuntimeError('Requested occ LNO type "%s" is unknown.' % dm_type) + return dm + +def make_lo_rdm1_vir(eris, moeocc, moevir, uocc, uvir, dm_type): + if dm_type == '1p': + dm = make_lo_rdm1_vir_1p(eris, moeocc, moevir, uvir) + elif dm_type == '1h': + dm = make_lo_rdm1_vir_1h(eris, moeocc, moevir, uocc) + elif dm_type == '2h': + dm = make_lo_rdm1_vir_2h(eris, moeocc, moevir, uocc) + else: + raise RuntimeError('Requested vir LNO type "%s" is unknown.' % dm_type) + return dm + +def _mp2_rdm1_occblksize(nocc, nvir, naux, n1, n2, M, dsize): + r''' Estimate block size for the occupied index in MP2 rdm1 evaluation. + + Model: + Assuming storing n1 copies of ([O]V|L) and n2 copies of ([O]V|[O]V), [O] is + determined by solving a quadratic equation: + (n2*V^2) * [O]^2 + (n1*V*X) * [O] - M = 0 + + Args: + nocc/nvir/naux : int + Number of occ/vir/aux orbitals. + n1/n2 : int + Number of copies of tensors of size nvir*naux*occblksize and nvir^2*occblksize^2. + M: float or int + Available memory in terms how many numbers to store, i.e., mem_in_MB * 1e6/dsize. 
+ + Return: + occblksize (int) + mem_peak (float) : peak memory (in MB) + ''' + occblksize = max(1, min(nocc, int(np.floor((((n1*naux)**2 + 4*n2*M)**0.5 - n1*naux) / + (2*n2*nvir))))) + mem_peak = (occblksize * nvir*naux * n1 + occblksize**2 * nvir**2 * n2) * dsize/1e6 + return occblksize, mem_peak + +def _mp2_rdm1_virblksize(nocc, nvir, naux, n1, n2, M, dsize): + r''' Estimate block size for the virtual index in MP2 rdm1 evaluation. + + See `_mp2_rdm1_occblksize`. + ''' + return _mp2_rdm1_occblksize(nvir, nocc, naux, n1, n2, M, dsize) + +def make_full_rdm1(eris, moeocc, moevir, with_occ=True, with_vir=True): + r''' Occ-occ and vir-vir blocks of MP2 density matrix + + Math: + dm(i,j) + = 2 * \sum_{kab} t2(ikab).conj() * ( 2*t2(jkab) - t2(jkba) ) + dm(a,b) + = 2 * \sum_{ijc} t2(ijac) * ( 2*t2(ijbc) - t2(ijcb) ).conj() + ''' + assert( with_occ or with_vir ) + + nocc, nvir, naux = eris.nocc, eris.nvir, eris.naux + dtype = eris.dtype + dsize = eris.dsize + + # determine occblksize + mem_avail = eris.max_memory - lib.current_memory()[0] + M = mem_avail * 0.7 * 1e6/dsize + occblksize, mem_peak = _mp2_rdm1_occblksize(nocc,nvir,naux, 2, 3, M, dsize) + if DEBUG_BLKSIZE: occblksize = max(1,nocc//2) + logger.debug1(eris, 'make_full_rdm1 : nocc = %d nvir = %d naux = %d occblksize = %d ' + 'peak mem = %.2f MB', nocc, nvir, naux, occblksize, mem_peak) + + eov = moeocc[:,None] - moevir + + dmoo = np.zeros((nocc,nocc), dtype=dtype) if with_occ else None + dmvv = np.zeros((nvir,nvir), dtype=dtype) if with_vir else None + for ibatch,(i0,i1) in enumerate(lib.prange(0,nocc,occblksize)): + ivL = eris.get_occ_blk(i0,i1) + eiv = eov[i0:i1] + for jbatch,(j0,j1) in enumerate(lib.prange(0,nocc,occblksize)): + if jbatch == ibatch: + jvL = ivL + ejv = eiv + else: + jvL = eris.get_occ_blk(j0,j1) + ejv = eov[j0:j1] + denom = lib.direct_sum('ia+jb->ijab', eiv, ejv) + t2ijvv = np.conj(lib.einsum('iax,jbx->ijab', ivL, jvL)) / denom + jvL = None + denom = None + if with_occ: + dmoo[i0:i1,j0:j1] = 
4*lib.einsum('ikac.jkbc->ij', np.conj(t2ijvv), t2ijvv) + dmoo[i0:i1,j0:j1] -= 2*lib.einsum('ikac.jkcb->ij', np.conj(t2ijvv), t2ijvv) + if with_vir: + dmvv = 4*lib.einsum('ijac,ijbc->ab', t2ijvv, np.conj(t2ijvv)) + dmvv -= 2*lib.einsum('ijac,ijcb->ab', t2ijvv, np.conj(t2ijvv)) + t2ijvv = None + ivL = None + + return dmoo, dmvv + +def make_full_rdm1_occ(eris, moeocc, moevir): + r''' Occupied MP2 density matrix + + Math: + dm(i,j) + = 2 * \sum_{kab} t2(ikab) ( 2*t2(jkab) - t2(jkba) ) + ''' + nocc, nvir, naux = eris.nocc, eris.nvir, eris.naux + dtype = eris.dtype + dsize = eris.dsize + + # determine occblksize + mem_avail = eris.max_memory - lib.current_memory()[0] + M = mem_avail * 0.7 * 1e6/dsize + occblksize, mem_peak = _mp2_rdm1_occblksize(nocc,nvir,naux, 2, 3, M, dsize) + if DEBUG_BLKSIZE: occblksize = max(1,nocc//2) + logger.debug1(eris, 'make_full_rdm1_occ : nocc = %d nvir = %d naux = %d occblksize = %d ' + 'peak mem = %.2f MB', nocc, nvir, naux, occblksize, mem_peak) + + eov = moeocc[:,None] - moevir + + dm = np.zeros((nocc,nocc), dtype=dtype) + for ibatch,(i0,i1) in enumerate(lib.prange(0,nocc,occblksize)): + ivL = eris.get_occ_blk(i0,i1) + eiv = eov[i0:i1] + for jbatch,(j0,j1) in enumerate(lib.prange(0,nocc,occblksize)): + if jbatch == ibatch: + jvL = ivL + ejv = eiv + else: + jvL = eris.get_occ_blk(j0,j1) + ejv = eov[j0:j1] + denom = lib.direct_sum('ia+jb->ijab', eiv, ejv) + t2ijvv = np.conj(lib.einsum('iax,jbx->ijab', ivL, jvL)) / denom + jvL = None + denom = None + dm[i0:i1,j0:j1] = 4*lib.einsum('ikac.jkbc->ij', np.conj(t2ijvv), t2ijvv) + dm[i0:i1,j0:j1] -= 2*lib.einsum('ikac.jkcb->ij', np.conj(t2ijvv), t2ijvv) + t2ijvv = None + ivL = None + + return dm + +def make_full_rdm1_vir(eris, moeocc, moevir): + r''' Virtual MP2 density matrix + + Math: + dm(a,b) + = 2 * \sum_{ijc} t2(ijac) ( 2*t2(ijbc) - t2(ijcb) ) + ''' + nocc, nvir, naux = eris.nocc, eris.nvir, eris.naux + dtype = eris.dtype + dsize = eris.dsize + + # determine occblksize + mem_avail = 
eris.max_memory - lib.current_memory()[0] + M = mem_avail * 0.7 * 1e6/dsize + occblksize, mem_peak = _mp2_rdm1_occblksize(nocc,nvir,naux, 2, 3, M, dsize) + if DEBUG_BLKSIZE: occblksize = max(1,nocc//2) + logger.debug1(eris, 'make_full_rdm1_vir : nocc = %d nvir = %d naux = %d occblksize = %d ' + 'peak mem = %.2f MB', nocc, nvir, naux, occblksize, mem_peak) + + eov = moeocc[:,None] - moevir + + dm = np.zeros((nvir,nvir), dtype=dtype) + for ibatch,(i0,i1) in enumerate(lib.prange(0,nocc,occblksize)): + ivL = eris.get_occ_blk(i0,i1) + eiv = eov[i0:i1] + for jbatch,(j0,j1) in enumerate(lib.prange(0,nocc,occblksize)): + if jbatch == ibatch: + jvL = ivL + ejv = eiv + else: + jvL = eris.get_occ_blk(j0,j1) + ejv = eov[j0:j1] + eijvv = lib.direct_sum('ia+jb->ijab', eiv, ejv) + t2ijvv = np.conj(lib.einsum('iax,jbx->ijab', ivL, jvL)) / eijvv + jvL = None + eijvv = None + + dm = 4*lib.einsum('ijac,ijbc->ab', t2ijvv, np.conj(t2ijvv)) + dm -= 2*lib.einsum('ijac,ijcb->ab', t2ijvv, np.conj(t2ijvv)) + + t2ijvv = None + ivL = None + + return dm + +def make_lo_rdm1_occ_1h(eris, moeocc, moevir, u): + r''' Occupied MP2 density matrix with one localized hole + + Math: + dm(i,j) + = 2 * \sum_{k'ab} t2(ik'ab) ( 2*t2(jk'ab) - t2(jk'ba) ) + + Args: + eris : ERI object + Provides `ovL` in the canonical MOs. 
+ u : np.ndarray + Overlap between the canonical and localized occupied orbitals, i.e., + u(i,i') = + ''' + nocc, nvir, naux = eris.nocc, eris.nvir, eris.naux + dtype = eris.dtype + dsize = eris.dsize + nOcc = u.shape[1] + + # determine occblksize + mem_avail = eris.max_memory - lib.current_memory()[0] + M = mem_avail * 0.7 * 1e6/dsize + occblksize, mem_peak = _mp2_rdm1_occblksize(nocc,nvir,naux, 3, 3, M, dsize) + if DEBUG_BLKSIZE: occblksize = max(1,nocc//2) + logger.debug1(eris, 'make_lo_rdm1_occ_1h : nocc = %d nvir = %d nOcc = %d naux = %d ' + 'occblksize = %d peak mem = %.2f MB', + nocc, nvir, nOcc, naux, occblksize, mem_peak) + + moeOcc, u = subspace_eigh(np.diag(moeocc), u) + eov = moeocc[:,None] - moevir + eOv = moeOcc[:,None] - moevir + + dm = np.zeros((nocc,nocc), dtype=dtype) + for Kbatch,(K0,K1) in enumerate(lib.prange(0,nOcc,occblksize)): + KvL = eris.xform_occ(u[:,K0:K1]) + eKv = eOv[K0:K1] + for ibatch,(i0,i1) in enumerate(lib.prange(0,nocc,occblksize)): + ivL = eris.get_occ_blk(i0,i1) + eiv = eov[i0:i1] + eiKvv = lib.direct_sum('ia+Kb->iKab', eiv, eKv) + t2iKvv = np.conj(lib.einsum('iax,Kbx->iKab', ivL, KvL)) / eiKvv + ivL = None + eiKvv = None + for jbatch,(j0,j1) in enumerate(lib.prange(0,nocc,occblksize)): + if jbatch == ibatch: + t2jKvv = t2iKvv + else: + jvL = eris.get_occ_blk(j0,j1) + ejv = eov[j0:j1] + ejKvv = lib.direct_sum('ia+Kb->iKab', ejv, eKv) + t2jKvv = np.conj(lib.einsum('iax,Kbx->iKab', jvL, KvL)) / ejKvv + jvL = None + ejKvv = None + + dm[i0:i1,j0:j1] -= 4 * lib.einsum('iKab,jKab->ij', np.conj(t2iKvv), t2jKvv) + dm[i0:i1,j0:j1] += 2 * lib.einsum('iKab,jKba->ij', np.conj(t2iKvv), t2jKvv) + + t2jKvv = None + t2iKvv = None + KvL = None + + return dm + +def make_lo_rdm1_occ_1p(eris, moeocc, moevir, u): + r''' Occupied MP2 density matrix with one localized particle + + Math: + dm(i,j) + = 2 * \sum_{k'ab} t2(ik'ab) ( 2*t2(jk'ab) - t2(jk'ba) ) + + Args: + eris : ERI object + Provides `ovL` in the canonical MOs. 
+ u : np.ndarray + Overlap between the canonical and localized virtual orbitals, i.e., + u(a,a') = + ''' + nocc, nvir, naux = eris.nocc, eris.nvir, eris.naux + dtype = eris.dtype + dsize = eris.dsize + nVir = u.shape[1] + + # determine occblksize + mem_avail = eris.max_memory - lib.current_memory()[0] + M = mem_avail * 0.7 * 1e6/dsize + virblksize, mem_peak = _mp2_rdm1_virblksize(nocc,nvir,naux, 2, 3, M, dsize) + if DEBUG_BLKSIZE: virblksize = max(1,nvir//2) + logger.debug1(eris, 'make_lo_rdm1_occ_1p : nocc = %d nvir = %d nVir = %d naux = %d ' + 'virblksize = %d peak mem = %.2f MB', + nocc, nvir, nVir, naux, virblksize, mem_peak) + + moeVir, u = subspace_eigh(np.diag(moevir), u) + eov = moeocc[:,None] - moevir + eoV = moeocc[:,None] - moeVir + + dm = np.zeros((nocc,nocc), dtype=dtype) + for Abatch,(A0,A1) in enumerate(lib.prange(0,nVir,virblksize)): + oAL = eris.xform_vir(u[:,A0:A1]) + eoA = eoV[:,A0:A1] + for bbatch,(b0,b1) in enumerate(lib.prange(0,nvir,virblksize)): + obL = eris.get_vir_blk(b0,b1) + eob = eov[:,b0:b1] + + eooAb = lib.direct_sum('iA+jb->ijAb', eoA, eob) + t2ooAb = np.conj(lib.einsum('iAx,jbx->ijAb', oAL, obL)) / eooAb + obL = None + eooAb = None + + dm -= 2 * lib.einsum('ikAb,jkAb->ij', np.conj(t2ooAb), t2ooAb) + dm += lib.einsum('ikAb,kjAb->ij', np.conj(t2ooAb), t2ooAb) + dm += lib.einsum('kiAb,jkAb->ij', np.conj(t2ooAb), t2ooAb) + dm -= 2 * lib.einsum('kiAb,kjAb->ij', np.conj(t2ooAb), t2ooAb) + + t2ooAb = None + oAL = None + + return dm + +def make_lo_rdm1_occ_2p(eris, moeocc, moevir, u): + r''' Occupied MP2 density matrix with two localized particles + + Math: + dm(i,j) + = 2 * \sum_{ka'b'} t2(ika'b') ( 2*t2(jka'b') - t2(jkb'a') ) + + Args: + eris : ERI object + Provides `ovL` in the canonical MOs. 
+ u : np.ndarray + Overlap between the canonical and localized virtual orbitals, i.e., + u(a,a') = + ''' + nocc, nvir, naux = eris.nocc, eris.nvir, eris.naux + dtype = eris.dtype + dsize = eris.dsize + nVir = u.shape[1] + + # determine Occblksize + mem_avail = eris.max_memory - lib.current_memory()[0] + M = mem_avail * 0.7 * 1e6/dsize + Virblksize, mem_peak = _mp2_rdm1_virblksize(nocc,nVir,naux, 2, 3, M, dsize) + if DEBUG_BLKSIZE: Virblksize = max(1,nVir//2) + logger.debug1(eris, 'make_lo_rdm1_occ_2p: nocc = %d nvir = %d nVir = %d naux = %d ' + 'Virblksize = %d peak mem = %.2f MB', + nocc, nvir, nVir, naux, Virblksize, mem_peak) + + moeVir, u = subspace_eigh(np.diag(moevir), u) + eoV = moeocc[:,None] - moeVir + + dm = np.zeros((nocc,nocc), dtype=dtype) + for Abatch,(A0,A1) in enumerate(lib.prange(0,nVir,Virblksize)): + oAL = eris.xform_vir(u[:,A0:A1]) + eoA = eoV[:,A0:A1] + for Bbatch,(B0,B1) in enumerate(lib.prange(0,nVir,Virblksize)): + if Bbatch == Abatch: + oBL = oAL + eoB = eoA + else: + oBL = eris.xform_vir(u[:,B0:B1]) + eoB = eoV[:,B0:B1] + eooAB = lib.direct_sum('iA+jB->ijAB', eoA, eoB) + t2ooAB = np.conj(lib.einsum('iAx,jBx->ijAB', oAL, oBL)) / eooAB + oBL = None + eooAB = None + + dm -= 4 * lib.einsum('ikAB,jkAB->ij', np.conj(t2ooAB), t2ooAB) + dm += 2 * lib.einsum('ikAB,kjAB->ij', np.conj(t2ooAB), t2ooAB) + + t2ooAB = None + oAL = None + + return dm + +def make_lo_rdm1_vir_1p(eris, moeocc, moevir, u): + r''' Virtual MP2 density matrix with one localized particle + + Math: + dm(a,b) + = \sum_{ijc'} 2 * t2(ijac') * ( 2 * t2(ijbc') - t2(jibc') ) + + Args: + eris : ERI object + Provides `ovL` in the canonical MOs. 
+ u : np.ndarray + Overlap between the canonical and localized occupied orbitals, i.e., + u(a,a') = + ''' + nocc, nvir, naux = eris.nocc, eris.nvir, eris.naux + dtype = eris.dtype + dsize = eris.dsize + nVir = u.shape[1] + + # determine occblksize + mem_avail = eris.max_memory - lib.current_memory()[0] + M = mem_avail * 0.7 * 1e6/dsize + virblksize, mem_peak = _mp2_rdm1_virblksize(nocc,nvir,naux, 3, 3, M, dsize) + if DEBUG_BLKSIZE: virblksize = max(1,nvir//2) + logger.debug1(eris, 'make_lo_rdm1_vir_1p : nocc = %d nvir = %d nVir = %d naux = %d ' + 'virblksize = %d peak mem = %.2f MB', + nocc, nvir, nVir, naux, virblksize, mem_peak) + + moeVir, u = subspace_eigh(np.diag(moevir), u) + eov = moeocc[:,None] - moevir + eoV = moeocc[:,None] - moeVir + + # TODO: can we batch over occ index? + dm = np.zeros((nvir,nvir), dtype=dtype) + for Abatch,(A0,A1) in enumerate(lib.prange(0,nVir,virblksize)): + oAL = eris.xform_vir(u[:,A0:A1]) + eoA = eoV[:,A0:A1] + for abatch,(a0,a1) in enumerate(lib.prange(0,nvir,virblksize)): + oaL = eris.get_vir_blk(a0,a1) + eoa = eov[:,a0:a1] + eooAa = lib.direct_sum('iA+jb->ijAb', eoA, eoa) + t2ooAa = np.conj(lib.einsum('iAx,jbx->ijAb', oAL, oaL)) / eooAa + oaL = None + eooAa = None + for bbatch,(b0,b1) in enumerate(lib.prange(0,nvir,virblksize)): + if abatch == bbatch: + t2ooAb = t2ooAa + else: + obL = eris.get_vir_blk(b0,b1) + eob = eov[:,b0:b1] + eooAb = lib.direct_sum('iA+jb->ijAb', eoA, eob) + t2ooAb = np.conj(lib.einsum('iAx,jbx->ijAb', oAL, obL)) / eooAb + obL = None + eooAb = None + + dm[a0:a1,b0:b1] += 4 * lib.einsum('ijAa,ijAb->ab', t2ooAa, np.conj(t2ooAb)) + dm[a0:a1,b0:b1] -= 2 * lib.einsum('ijAa,jiAb->ab', t2ooAa, np.conj(t2ooAb)) + + t2ooAb = None + t2ooAa = None + oAL = None + + return dm + +def make_lo_rdm1_vir_1h(eris, moeocc, moevir, u): + r''' Occupied MP2 density matrix with one localized hole + + Math: + dm(a,b) + = \sum_{i'jc} 2 * t2(i'jac) * t2(i'jbc) + 2 * t2(i'jca) * t2(i'jcb) + - t2(i'jac) * t2(i'jcb) - t2(i'jca) * 
t2(i'jbc) + + Args: + eris : ERI object + Provides `ovL` in the canonical MOs. + u : np.ndarray + Overlap between the canonical and localized occupied orbitals, i.e., + u(i,i') = <i|i'> + ''' + nocc, nvir, naux = eris.nocc, eris.nvir, eris.naux + dtype = eris.dtype + dsize = eris.dsize + nOcc = u.shape[1] + + # determine occblksize + mem_avail = eris.max_memory - lib.current_memory()[0] + M = mem_avail * 0.7 * 1e6/dsize + occblksize, mem_peak = _mp2_rdm1_occblksize(nocc,nvir,naux, 2, 3, M, dsize) + if DEBUG_BLKSIZE: occblksize = max(1,nocc//2) + logger.debug1(eris, 'make_lo_rdm1_vir_1h : nocc = %d nvir = %d nOcc = %d naux = %d ' + 'occblksize = %d peak mem = %.2f MB', + nocc, nvir, nOcc, naux, occblksize, mem_peak) + + moeOcc, u = subspace_eigh(np.diag(moeocc), u) + eOv = moeOcc[:,None] - moevir + eov = moeocc[:,None] - moevir + + dm = np.zeros((nvir,nvir), dtype=dtype) + for Ibatch,(I0,I1) in enumerate(lib.prange(0,nOcc,occblksize)): + IvL = eris.xform_occ(u[:,I0:I1]) + eIv = eOv[I0:I1] + for jbatch,(j0,j1) in enumerate(lib.prange(0,nocc,occblksize)): + jvL = eris.get_occ_blk(j0,j1) + ejv = eov[j0:j1] + + eIjvv = lib.direct_sum('Ia+jb->Ijab', eIv, ejv) + t2Ijvv = np.conj(lib.einsum('Iax,jbx->Ijab', IvL, jvL)) / eIjvv + jvL = None + eIjvv = None + + dm += 2 * lib.einsum('Ijac,Ijbc->ab', t2Ijvv, np.conj(t2Ijvv)) + dm -= lib.einsum('Ijac,Ijcb->ab', t2Ijvv, np.conj(t2Ijvv)) + dm -= lib.einsum('Ijca,Ijbc->ab', t2Ijvv, np.conj(t2Ijvv)) + dm += 2 * lib.einsum('Ijca,Ijcb->ab', t2Ijvv, np.conj(t2Ijvv)) + + t2Ijvv = None + IvL = None + + return dm + +def make_lo_rdm1_vir_2h(eris, moeocc, moevir, u): + r''' Virtual MP2 density matrix with two localized holes + + Math: + dm(a,b) + = 2 * \sum_{i'j'c} t2(i'j'ac) ( 2*t2(i'j'bc) - t2(i'j'cb) ) + + Args: + eris : ERI object + Provides `ovL` in the canonical MOs. 
+ u : np.ndarray + Overlap between the canonical and localized occupied orbitals, i.e., + u(i,i') = + ''' + nocc, nvir, naux = eris.nocc, eris.nvir, eris.naux + dtype = eris.dtype + dsize = eris.dsize + nOcc = u.shape[1] + + # determine Occblksize + mem_avail = eris.max_memory - lib.current_memory()[0] + M = mem_avail * 0.7 * 1e6/dsize + Occblksize, mem_peak = _mp2_rdm1_occblksize(nOcc,nvir,naux, 2, 3, M, dsize) + if DEBUG_BLKSIZE: Occblksize = max(1,nOcc//2) + logger.debug1(eris, 'make_lo_rdm1_vir_2h: nocc = %d nvir = %d nOcc = %d naux = %d ' + 'Occblksize = %d peak mem = %.2f MB', + nocc, nvir, nOcc, naux, Occblksize, mem_peak) + + moeOcc, u = subspace_eigh(np.diag(moeocc), u) + eOv = moeOcc[:,None] - moevir + + dm = np.zeros((nvir,nvir), dtype=dtype) + for Ibatch,(I0,I1) in enumerate(lib.prange(0,nOcc,Occblksize)): + IvL = eris.xform_occ(u[:,I0:I1]) + eIv = eOv[I0:I1] + for Jbatch,(J0,J1) in enumerate(lib.prange(0,nOcc,Occblksize)): + if Jbatch == Ibatch: + JvL = IvL + eJv = eIv + else: + JvL = eris.xform_occ(u[:,J0:J1]) + eJv = eOv[J0:J1] + eIJvv = lib.direct_sum('Ia+Jb->IJab', eIv, eJv) + t2IJvv = np.conj(lib.einsum('Iax,Jbx->IJab', IvL, JvL)) / eIJvv + JvL = None + eIJvv = None + + dm += 4 * lib.einsum('IJac,IJbc->ab', t2IJvv, np.conj(t2IJvv)) + dm -= 2 * lib.einsum('IJac,IJcb->ab', t2IJvv, np.conj(t2IJvv)) + + t2IJvv = None + IvL = None + + return dm + +def subspace_eigh(fock, orb): + f = reduce(np.dot, (orb.T.conj(), fock, orb)) + if orb.shape[1] == 1: + moe = np.array([f[0,0]]) + else: + moe, u = np.linalg.eigh(f) + orb = np.dot(orb, u) + return moe, orb diff --git a/pyscf/lno/test/test_lnoccsd.py b/pyscf/lno/test/test_lnoccsd.py new file mode 100644 index 000000000..e4b4191b2 --- /dev/null +++ b/pyscf/lno/test/test_lnoccsd.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python +# Copyright 2021 The PySCF Developers. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import unittest +import numpy as np +from pyscf import __config__ +setattr(__config__, 'lnocc_DEBUG_BLKSIZE', True) # debug outcore mode +from pyscf import gto, scf, mp, cc, lo +from pyscf.cc.ccsd_t import kernel as CCSD_T +from pyscf.lno import LNOCCSD, LNOCCSD_T +from pyscf.lno.tools import autofrag_iao + + +class WaterDimer(unittest.TestCase): + @classmethod + def setUpClass(cls): + mol = gto.Mole() + mol.verbose = 4 + mol.output = '/dev/null' + mol.atom = ''' + O -1.485163346097 -0.114724564047 0.000000000000 + H -1.868415346097 0.762298435953 0.000000000000 + H -0.533833346097 0.040507435953 0.000000000000 + O 1.416468653903 0.111264435953 0.000000000000 + H 1.746241653903 -0.373945564047 -0.758561000000 + H 1.746241653903 -0.373945564047 0.758561000000 + ''' + mol.basis = 'cc-pvdz' + mol.precision = 1e-10 + mol.build() + mf = scf.RHF(mol).density_fit().run() + + # canonical + frozen = 2 + mymp = mp.MP2(mf, frozen=frozen) + mymp.kernel(with_t2=False) + efull_mp2 = mymp.e_corr + + mycc = cc.CCSD(mf, frozen=frozen) + eris = mycc.ao2mo() + mycc.kernel(eris=eris) + efull_ccsd = mycc.e_corr + + efull_t = CCSD_T(mycc, eris=eris, verbose=mycc.verbose) + efull_ccsd_t = efull_ccsd + efull_t + + cls.mol = mol + cls.mf = mf + cls.frozen = frozen + cls.ecano = [efull_mp2, efull_ccsd, efull_ccsd_t] + @classmethod + def tearDownClass(cls): + cls.mol.stdout.close() + del cls.mol, cls.mf, cls.ecano, cls.frozen + + def 
test_lno_pm_by_thresh(self): + mol = self.mol + mf = self.mf + frozen = self.frozen + + # PM localization + orbocc = mf.mo_coeff[:,frozen:np.count_nonzero(mf.mo_occ)] + mlo = lo.PipekMezey(mol, orbocc) + lo_coeff = mlo.kernel() + while True: # always performing jacobi sweep to avoid trapping in local minimum/saddle point + lo_coeff1 = mlo.stability_jacobi()[1] + if lo_coeff1 is lo_coeff: + break + mlo = lo.PipekMezey(mf.mol, lo_coeff1).set(verbose=4) + mlo.init_guess = None + lo_coeff = mlo.kernel() + + # Fragment list: for PM, every orbital corresponds to a fragment + frag_lolist = [[i] for i in range(lo_coeff.shape[1])] + + gamma = 10 + threshs = [1e-5,1e-6,1e-100] + refs = [ + [-0.4044781783,-0.4231598372,-0.4292049721], + [-0.4058765086,-0.4244510794,-0.4307864928], + self.ecano + ] + for thresh,ref in zip(threshs,refs): + mcc = LNOCCSD_T(mf, lo_coeff, frag_lolist, frozen=frozen).set(verbose=5) + mcc.lno_thresh = [thresh*gamma,thresh] + mcc.kernel() + emp2 = mcc.e_corr_pt2 + eccsd = mcc.e_corr_ccsd + eccsd_t = mcc.e_corr_ccsd_t + # print('[%s],' % (','.join([f'{x:.10f}' for x in [emp2,eccsd,eccsd_t]]))) + self.assertAlmostEqual(emp2, ref[0], 6) + self.assertAlmostEqual(eccsd, ref[1], 6) + self.assertAlmostEqual(eccsd_t, ref[2], 6) + + + # force outcore ao2mo for generating ovL + for thresh,ref in zip(threshs,refs): + mcc = LNOCCSD_T(mf, lo_coeff, frag_lolist, frozen=frozen).set(verbose=5) + mcc.force_outcore_ao2mo = True + mcc.lno_thresh = [thresh*gamma,thresh] + mcc.kernel() + emp2 = mcc.e_corr_pt2 + eccsd = mcc.e_corr_ccsd + eccsd_t = mcc.e_corr_ccsd_t + # print('[%s],' % (','.join([f'{x:.10f}' for x in [emp2,eccsd,eccsd_t]]))) + self.assertAlmostEqual(emp2, ref[0], 6) + self.assertAlmostEqual(eccsd, ref[1], 6) + self.assertAlmostEqual(eccsd_t, ref[2], 6) + + def test_lno_iao_by_thresh(self): + mol = self.mol + mf = self.mf + frozen = self.frozen + + # IAO localization + orbocc = mf.mo_coeff[:,frozen:np.count_nonzero(mf.mo_occ)] + iao_coeff = lo.iao.iao(mol, 
orbocc) + lo_coeff = lo.orth.vec_lowdin(iao_coeff, mf.get_ovlp()) + moliao = lo.iao.reference_mol(mol) + + # Fragment list: all IAOs belonging to same atom form a fragment + frag_lolist = autofrag_iao(moliao) + + gamma = 10 + threshs = [1e-5,1e-6,1e-100] + refs = [ + [-0.4054784012,-0.4240686326,-0.4303996712], + [-0.4060479828,-0.4245745223,-0.4309965749], + self.ecano + ] + for thresh,ref in zip(threshs,refs): + mcc = LNOCCSD_T(mf, lo_coeff, frag_lolist, frozen=frozen).set(verbose=5) + mcc.lno_thresh = [thresh*gamma,thresh] + mcc.kernel() + emp2 = mcc.e_corr_pt2 + eccsd = mcc.e_corr_ccsd + eccsd_t = mcc.e_corr_ccsd_t + # print('[%s],' % (','.join([f'{x:.10f}' for x in [emp2,eccsd,eccsd_t]]))) + self.assertAlmostEqual(emp2, ref[0], 6) + self.assertAlmostEqual(eccsd, ref[1], 6) + self.assertAlmostEqual(eccsd_t, ref[2], 6) + + + +if __name__ == "__main__": + print("Full Tests for LNO-CCSD and LNO-CCSD(T)") + unittest.main() diff --git a/pyscf/lno/test/test_ulnoccsd.py b/pyscf/lno/test/test_ulnoccsd.py new file mode 100644 index 000000000..4925055ea --- /dev/null +++ b/pyscf/lno/test/test_ulnoccsd.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python +# Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import unittest +import numpy as np +from pyscf import gto, scf, mp, cc, lo +from pyscf.cc.uccsd_t import kernel as CCSD_T +from pyscf.lno import tools +from pyscf.lno.ulnoccsd import ULNOCCSD_T + + +class WaterDimer(unittest.TestCase): + @classmethod + def setUpClass(cls): + mol = gto.Mole() + mol.verbose = 4 + mol.output = '/dev/null' + mol.atom = ''' + O -1.485163346097 -0.114724564047 0.000000000000 + H -1.868415346097 0.762298435953 0.000000000000 + H -0.533833346097 0.040507435953 0.000000000000 + O 1.416468653903 0.111264435953 0.000000000000 + H 1.746241653903 -0.373945564047 -0.758561000000 + H 1.746241653903 -0.373945564047 0.758561000000 + ''' + mol.basis = 'cc-pvdz' + mol.build() + mf = scf.UHF(mol).density_fit().run() + + # canonical + frozen = 2 + mymp = mp.MP2(mf, frozen=frozen) + mymp.kernel(with_t2=False) + efull_mp2 = mymp.e_corr + + mycc = cc.CCSD(mf, frozen=frozen) + eris = mycc.ao2mo() + mycc.kernel(eris=eris) + efull_ccsd = mycc.e_corr + + efull_t = CCSD_T(mycc, eris=eris, verbose=mycc.verbose) + efull_ccsd_t = efull_ccsd + efull_t + + cls.mol = mol + cls.mf = mf + cls.frozen = frozen + cls.ecano = [efull_mp2, efull_ccsd, efull_ccsd_t] + @classmethod + def tearDownClass(cls): + cls.mol.stdout.close() + del cls.mol, cls.mf, cls.ecano, cls.frozen + + def test_ulno_pm_by_thresh(self): + mol = self.mol + mf = self.mf + frozen = self.frozen + + # PM localization + orbocc = list() + lo_coeff = list() + for s in range(2): + orbocc.append(mf.mo_coeff[s][:,frozen:np.count_nonzero(mf.mo_occ[s])]) + mlo = lo.PipekMezey(mol, orbocc[s]) + lo_coeff_s = mlo.kernel() + while True: # always performing jacobi sweep to avoid trapping in local minimum/saddle point + lo_coeff1_s = mlo.stability_jacobi()[1] + if lo_coeff1_s is lo_coeff_s: + break + mlo = lo.PipekMezey(mf.mol, lo_coeff1_s).set(verbose=4) + mlo.init_guess = None + lo_coeff_s = mlo.kernel() + lo_coeff.append(lo_coeff_s) + + # Fragment list: for PM, every orbital corresponds to a fragment + oa = 
[[[i],[]] for i in range(orbocc[0].shape[1])] + ob = [[[],[i]] for i in range(orbocc[1].shape[1])] + frag_lolist = oa + ob + + gamma = 10 + threshs = [1e-5*2,1e-6*2,1e-100*2] + refs = [ + [-0.3995407761,-0.4185382023,-0.4231105742], + [-0.4052089997,-0.4238689186,-0.4300854290], + self.ecano + ] + for thresh,ref in zip(threshs,refs): + mcc = ULNOCCSD_T(mf, lo_coeff, frag_lolist, frozen=frozen).set(verbose=5) + mcc.lno_thresh = [thresh*gamma,thresh] + mcc.kernel() + emp2 = mcc.e_corr_pt2 + eccsd = mcc.e_corr_ccsd + eccsd_t = mcc.e_corr_ccsd_t + # print('[%s],' % (','.join([f'{x:.10f}' for x in [emp2,eccsd,eccsd_t]]))) + self.assertAlmostEqual(emp2, ref[0], 6) + self.assertAlmostEqual(eccsd, ref[1], 6) + self.assertAlmostEqual(eccsd_t, ref[2], 6) + + +if __name__ == "__main__": + print("Full Tests for LNO-CCSD and LNO-CCSD(T)") + unittest.main() diff --git a/pyscf/lno/tools.py b/pyscf/lno/tools.py new file mode 100644 index 000000000..4d0067f21 --- /dev/null +++ b/pyscf/lno/tools.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python +# Copyright 2014-2021 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Hong-Zhou Ye +# + +import numpy as np + +def autofrag_atom(mol, H2heavy=False): + ''' Group non-ghost atoms into fragments. Return fragment definitions by + atom indices + + Args: + mol (gto.Mole or pbc.gto.Cell): + PySCF Mole object. 
+ H2heavy (bool): + Whether to include H atoms into the fragment of the nearest heavy + atom (e.g., a CH3 group). Default is False, which means every atom + defines an individual fragment. + In case of no heavy atoms (e.g., an H chain), this variable is ignored + and every H atom defines a fragment. + + Return: + frag_atmlist (nested list): + Fragment definition by atom indices. E.g., [[0],[1,5],[2,3,4],...] means + fragment 1 consists of atom 0 + fragment 2 consists of atom 1 and 5 + fragment 3 consists of atom 2, 3, and 4 + ... + ''' + if H2heavy: + get_dist = lambda x,y: ((x[:,None,:]-y)**2.).sum(axis=-1) + + if hasattr(mol, 'lattice_vectors'): # mol is actually a Cell object + alat = mol.lattice_vectors() + else: + alat = None + cs = mol.atom_charges() + rs = mol.atom_coords() + idx_H = np.where(cs == 1)[0] + idx_X = np.where(cs > 1)[0] + if idx_X.size > 0: + if alat is None: + d2 = get_dist(rs[idx_H], rs[idx_X]) + H2Xmap = np.argmin(d2, axis=1) + else: + d2 = [] + for jx in [-1,0,1]: + for jy in [-1,0,1]: + for jz in [-1,0,1]: + a = np.dot(np.array([jx,jy,jz]), alat) + d2.append( get_dist(rs[idx_H], rs[idx_X]+a) ) + d2 = np.hstack(d2) + H2Xmap = np.argmin(d2, axis=1) % len(idx_X) + frag_atmlist = [None] * len(idx_X) + for i,iX in enumerate(idx_X): + iHs = np.where(H2Xmap==i)[0] + l = np.asarray(np.concatenate([[iX], idx_H[iHs]]), + dtype=int).tolist() + frag_atmlist[i] = l + else: # all-H system + print('warning: no heavy atom detected in the system; every ' + 'hydrogen atom is treated as a single fragment.') + frag_atmlist = [[i] for i in idx_H] + else: + frag_atmlist = [[i] for i in np.where(mol.atom_charges() > 0)[0]] + + return frag_atmlist + +def autofrag_iao(moliao, frag_type='atom', frag_atmlist=None): + ''' Group IAOs into fragments. Return fragment definitions by IAO indices. + + Args: + moliao (gto.Mole or pbc.gto.Cell): + Mole/Cell object generated by `lo.iao.reference_mol`. + frag_atmlist (nested list): + Fragment definitions by atom indices. 
If not provided, `autofrag_atom` + will be called to generate a `frag_atmlist` using `moliao` with + `H2heavy` set to False. + frag_type (str, case insensitive): + How IAOs are grouped into fragments: + 'atom' (default): + All IAOs on an atom define a fragment. + 'shell': + Each shell of IAOs on an atom define a fragment. + E.g., 3 fragments per carbon atom, 1s, 2s and 2p. + 'shell deep': + Shells of IAOs sharing same angular momentum on an atom define + a fragment. E.g., 2 fragments per carbon atom, 1s+2s and 2p. + '1o' or 'orbital': + Each IAO defines a fragment. Note that this choice leads + to energy not invariant to rotations of a molecule. + + Returns: + frag_lolist (nested list): + Fragment definition by IAO indices. E.g., [[0],[1,5],[2,3,4],...] means + fragment 1 consists of IAO 0 + fragment 2 consists of IAO 1 and 5 + fragment 3 consists of IAO 2, 3, and 4 + ... + ''' + frag_type = frag_type.lower() + if frag_type == '1o' or frag_type.startswith('orb'): + frag_lolist = [[i] for i in range(moliao.nao_nr())] + elif frag_type.startswith('at'): + if frag_atmlist is None: frag_atmlist = autofrag_atom(moliao) + frag_lolist = [np.hstack([range(p0,p1) for (b0,b1,p0,p1) in + moliao.aoslice_by_atom()[atom_ids]]).tolist() + for atom_ids in frag_atmlist] + elif frag_type == 'shell': + nbas = moliao.nbas + ao_loc = moliao.ao_loc_nr() + frag_lolist = [list(range(*ao_loc[i:i+2])) for i in range(nbas)] + elif frag_type == 'shell deep': + from pyscf import lib + nbas = moliao.nbas + ao_loc = moliao.ao_loc_nr() + bas_nao = ao_loc[1:] - ao_loc[:-1] + bas_ls = np.asarray([moliao.bas_angular(i) for i in range(nbas)]) + bas_nao0_sph = 2*bas_ls+1 + bas_nao0_cart = (bas_ls+1)*(bas_ls+2)//2 + if np.all(bas_nao%bas_nao0_sph == 0): + bas_nao0 = bas_nao0_sph + elif np.all(bas_nao%bas_nao0_cart == 0): + bas_nao0 = bas_nao0_cart + else: + raise RuntimeError + frag_lolist = [list(range(p0,p1)) for i in range(nbas) for p0,p1 in + lib.prange(*ao_loc[i:i+2],bas_nao0[i])] + else: + raise 
ValueError('Unknown frag_type %s' % (str(frag_type))) + return frag_lolist + +def _matpow(A, p): + e, u = np.linalg.eigh(A) + return np.dot(u * e**p, u.T.conj()) + +def _map_lo_to_frag(mol, orbloc, frag_atmlist, verbose=None): + r''' Assign input LOs (assumed orthonormal) to fragments using the Lowdin charge. + + For each IAO 'i', a 1D array, [p_1, p_2, ... p_nfrag], is computed, where + p_ifrag = \sum_{mu on fragment i} ( (s1e^{1/2}*orbloc)[mu,i] )**2. + ''' + from pyscf.lib import logger + if verbose is None: + verbose = mol.verbose + log = logger.new_logger(mol, verbose) + + if hasattr(mol, 'pbc_intor'): + s1e = mol.pbc_intor('int1e_ovlp') + else: + s1e = mol.intor('int1e_ovlp') + s1e_sqrt = _matpow(s1e, 0.5) + plo_ao = np.dot(s1e_sqrt, orbloc)**2. + aoslice_by_atom = mol.aoslice_nr_by_atom() + aoind_by_frag = [np.concatenate([range(*aoslice_by_atom[atm][-2:]) + for atm in atmlist]) + for atmlist in frag_atmlist] + plo_frag = np.array([plo_ao[aoind].sum(axis=0) + for aoind in aoind_by_frag]).T + lo_frag_map = plo_frag.argmax(axis=1) + nlo, nfrag = plo_frag.shape + for i in range(nlo): + log.debug1('LO %d is assigned to fragment %d with charge %.2f', + i, lo_frag_map[i], plo_frag[i, lo_frag_map[i]]) + log.debug2('pop by frag:' + ' %.2f'*nfrag, *plo_frag[i]) + + frag_lolist = [np.where(lo_frag_map==i)[0] for i in range(nfrag)] + return frag_lolist + +def map_lo_to_frag(mol, orbloc, frag_atmlist, verbose=None): + if isinstance(orbloc, (list, tuple)) and len(orbloc) == 2: + frag_lolista = _map_lo_to_frag(mol, orbloc[0], frag_atmlist, verbose=verbose) + frag_lolistb = _map_lo_to_frag(mol, orbloc[1], frag_atmlist, verbose=verbose) + assert len(frag_lolista) == len(frag_lolistb) + frag_lolist = [] + for a, b in zip(frag_lolista, frag_lolistb): + frag_lolist.append([a, b]) + return frag_lolist + else: + return _map_lo_to_frag(mol, orbloc, frag_atmlist, verbose=verbose) + diff --git a/pyscf/lno/ulno.py b/pyscf/lno/ulno.py new file mode 100644 index 000000000..4976caecd 
--- /dev/null +++ b/pyscf/lno/ulno.py @@ -0,0 +1,1069 @@ +# Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Authors: Chenghan Li +# Xing Zhang +# Ardavan Farahvash + +import sys +from functools import reduce +import numpy as np +import h5py + +from pyscf import lib +from pyscf.lib import logger +from pyscf import ao2mo +from pyscf import mp + +from pyscf.lno import lno +from pyscf.lno.make_lno_rdm1 import _mp2_rdm1_occblksize, DEBUG_BLKSIZE + +einsum = lib.einsum + +def make_las(mlno, eris, orbloc, lno_type, lno_param): + """ + Create localized active space for a given set of localized orbitals + given in orbloc + """ + log = logger.new_logger(mlno) + #cput1 = (logger.process_clock(), logger.perf_counter()) + + s1e = mlno.s1e + + orboccfrz_core = [None,] * 2 + orbocc = [None,] * 2 + orbvir = [None,] * 2 + orbvirfrz_core = [None,] * 2 + moeocc = [None,] * 2 + moevir = [None,] * 2 + + uocc_loc = [None,] * 2 + uocc_std = [None,] * 2 + uocc_orth = [None,] * 2 + #uvir_loc = [None,] * 2 + #uvir_std = [None,] * 2 + #uvir_orth = [None,] * 2 + + mo_splits = mlno.split_mo_coeff() + moe_splits = mlno.split_mo_energy() + for s in range(2): + orboccfrz_core[s], orbocc[s], orbvir[s], orbvirfrz_core[s] = mo_splits[s] + moeocc[s], moevir[s] = moe_splits[s][1:3] + + ##################################### + # Projection of LO onto occ and vir # + ##################################### + ovlp = reduce(np.dot, 
(orbloc[s].T.conj(), s1e, orbocc[s])) + uocc_loc[s], uocc_std[s], uocc_orth[s] = \ + lno.projection_construction(ovlp, mlno.lo_proj_thresh, mlno.lo_proj_thresh_active) + # NOTE we allow empty fragments + # if uocc_loc[s].shape[1] == 0: + # log.error('LOs do not overlap with occupied space. This could be caused ' + # 'by either a bad fragment choice or too high of `lo_proj_thresh_active` ' + # '(current value: %s).', mlno.lo_proj_thresh_active) + # raise RuntimeError + log.info('LO occ proj: %d active | %d standby | %d orthogonal', + *[u.shape[1] for u in [uocc_loc[s], uocc_std[s], uocc_orth[s]]]) + + #################### + # LNO construction # + #################### + if lno_type[0] == lno_type[1] == '1h': + # NOTE: uvir_loc is not used in 1h/1h, so we pass None + if getattr(mlno, 'with_df', None): + dmoo, dmvv = make_lo_rdm1_1h_df(eris, moeocc, moevir, uocc_loc) + else: + dmoo, dmvv = make_lo_rdm1_1h(eris, moeocc, moevir, uocc_loc) + else: + raise NotImplementedError('Unsupported LNO type') + + # if mlno._match_oldulno: + # dmoo[0],dmoo[1]=dmoo[0]/2.0,dmoo[1]/2.0 + # dmvv[0],dmvv[1]=dmvv[0]/2.0,dmvv[1]/2.0 + + orbfrag = [None,] * 2 + frzfrag = [None,] * 2 + uoccact_loc = [None,] * 2 + frag_msg = "" + + for s in range(2): + dmoo[s] = reduce(np.dot, (uocc_orth[s].T.conj(), dmoo[s], uocc_orth[s])) + + _param = lno_param[s][0] + if _param['norb'] is not None: + _param['norb'] -= uocc_loc[s].shape[1] + uocc_std[s].shape[1] + + uoccact_orth, uoccfrz_orth = lno.natorb_select(dmoo[s], uocc_orth[s], **_param) + orboccfrz = np.hstack((orboccfrz_core[s], np.dot(orbocc[s], uoccfrz_orth))) + uoccact = lno.subspace_eigh(np.diag(moeocc[s]), np.hstack((uoccact_orth, uocc_std[s], uocc_loc[s])))[1] + orboccact = np.dot(orbocc[s], uoccact) + uoccact_loc[s] = np.linalg.multi_dot((orboccact.T.conj(), s1e, orbloc[s])) + + orbviract, orbvirfrz = lno.natorb_select(dmvv[s], orbvir[s], **(lno_param[s][1])) + orbvirfrz = np.hstack((orbvirfrz, orbvirfrz_core[s])) + uviract = reduce(np.dot, 
(orbvir[s].T.conj(), s1e, orbviract)) + uviract = lno.subspace_eigh(np.diag(moevir[s]), uviract)[1] + orbviract = np.dot(orbvir[s], uviract) + + #################### + # LAS construction # + #################### + orbfragall = [orboccfrz, orboccact, orbviract, orbvirfrz] + orbfrag[s] = np.hstack(orbfragall) + norbfragall = np.asarray([x.shape[1] for x in orbfragall]) + locfragall = np.cumsum([0] + norbfragall.tolist()).astype(int) + frzfrag[s] = np.concatenate(( + np.arange(locfragall[0], locfragall[1]), + np.arange(locfragall[3], locfragall[4]))).astype(int) + frag_msg += '\nSpin channel %d: %d/%d Occ | %d/%d Vir | %d/%d MOs\n' % ( + s, + norbfragall[1], sum(norbfragall[:2]), + norbfragall[2], sum(norbfragall[2:4]), + sum(norbfragall[1:3]), sum(norbfragall) + ) + if len(frzfrag[s]) == 0: + frzfrag[s] = 0 + + return orbfrag, frzfrag, uoccact_loc, frag_msg + +def make_lo_rdm1_1h(eris, moeocc, moevir, uocc): + r''' + Create unrestricted MP2 density matrix with one localized hole + + Math: + dmoo_a(i,j) = sum_{Kab} [ T(Kaib) * T(Kajb)] + + 2* sum_{K'ab'} [ t(K'aib') * t(K'ajb') ] + + dmoo_b(i,j) - reverse alpha/beta spin indices from dmoo_a + + dmvv_a(a,b) = sum_{Kic} [ T(Kaic) * T(Kbjc) ] + + sum_{K'ic'} [ t(K'c'ib) * t(K'c'ia) ] + + sum_{Ki'c'} [ t(Kai'c) * t(Kbi'c') ] + + + dmvv_b(a,b) - reverse alpha/beta spin indices from dmvv_a + + Notation: + i,j - canonical occupied orbitals + a,b,c - canonical virtual orbitals + K - local occupied orbitals + indices with ' are beta spin, others are alpha. + + are canonical alpha; I,J,K are local alpha; + A,B,C are canonical beta; + + t(ijab) = (ia|jb) / (e_i+e_j-e_a-e_b) + T(ijab) = t(ijab) - t(ijba) + + # + Args: + eris : ERI object + Provides `ovL` in the canonical MOs. + moeocc : [moeocc_a, moeocc_b] + Occ MO energies + moevir : [moevir_a, moevir_b] + Vir MO energies + uocc : [u_a, u_b] + Overlap between the canonical and localized occupied orbitals. 
+ u_a(i,I) = (alpha), u_b(i',I') = (beta) + ''' + log = logger.new_logger(eris) + + # Unpack spins + moeocc_a, moeocc_b = moeocc + moevir_a, moevir_b = moevir + u_a, u_b = uocc + + nocca, nvira = eris.nocc[0], eris.nvir[0] + noccb, nvirb = eris.nocc[1], eris.nvir[1] + nOcca = u_a.shape[1] + nOccb = u_b.shape[1] + dtype = eris.dtype + dsize = eris.dsize + + # Determine block sizes + mem_avail = eris.max_memory - lib.current_memory()[0] + M = mem_avail * 0.1 * 1e6 / dsize + + # Alpha block size (based on alpha canonical occ) + occblksize_a, mem_peak_a = _mp2_rdm1_occblksize(nocca, nvira, 0, 0, 3, M/2, dsize) + if DEBUG_BLKSIZE: occblksize_a = max(1, nocca // 2) + + # Beta block size (based on beta canonical occ) + occblksize_b, mem_peak_b = _mp2_rdm1_occblksize(noccb, nvirb, 0, 0, 3, M/2, dsize) + if DEBUG_BLKSIZE: occblksize_b = max(1, noccb // 2) + + # + log.debug1('make_lo_rdm1_1h (alpha): nocc=%d nvir=%d nOcc=%d blksize=%d peak_mem=%.2f MB', + nocca, nvira, nOcca, occblksize_a, mem_peak_a) + + log.debug1('make_lo_rdm1_1h (beta): nocc=%d nvir=%d nOcc=%dblksize=%d peak_mem=%.2f MB', + noccb, nvirb, nOccb, occblksize_b, mem_peak_b) + + # Localized MO energies + moeI_a, u_a = lno.subspace_eigh(np.diag(moeocc_a), u_a) + moeI_b, u_b = lno.subspace_eigh(np.diag(moeocc_b), u_b) + + # Energy denominators + eov_a = moeocc_a[:, None] - moevir_a + eIv_a = moeI_a[:, None] - moevir_a + eov_b = moeocc_b[:, None] - moevir_b + eIv_b = moeI_b[:, None] - moevir_b + + # Initialize RDMs + dmoo_a = np.zeros((nocca, nocca), dtype=dtype) + dmoo_b = np.zeros((noccb, noccb), dtype=dtype) + dmvv_a = np.zeros((nvira, nvira), dtype=dtype) + dmvv_b = np.zeros((nvirb, nvirb), dtype=dtype) + + #-- construct t2aa and contract + for Kbatch, (K0, K1) in enumerate(lib.prange(0, nOcca, occblksize_a)): + # fragment-occ DF energies + eKv_a = eIv_a[K0:K1] + + for ibatch, (i0, i1) in enumerate(lib.prange(0, nocca, occblksize_a)): + # full-occ DF energies + eiv_a = eov_a[i0:i1] + + # form t2_ovov-block + 
denom_aa = lib.direct_sum('Ka+ib->Kaib', eKv_a, eiv_a) + t2aa_i = eris.get_ivov(u_a, K0, K1, i0, i1) / denom_aa + t2aa_i = t2aa_i - t2aa_i.transpose(0,3,2,1) + denom_aa = None + eiv_a=None + + for jbatch, (j0, j1) in enumerate(lib.prange(0, nocca, occblksize_a)): + if ibatch==jbatch: + t2aa_j=t2aa_i + else: + # full-occ DF energies + ejv_a = eov_a[j0:j1] + + # form t2_ovov-block + denom_aa = lib.direct_sum('Ka+jb->Kajb', eKv_a, ejv_a) + t2aa_j = eris.get_ivov(u_a, K0, K1, j0, j1) / denom_aa + t2aa_j = t2aa_j - t2aa_j.transpose(0,3,2,1) + denom_aa = None + ejv_a=None + + # contract block to make occupied MP2-DM + dmoo_a[i0:i1, j0:j1] += lib.einsum('Kaib,Kajb->ij', t2aa_i, t2aa_j.conj()) + t2aa_j=None + + # contract block to make virtual MP2-DM + dmvv_a += lib.einsum('Kaic,Kbic->ba', np.conj(t2aa_i), t2aa_i) + t2aa_i=None + + eKv_a=None + + #-- construct t2bb and contract + for Kbatch, (K0, K1) in enumerate(lib.prange(0, nOccb, occblksize_b)): + # fragment-occ DF energies + eKv_b = eIv_b[K0:K1] + + for ibatch, (i0, i1) in enumerate(lib.prange(0, noccb, occblksize_b)): + # full-occ DF integrals/energies + eiv_b = eov_b[i0:i1] + + # form t2_ovov-block + denom_bb = lib.direct_sum('Ka+ib->Kaib', eKv_b, eiv_b) + t2bb_i = eris.get_IVOV(u_b, K0, K1, i0, i1) / denom_bb + t2bb_i = t2bb_i - t2bb_i.transpose(0,3,2,1) + denom_bb = None + eiv_b = None + + for jbatch, (j0, j1) in enumerate(lib.prange(0, noccb, occblksize_b)): + if ibatch==jbatch: + t2bb_j=t2bb_i + else: + # full-occ DF integrals/energies + ejv_b = eov_b[j0:j1] + + # form t2_ovov-block + denom_bb = lib.direct_sum('Ka+jb->Kajb', eKv_b, ejv_b) + t2bb_j = eris.get_IVOV(u_b, K0, K1, j0, j1) / denom_bb + t2bb_j = t2bb_j - t2bb_j.transpose(0,3,2,1) + denom_bb = None + ejv_b=None + + # contract block to make occupied MP2/DM + dmoo_b[i0:i1, j0:j1] += lib.einsum('Kaib,Kajb->ij', t2bb_i, t2bb_j.conj()) + t2bb_j=None + + # contract block to make virtual MP2/DM + dmvv_b += lib.einsum('Kaic,Kbic->ba', t2bb_i.conj(), t2bb_i) + 
t2bb_i = None + eKv_b = None + + #-- construct t2ba and contract + for Kbatch, (K0, K1) in enumerate(lib.prange(0, nOccb, occblksize_b)): + # fragment-occ DF energies + eKv_b = eIv_b[K0:K1] + + for ibatch, (i0, i1) in enumerate(lib.prange(0, nocca, occblksize_a)): + # full-occ DF energies + eiv_a = eov_a[i0:i1] + + # form t2_ovov-block + denom_ba = lib.direct_sum('Ka+ib->Kaib', eKv_b, eiv_a) + t2ba_i = eris.get_IVov(u_b, K0, K1, i0, i1) / denom_ba + denom_ba = None + eiv_a=None + + for jbatch, (j0, j1) in enumerate(lib.prange(0, nocca, occblksize_a)): + if ibatch==jbatch: + t2ba_j=t2ba_i + else: + # full-occ DF energies + ejv_a = eov_a[j0:j1] + + # form t2_ovov-block + denom_ba = lib.direct_sum('Ka+jb->Kajb', eKv_b, ejv_a) + t2ba_j = eris.get_IVov(u_b, K0, K1, j0, j1) / denom_ba + denom_ba = None + ejv_a=None + + # contract block to make occupied MP2/DM + dmoo_a[i0:i1, j0:j1] += 2*lib.einsum('Kaib,Kajb->ij', t2ba_i, t2ba_j.conj()) + t2ba_j=None + + # contract block to make virtual MP2/DM + dmvv_a += lib.einsum('Kcia,Kcib->ba', t2ba_i.conj(), t2ba_i) + dmvv_b += lib.einsum('Kaic,Kbic->ba', t2ba_i.conj(), t2ba_i) + t2ba_i = None + eKv_b=None + + #-- construct t2ab and contract + for Kbatch, (K0, K1) in enumerate(lib.prange(0, nOcca, occblksize_a)): + # fragment-occ DF energies + eKv_a = eIv_a[K0:K1] + + for ibatch, (i0, i1) in enumerate(lib.prange(0, noccb, occblksize_b)): + # full-occ DF energies + eiv_b = eov_b[i0:i1] + + # form t2_ovov-block + denom_ab = lib.direct_sum('Ka+ib->Kaib', eKv_a, eiv_b) + t2ab_i = eris.get_ivOV(u_a, K0, K1, i0, i1) / denom_ab + denom_ba = None + eiv_b=None + + for jbatch, (j0, j1) in enumerate(lib.prange(0, noccb, occblksize_b)): + if ibatch==jbatch: + t2ab_j=t2ab_i + else: + # full-occ DF energies + ejv_b = eov_b[j0:j1] + + # form t2_ovov-block + denom_ab = lib.direct_sum('Ka+jb->Kajb', eKv_a, ejv_b) + t2ab_j = eris.get_ivOV(u_a, K0, K1, j0, j1) / denom_ab + denom_ba = None + ejv_b=None + + # contract block to make occupied MP2/DM + 
def _lo_t2_block_df(eris, KvL, eKv, eov, p0, p1, spin, antisym):
    '''Form the MP2 amplitude block t(K,a,i,b) for canonical occupied orbitals p0:p1.

    t(Kaib) = (Ka|ib) / (eKv[K,a] + eov[i,b]), where (Ka|ib) is contracted from
    the DF tensor `KvL` and the block returned by `eris.get_occ_blk`.
    If `antisym` is true, t(Kaib) - t(Kbia) is returned instead.
    '''
    pvL = eris.get_occ_blk(p0, p1, spin=spin)
    denom = lib.direct_sum('Ka+ib->Kaib', eKv, eov[p0:p1])
    t2 = lib.einsum('Kax,ibx->Kaib', KvL, pvL) / denom
    if antisym:
        t2 = t2 - t2.transpose(0, 3, 2, 1)
    return t2


def _lo_rdm1_1h_df_accumulate(eris, u_K, eIv_K, spin_K, nOcc_K, blksize_K,
                              eov_i, spin_i, nocc_i, blksize_i,
                              dmoo_i, dmvv_K, dmvv_i=None):
    '''Accumulate (in place) the contribution of one spin block of amplitudes.

    `K` labels the localized occupied orbitals (spin `spin_K`), `i`/`j` the
    canonical occupied orbitals (spin `spin_i`).

    Same spin (spin_K == spin_i): antisymmetrized amplitudes are used, `dmoo_i`
    is updated with weight 1 and only `dmvv_K` is updated.
    Opposite spin: plain amplitudes are used, `dmoo_i` is updated with weight 2
    and both `dmvv_K` (virtuals paired with K) and `dmvv_i` (virtuals paired
    with i) are updated.
    '''
    same_spin = (spin_K == spin_i)
    oo_scale = 1 if same_spin else 2

    for K0, K1 in lib.prange(0, nOcc_K, blksize_K):
        # fragment-occ DF integrals/energies
        KvL = eris.xform_occ(u_K[:, K0:K1], spin=spin_K)
        eKv = eIv_K[K0:K1]

        for ibatch, (i0, i1) in enumerate(lib.prange(0, nocc_i, blksize_i)):
            t2_i = _lo_t2_block_df(eris, KvL, eKv, eov_i, i0, i1, spin_i, same_spin)

            for jbatch, (j0, j1) in enumerate(lib.prange(0, nocc_i, blksize_i)):
                if jbatch == ibatch:
                    # diagonal block: reuse the amplitudes already in memory
                    t2_j = t2_i
                else:
                    t2_j = _lo_t2_block_df(eris, KvL, eKv, eov_i, j0, j1, spin_i, same_spin)

                # contract block to make occupied MP2-DM
                dmoo_i[i0:i1, j0:j1] += oo_scale * lib.einsum('Kaib,Kajb->ij', t2_i, t2_j.conj())
                t2_j = None

            # contract block to make virtual MP2-DM
            dmvv_K += lib.einsum('Kaic,Kbic->ba', t2_i.conj(), t2_i)
            if not same_spin:
                dmvv_i += lib.einsum('Kcia,Kcib->ba', t2_i.conj(), t2_i)
            t2_i = None

        KvL = None
        eKv = None


def make_lo_rdm1_1h_df(eris, moeocc, moevir, uocc):
    r'''
    Create unrestricted MP2 density matrices with one localized hole
    (density-fitted version).

    Math (as implemented below):
        dmoo_a(i,j) =     sum_{Kab}    T(Kaib) * conj(T(Kajb))
                    + 2 * sum_{K'a'b}  t(K'a'ib) * conj(t(K'a'jb))

        dmoo_b(i,j) - reverse alpha/beta spin indices from dmoo_a

        dmvv_a(b,a) = sum_{Kic}    conj(T(Kaic))   * T(Kbic)
                    + sum_{K'c'i}  conj(t(K'c'ia)) * t(K'c'ib)
                    + sum_{Ki'c'}  conj(t(Kai'c')) * t(Kbi'c')

        dmvv_b(b,a) - reverse alpha/beta spin indices from dmvv_a

        t(Kaib) = (Ka|ib) / (e_K + e_i - e_a - e_b)
        T(Kaib) = t(Kaib) - t(Kbia)

    Notation:
        i,j   - canonical occupied orbitals
        a,b,c - canonical virtual orbitals
        K     - localized occupied orbitals
        indices with ' are beta spin, others are alpha.

    Args:
        eris : ERI object
            Must provide `nocc`, `nvir`, `naux`, `dtype`, `dsize`, `max_memory`,
            `xform_occ(u, spin=...)` and `get_occ_blk(i0, i1, spin=...)`.
        moeocc : [moeocc_a, moeocc_b]
            Occ MO energies
        moevir : [moevir_a, moevir_b]
            Vir MO energies
        uocc : [u_a, u_b]
            Overlap between the canonical and localized occupied orbitals;
            columns index the localized orbitals.

    Returns:
        [dmoo_a, dmoo_b], [dmvv_a, dmvv_b]
    '''
    log = logger.new_logger(eris)

    # Unpack spins
    moeocc_a, moeocc_b = moeocc
    moevir_a, moevir_b = moevir
    u_a, u_b = uocc

    nocca, nvira, naux = eris.nocc[0], eris.nvir[0], eris.naux
    noccb, nvirb = eris.nocc[1], eris.nvir[1]
    nOcca = u_a.shape[1]
    nOccb = u_b.shape[1]
    dtype = eris.dtype
    dsize = eris.dsize

    # Determine block sizes
    mem_avail = eris.max_memory - lib.current_memory()[0]
    M = mem_avail * 0.7 * 1e6 / dsize

    # Alpha block size (based on alpha canonical occ)
    occblksize_a, mem_peak_a = _mp2_rdm1_occblksize(nocca, nvira, naux, 3, 3, M/2, dsize)
    if DEBUG_BLKSIZE: occblksize_a = max(1, nocca // 2)

    # Beta block size (based on beta canonical occ)
    occblksize_b, mem_peak_b = _mp2_rdm1_occblksize(noccb, nvirb, naux, 3, 3, M/2, dsize)
    if DEBUG_BLKSIZE: occblksize_b = max(1, noccb // 2)

    log.debug1('make_lo_rdm1_1h (alpha): nocc=%d nvir=%d nOcc=%d naux=%d blksize=%d peak_mem=%.2f MB',
               nocca, nvira, nOcca, naux, occblksize_a, mem_peak_a)

    log.debug1('make_lo_rdm1_1h (beta): nocc=%d nvir=%d nOcc=%d naux=%d blksize=%d peak_mem=%.2f MB',
               noccb, nvirb, nOccb, naux, occblksize_b, mem_peak_b)

    # Localized MO energies
    moeI_a, u_a = lno.subspace_eigh(np.diag(moeocc_a), u_a)
    moeI_b, u_b = lno.subspace_eigh(np.diag(moeocc_b), u_b)

    # Energy denominators
    eov_a = moeocc_a[:, None] - moevir_a
    eIv_a = moeI_a[:, None] - moevir_a
    eov_b = moeocc_b[:, None] - moevir_b
    eIv_b = moeI_b[:, None] - moevir_b

    # Initialize RDMs
    dmoo_a = np.zeros((nocca, nocca), dtype=dtype)
    dmoo_b = np.zeros((noccb, noccb), dtype=dtype)
    dmvv_a = np.zeros((nvira, nvira), dtype=dtype)
    dmvv_b = np.zeros((nvirb, nvirb), dtype=dtype)

    #-- construct t2aa and contract
    _lo_rdm1_1h_df_accumulate(eris, u_a, eIv_a, 'a', nOcca, occblksize_a,
                              eov_a, 'a', nocca, occblksize_a,
                              dmoo_a, dmvv_a)

    #-- construct t2bb and contract
    _lo_rdm1_1h_df_accumulate(eris, u_b, eIv_b, 'b', nOccb, occblksize_b,
                              eov_b, 'b', noccb, occblksize_b,
                              dmoo_b, dmvv_b)

    #-- construct t2ba and contract (K is beta, i/j are alpha)
    _lo_rdm1_1h_df_accumulate(eris, u_b, eIv_b, 'b', nOccb, occblksize_b,
                              eov_a, 'a', nocca, occblksize_a,
                              dmoo_a, dmvv_b, dmvv_a)

    #-- construct t2ab and contract (K is alpha, i/j are beta)
    _lo_rdm1_1h_df_accumulate(eris, u_a, eIv_a, 'a', nOcca, occblksize_a,
                              eov_b, 'b', noccb, occblksize_b,
                              dmoo_b, dmvv_a, dmvv_b)

    return [dmoo_a, dmoo_b], [dmvv_a, dmvv_b]
class _ULNO_DF_Incore_ERIs(_ULNO_DF_ERIs):
    '''Unrestricted DF integrals whose (occ, vir, aux) tensors are produced by
    `_init_ump_df_eris` and stored on `self.ovL` (alpha) / `self.OVL` (beta).
    '''

    def __init__(self, with_df, orbocc, orbvir, max_memory, verbose=None, stdout=None):
        # Nothing spin- or storage-specific to set up: defer to the base holder.
        super().__init__(with_df, orbocc, orbvir, max_memory, verbose, stdout)

    def build(self):
        '''Generate both spin tensors and keep whatever `_init_ump_df_eris` returns.'''
        log = logger.new_logger(self)
        occ_a, occ_b = self.orbocc[0], self.orbocc[1]
        vir_a, vir_b = self.orbvir[0], self.orbvir[1]
        tensors = _init_ump_df_eris(self.with_df, occ_a, vir_a, occ_b, vir_b,
                                    self.max_memory,
                                    ovL_a=self.ovL, ovL_b=self.OVL, log=log)
        self.ovL, self.OVL = tensors
# Helper function for DF ERI generation
def _init_ump_df_eris(with_df, occ_coeff_a, vir_coeff_a, occ_coeff_b, vir_coeff_b,
                      max_memory, ovL_a=None, ovL_b=None, log=None):
    '''Build the (occ, vir, aux) DF three-index tensors for alpha and beta spin.

    Args:
        with_df : DF object
            Must provide `get_naoaux()` and `loop(blksize=...)` (float64 case)
            or `sr_loop(...)` and `kpts` (any other dtype).
        occ_coeff_a, vir_coeff_a, occ_coeff_b, vir_coeff_b : 2D arrays
            AO-to-MO coefficients, one column per orbital.
        max_memory : number
            Memory budget; compared against byte counts divided by 1e6.
        ovL_a, ovL_b : optional output containers
            If omitted, numpy arrays are allocated. When both are numpy arrays
            the tensors are filled aux-block by aux-block; otherwise each
            container is filled occ-block by occ-block through slice assignment.
        log : optional logger

    Returns:
        (ovL_a, ovL_b)
    '''
    from pyscf.ao2mo import _ao2mo

    if log is None: log = logger.Logger(sys.stdout, 3)

    # array shapes
    nao, nocca = occ_coeff_a.shape
    nvira = vir_coeff_a.shape[1]
    nmoa = nocca + nvira

    nao, noccb = occ_coeff_b.shape
    nvirb = vir_coeff_b.shape[1]
    nmob = noccb + nvirb

    nao_pair = nao**2
    naux = with_df.get_naoaux()

    dtype = occ_coeff_a.dtype
    dsize = occ_coeff_a.itemsize

    mo_a = np.asarray(np.hstack((occ_coeff_a, vir_coeff_a)), order='F')
    mo_b = np.asarray(np.hstack((occ_coeff_b, vir_coeff_b)), order='F')
    ijslice_a = (0, nocca, nocca, nmoa)
    ijslice_b = (0, noccb, noccb, nmob)

    if ovL_a is None:
        ovL_a = np.empty((nocca, nvira, naux), dtype=dtype)

    if ovL_b is None:
        ovL_b = np.empty((noccb, nvirb, naux), dtype=dtype)

    mem_avail = max_memory - lib.current_memory()[0]

    # --- Define DF loop and ao2mo functions ---
    if dtype == np.float64:
        def loop_df(blksize):
            for Lpq in with_df.loop(blksize=blksize):
                yield Lpq
                Lpq = None

        def ao2mo_df(Lpq, mo, ijslice, out):
            return _ao2mo.nr_e2(Lpq, mo, ijslice, aosym='s2', out=out)

    else:
        def loop_df(blksize):
            kpti_kptj = [with_df.kpts[0]]*2
            for LpqR, LpqI, sign in with_df.sr_loop(blksize=blksize,
                                                    kpti_kptj=kpti_kptj):
                Lpq = LpqR + LpqI*1j
                LpqR = LpqI = None
                if Lpq.shape[1] != nao_pair:
                    Lpq = lib.unpack_tril(Lpq).astype(dtype)
                yield Lpq
                Lpq = None

        def ao2mo_df(Lpq, mo, ijslice, out):
            return _ao2mo.r_e2(Lpq, mo, ijslice, [], None, aosym='s1', out=out)

    def fill_outcore(ovL, mo, nocc, nvir, nmo, spin):
        '''Fill `ovL` for one spin, batching over occupied orbitals and aux functions.'''
        if nocc == 0 or nvir == 0:
            # Empty tensor: nothing to transform. Without this guard the
            # block-size estimate below divides by mem_occblk == 0.0 when nvir == 0.
            log.debug('no occ-vir pairs (%s); skipping outcore ao2mo', spin)
            return

        # batching occ [O]XV and aux ([O]V + Nao_pair)*[X]
        mem_occblk = naux * nvir * dsize / 1e6
        occ_blksize = min(nocc, max(1, int(np.floor(mem_avail * 0.6 / mem_occblk))))
        if DEBUG_BLKSIZE: occ_blksize = max(1, nocc // 2)
        mem_auxblk = (occ_blksize * nvir + nao_pair) * dsize / 1e6
        aux_blksize = min(naux, max(1, int(np.floor(mem_avail * 0.3 / mem_auxblk))))
        if DEBUG_BLKSIZE: aux_blksize = max(1, naux // 2)
        log.debug('occ blksize (%s) for outcore ao2mo: %d/%d', spin, occ_blksize, nocc)
        log.debug('aux blksize (%s) for outcore ao2mo: %d/%d', spin, aux_blksize, naux)

        buf = np.empty(naux * occ_blksize * nvir, dtype=dtype)
        buf2 = np.empty(aux_blksize * occ_blksize * nvir, dtype=dtype)

        for i0, i1 in lib.prange(0, nocc, occ_blksize):
            nocci = i1 - i0
            ijslice = (i0, i1, nocc, nmo)
            p1 = 0
            # view on the preallocated buffer; assembled fully before one write to ovL
            ovL_block = np.ndarray((nocci, nvir, naux), dtype=dtype, buffer=buf)
            for Lpq in loop_df(aux_blksize):
                p0, p1 = p1, p1 + Lpq.shape[0]
                out = ao2mo_df(Lpq, mo, ijslice, buf2)
                ovL_block[:, :, p0:p1] = out.reshape(-1, nocci, nvir).transpose(1, 2, 0)
                Lpq = out = None
            ovL[i0:i1] = ovL_block
            ovL_block = None

    # --- In-core ERI generation ---
    if isinstance(ovL_a, np.ndarray) and isinstance(ovL_b, np.ndarray):
        mem_auxblk = (nao_pair + nocca*nvira + noccb*nvirb) * dsize / 1e6
        aux_blksize = min(naux, max(1, int(np.floor(mem_avail * 0.5 / mem_auxblk))))
        if DEBUG_BLKSIZE: aux_blksize = max(1, naux // 2)
        log.debug('aux blksize for incore ao2mo (unrestricted): %d/%d', aux_blksize, naux)

        buf_a = np.empty(aux_blksize * nocca * nvira, dtype=dtype)
        buf_b = np.empty(aux_blksize * noccb * nvirb, dtype=dtype)

        p1 = 0
        for Lpq in loop_df(aux_blksize):
            p0, p1 = p1, p1 + Lpq.shape[0]
            out_a = ao2mo_df(Lpq, mo_a, ijslice_a, buf_a)
            out_b = ao2mo_df(Lpq, mo_b, ijslice_b, buf_b)
            ovL_a[:, :, p0:p1] = out_a.reshape(-1, nocca, nvira).transpose(1, 2, 0)
            ovL_b[:, :, p0:p1] = out_b.reshape(-1, noccb, nvirb).transpose(1, 2, 0)
            Lpq = out_a = out_b = None
        buf_a = buf_b = None

    # --- Out-of-core ERI generation ---
    else:
        # Alpha and beta are processed sequentially so that only one spin's
        # work buffers are alive at a time.
        fill_outcore(ovL_a, mo_a, nocca, nvira, nmoa, 'alpha')
        fill_outcore(ovL_b, mo_b, noccb, nvirb, nmob, 'beta')

    return ovL_a, ovL_b
class _ULNO_Incore_ERIs(_ULNO_ERIs):
    '''Non-DF integrals transformed on the fly from the AO ERIs in `self._eri`.

    Each `get_*` method returns a 4-index block (K, a, j, b) where K runs over
    columns i0:i1 of the rotated occupied orbitals (`orb @ u`) and j over the
    canonical occupied orbitals j0:j1. Lower-case letters in the method name
    use `orbo`/`orbv`, upper-case letters use `orbO`/`orbV`.
    '''

    def _common_init_(self, mlno, mo_coeff=None):
        # The parent class defines no `_common_init_`, so there is nothing to
        # chain to; `mo_coeff` is accepted only to keep the signature.
        if mlno._scf._eri is None:
            self._eri = mlno.mol.intor('int2e', aosym='s8')
        else:
            self._eri = mlno._scf._eri

    def _ovov_block(self, occ1, vir1, occ2, vir2):
        '''Transform the AO ERIs to (occ1 vir1 | occ2 vir2) and unfold to 4 indices.'''
        g = ao2mo.general(self._eri, [occ1, vir1, occ2, vir2], compact=False)
        return g.reshape(occ1.shape[1], vir1.shape[1], occ2.shape[1], vir2.shape[1])

    def get_ivov(self, u, i0, i1, j0, j1):
        # Rotate only the requested columns of u instead of all of them.
        orbi = np.dot(self.orbo, u[:, i0:i1])
        return self._ovov_block(orbi, self.orbv, self.orbo[:, j0:j1], self.orbv)

    def get_ivOV(self, u, i0, i1, j0, j1):
        orbi = np.dot(self.orbo, u[:, i0:i1])
        return self._ovov_block(orbi, self.orbv, self.orbO[:, j0:j1], self.orbV)

    def get_IVov(self, u, i0, i1, j0, j1):
        orbI = np.dot(self.orbO, u[:, i0:i1])
        return self._ovov_block(orbI, self.orbV, self.orbo[:, j0:j1], self.orbv)

    def get_IVOV(self, u, i0, i1, j0, j1):
        orbI = np.dot(self.orbO, u[:, i0:i1])
        return self._ovov_block(orbI, self.orbV, self.orbO[:, j0:j1], self.orbV)
self.max_memory - lib.current_memory()[0] + + if getattr(self, 'with_df', None): + naux = self.with_df.get_naoaux() + nocca, nvira = orbocc[0].shape[1], orbvir[0].shape[1] + noccb, nvirb = orbocc[1].shape[1], orbvir[1].shape[1] + + dsize = orbocc[0].itemsize + mem_df = (nocca * nvira + noccb * nvirb) * naux * dsize / 1024**2. + log.debug('ao2mo est mem= %.2f MB avail mem= %.2f MB', mem_df, mem_now) + + if ( (self._ovL_to_save is not None) or (self._ovL is not None) or + self.force_outcore_ao2mo or (mem_df > mem_now * 0.5) ): + eris = _ULNO_DF_Outcore_ERIs(self.with_df, orbocc, orbvir, self.max_memory, + ovL=self._ovL, ovL_to_save=self._ovL_to_save, + verbose=self.verbose, stdout=self.stdout) + else: + eris = _ULNO_DF_Incore_ERIs(self.with_df, orbocc, orbvir, self.max_memory, + verbose=self.verbose, stdout=self.stdout) + eris.build() + + else: + mem_incore = nmoa**4 * 8 / 1e6 * 4. # Rough estimate + log.debug('ao2mo (non-DF) est mem= %.2f MB avail mem= %.2f MB', mem_incore, mem_now) + if ((self._scf._eri is not None or + mem_incore < mem_now or + self.mol.incore_anyway) and + not self.force_outcore_ao2mo): + eris = _ULNO_Incore_ERIs(self, orbocc, orbvir, self.max_memory, + verbose=self.verbose, stdout=self.stdout) + else: + raise NotImplementedError("Unrestricted non-DF out-of-core ERIs not implemented.") + + log.timer('Integral xform ', *cput0) + return eris + + get_nocc = mp.ump2.get_nocc + get_nmo = mp.ump2.get_nmo + + # bug-fix core orbital fixing + get_frozen_mask = mp.ump2.get_frozen_mask + # + + split_mo_coeff = mp.dfump2.DFUMP2.split_mo_coeff + split_mo_energy = mp.dfump2.DFUMP2.split_mo_energy + split_mo_occ = mp.dfump2.DFUMP2.split_mo_occ + make_las = make_las \ No newline at end of file diff --git a/pyscf/lno/ulnoccsd.py b/pyscf/lno/ulnoccsd.py new file mode 100644 index 000000000..c09b9523e --- /dev/null +++ b/pyscf/lno/ulnoccsd.py @@ -0,0 +1,530 @@ +# Copyright 2014-2025 The PySCF Developers. All Rights Reserved. 
def UCCSD(mf, frozen=None, mo_coeff=None, mo_occ=None):
    '''Create a `MODIFIED_UCCSD` object from a mean-field object.

    Args:
        mf : mean-field object; converted with `scf.addons.convert_to_uhf`
            when `mf.istype('UHF')` is false.
        frozen, mo_coeff : forwarded unchanged to `MODIFIED_UCCSD`.
        mo_occ : occupations; defaults to `mf.mo_occ` (read after any conversion).

    Returns:
        MODIFIED_UCCSD instance
    '''
    if not mf.istype('UHF'):
        # imported lazily: only needed when a conversion is required
        from pyscf import scf
        mf = scf.addons.convert_to_uhf(mf)

    if getattr(mf, 'with_df', None):
        # NOTE(review): the return value is discarded; presumably called for a
        # side effect on the DF object -- confirm before removing.
        mf.with_df.get_naoaux()

    if mo_occ is None:
        mo_occ = mf.mo_occ
    return MODIFIED_UCCSD(mf, frozen, mo_coeff, mo_occ)
class _ChemistsERIs(uccsd._ChemistsERIs):
    '''UCCSD integral container whose common setup can reuse 1e quantities
    (`_s1e`, `_h1e`, `_vhf`) cached on the CC object.
    '''

    def _common_init_(self, mycc, mo_coeff=None):
        mymf = mycc._scf

        coeff = mycc.mo_coeff if mo_coeff is None else mo_coeff
        mask_a, mask_b = get_frozen_mask(mycc)
        coeff = (coeff[0][:, mask_a], coeff[1][:, mask_b])
        self.mo_coeff = coeff

        # The Fock matrix and HF energy are rebuilt here (the SCF may not be
        # fully converged), taking precomputed 1e integrals / vhf from `mycc`
        # when those attributes exist.
        s1e = getattr(mycc, '_s1e', None)
        h1e = getattr(mycc, '_h1e', None)
        vhf = getattr(mycc, '_vhf', None)
        dm = mymf.make_rdm1(mycc.mo_coeff, mycc.mo_occ)
        if vhf is None:
            vhf = self.get_vhf(mymf, dm, h1e=h1e, s1e=s1e)
        fockao = mymf.get_fock(vhf=vhf, dm=dm, h1e=h1e, s1e=s1e)

        def ao_to_mo(fock_ao, c):
            return np.dot(np.dot(c.conj().T, fock_ao), c)

        self.focka = ao_to_mo(fockao[0], coeff[0])
        self.fockb = ao_to_mo(fockao[1], coeff[1])
        self.fock = (self.focka, self.fockb)
        self.e_hf = mymf.energy_tot(dm=dm, vhf=vhf, h1e=h1e)

        nocca, noccb = self.nocc = mycc.nocc
        self.mol = mycc.mol

        # Orbital energies are the real diagonal of the MO-basis Fock matrices.
        mo_ea = self.focka.diagonal().real
        mo_eb = self.fockb.diagonal().real
        self.mo_energy = (mo_ea, mo_eb)

        def smallest_gap(mo_e, nocc):
            # smallest |e_occ - e_vir|; 1e9 when there are no occ/vir pairs
            gaps = abs(mo_e[:nocc, None] - mo_e[None, nocc:])
            if gaps.size > 0:
                return gaps.min()
            return 1e9

        gap_a = smallest_gap(mo_ea, nocca)
        gap_b = smallest_gap(mo_eb, noccb)
        if gap_a < 1e-5 or gap_b < 1e-5:
            logger.warn(mycc, 'HOMO-LUMO gap (%s,%s) too small for UCCSD',
                        gap_a, gap_b)
        return self

    def get_vhf(self, mymf, dm, h1e=None, s1e=None):
        ''' Build vhf from input dm.

        NOTE:
            If the input dm agrees with `mymf.make_rdm1()` to within 1e-6 (max
            abs difference), vhf is assembled from the SCF MO coefficients and
            MO energies as (S C) e (S C)^T - h1e for each spin; otherwise
            `mymf.get_veff` is called.
        '''
        dm_scf = mymf.make_rdm1()
        errdm = abs(dm_scf - dm).max()
        if not errdm < 1e-6:
            return mymf.get_veff(mymf.mol, dm)

        if h1e is None:
            h1e = mymf.get_hcore()
        if s1e is None:
            s1e = mymf.get_ovlp()

        def fock_from_mo(c, e):
            sc = np.dot(s1e, c)
            return np.dot(sc * e, sc.T)

        fa_part = np.dot(s1e, mymf.mo_coeff[0])
        fb_part = np.dot(s1e, mymf.mo_coeff[1])
        e_a, e_b = mymf.mo_energy
        return np.asarray([np.dot(fa_part * e_a, fa_part.T) - h1e,
                           np.dot(fb_part * e_b, fb_part.T) - h1e])
eris.ovoo = eri_aa[:nocca,nocca:,:nocca,:nocca].copy() + eris.ovov = eri_aa[:nocca,nocca:,:nocca,nocca:].copy() + eris.oovv = eri_aa[:nocca,:nocca,nocca:,nocca:].copy() + eris.ovvo = eri_aa[:nocca,nocca:,nocca:,:nocca].copy() + eris.ovvv = eri_aa[:nocca,nocca:,nocca:,nocca:].copy() + eris.vvvv = eri_aa[nocca:,nocca:,nocca:,nocca:].copy() + + eris.OOOO = eri_bb[:noccb,:noccb,:noccb,:noccb].copy() + eris.OVOO = eri_bb[:noccb,noccb:,:noccb,:noccb].copy() + eris.OVOV = eri_bb[:noccb,noccb:,:noccb,noccb:].copy() + eris.OOVV = eri_bb[:noccb,:noccb,noccb:,noccb:].copy() + eris.OVVO = eri_bb[:noccb,noccb:,noccb:,:noccb].copy() + eris.OVVV = eri_bb[:noccb,noccb:,noccb:,noccb:].copy() + eris.VVVV = eri_bb[noccb:,noccb:,noccb:,noccb:].copy() + + eris.ooOO = eri_ab[:nocca,:nocca,:noccb,:noccb].copy() + eris.ovOO = eri_ab[:nocca,nocca:,:noccb,:noccb].copy() + eris.ovOV = eri_ab[:nocca,nocca:,:noccb,noccb:].copy() + eris.ooVV = eri_ab[:nocca,:nocca,noccb:,noccb:].copy() + eris.ovVO = eri_ab[:nocca,nocca:,noccb:,:noccb].copy() + eris.ovVV = eri_ab[:nocca,nocca:,noccb:,noccb:].copy() + eris.vvVV = eri_ab[nocca:,nocca:,noccb:,noccb:].copy() + + #eris.OOoo = eri_ba[:noccb,:noccb,:nocca,:nocca].copy() + eris.OVoo = eri_ba[:noccb,noccb:,:nocca,:nocca].copy() + #eris.OVov = eri_ba[:noccb,noccb:,:nocca,nocca:].copy() + eris.OOvv = eri_ba[:noccb,:noccb,nocca:,nocca:].copy() + eris.OVvo = eri_ba[:noccb,noccb:,nocca:,:nocca].copy() + eris.OVvv = eri_ba[:noccb,noccb:,nocca:,nocca:].copy() + #eris.VVvv = eri_ba[noccb:,noccb:,nocca:,nocca:].copy() + + log.timer('CCSD integral transformation', *cput0) + return eris + +def _make_df_eris_outcore(mycc, mo_coeff=None): + cput0 = (logger.process_clock(), logger.perf_counter()) + log = logger.Logger(mycc.stdout, mycc.verbose) + eris = _ChemistsERIs() + eris._common_init_(mycc, mo_coeff) + + moa, mob = eris.mo_coeff + nocca, noccb = eris.nocc + nao = moa.shape[0] + nmoa = moa.shape[1] + nmob = mob.shape[1] + nvira = nmoa - nocca + nvirb = nmob - 
noccb + nvira_pair = nvira*(nvira+1)//2 + nvirb_pair = nvirb*(nvirb+1)//2 + naux = mycc._scf.with_df.get_naoaux() + + # --- Three-center integrals + # (L|aa) + Loo = np.empty((naux,nocca,nocca)) + Lov = np.empty((naux,nocca,nvira)) + Lvo = np.empty((naux,nvira,nocca)) + Lvv = np.empty((naux,nvira_pair)) + # (L|bb) + LOO = np.empty((naux,noccb,noccb)) + LOV = np.empty((naux,noccb,nvirb)) + LVO = np.empty((naux,nvirb,noccb)) + LVV = np.empty((naux,nvirb_pair)) + p1 = 0 + oa, va = np.s_[:nocca], np.s_[nocca:] + ob, vb = np.s_[:noccb], np.s_[noccb:] + # Transform three-center integrals to MO basis + for eri1 in mycc._scf.with_df.loop(): + eri1 = lib.unpack_tril(eri1).reshape(-1,nao,nao) + # (L|aa) + Lpq = einsum('Lab,ap,bq->Lpq', eri1, moa, moa) + p0, p1 = p1, p1 + Lpq.shape[0] + blk = np.s_[p0:p1] + Loo[blk] = Lpq[:,oa,oa] + Lov[blk] = Lpq[:,oa,va] + Lvo[blk] = Lpq[:,va,oa] + Lvv[blk] = lib.pack_tril(Lpq[:,va,va].reshape(-1,nvira,nvira)) + # (L|bb) + Lpq = einsum('Lab,ap,bq->Lpq', eri1, mob, mob) + LOO[blk] = Lpq[:,ob,ob] + LOV[blk] = Lpq[:,ob,vb] + LVO[blk] = Lpq[:,vb,ob] + LVV[blk] = lib.pack_tril(Lpq[:,vb,vb].reshape(-1,nvirb,nvirb)) + Loo = Loo.reshape(naux,nocca*nocca) + Lov = Lov.reshape(naux,nocca*nvira) + Lvo = Lvo.reshape(naux,nocca*nvira) + LOO = LOO.reshape(naux,noccb*noccb) + LOV = LOV.reshape(naux,noccb*nvirb) + LVO = LVO.reshape(naux,noccb*nvirb) + + # --- Four-center integrals + dot = lib.ddot + eris.feri1 = lib.H5TmpFile() + # (aa|aa) + eris.oooo = eris.feri1.create_dataset('oooo', (nocca,nocca,nocca,nocca), 'f8') + eris.oovv = eris.feri1.create_dataset('oovv', (nocca,nocca,nvira,nvira), 'f8', chunks=(nocca,nocca,1,nvira)) + eris.ovoo = eris.feri1.create_dataset('ovoo', (nocca,nvira,nocca,nocca), 'f8', chunks=(nocca,1,nocca,nocca)) + eris.ovvo = eris.feri1.create_dataset('ovvo', (nocca,nvira,nvira,nocca), 'f8', chunks=(nocca,1,nvira,nocca)) + eris.ovov = eris.feri1.create_dataset('ovov', (nocca,nvira,nocca,nvira), 'f8', chunks=(nocca,1,nocca,nvira)) + 
eris.ovvv = eris.feri1.create_dataset('ovvv', (nocca,nvira,nvira_pair), 'f8') + eris.vvvv = eris.feri1.create_dataset('vvvv', (nvira_pair,nvira_pair), 'f8') + eris.oooo[:] = dot(Loo.T, Loo).reshape(nocca,nocca,nocca,nocca) + eris.ovoo[:] = dot(Lov.T, Loo).reshape(nocca,nvira,nocca,nocca) + eris.oovv[:] = lib.unpack_tril(dot(Loo.T, Lvv)).reshape(nocca,nocca,nvira,nvira) + eris.ovvo[:] = dot(Lov.T, Lvo).reshape(nocca,nvira,nvira,nocca) + eris.ovov[:] = dot(Lov.T, Lov).reshape(nocca,nvira,nocca,nvira) + eris.ovvv[:] = dot(Lov.T, Lvv).reshape(nocca,nvira,nvira_pair) + eris.vvvv[:] = dot(Lvv.T, Lvv) + # (bb|bb) + eris.OOOO = eris.feri1.create_dataset('OOOO', (noccb,noccb,noccb,noccb), 'f8') + eris.OOVV = eris.feri1.create_dataset('OOVV', (noccb,noccb,nvirb,nvirb), 'f8', chunks=(noccb,noccb,1,nvirb)) + eris.OVOO = eris.feri1.create_dataset('OVOO', (noccb,nvirb,noccb,noccb), 'f8', chunks=(noccb,1,noccb,noccb)) + eris.OVVO = eris.feri1.create_dataset('OVVO', (noccb,nvirb,nvirb,noccb), 'f8', chunks=(noccb,1,nvirb,noccb)) + eris.OVOV = eris.feri1.create_dataset('OVOV', (noccb,nvirb,noccb,nvirb), 'f8', chunks=(noccb,1,noccb,nvirb)) + eris.OVVV = eris.feri1.create_dataset('OVVV', (noccb,nvirb,nvirb_pair), 'f8') + eris.VVVV = eris.feri1.create_dataset('VVVV', (nvirb_pair,nvirb_pair), 'f8') + eris.OOOO[:] = dot(LOO.T, LOO).reshape(noccb,noccb,noccb,noccb) + eris.OVOO[:] = dot(LOV.T, LOO).reshape(noccb,nvirb,noccb,noccb) + eris.OOVV[:] = lib.unpack_tril(dot(LOO.T, LVV)).reshape(noccb,noccb,nvirb,nvirb) + eris.OVVO[:] = dot(LOV.T, LVO).reshape(noccb,nvirb,nvirb,noccb) + eris.OVOV[:] = dot(LOV.T, LOV).reshape(noccb,nvirb,noccb,nvirb) + eris.OVVV[:] = dot(LOV.T, LVV).reshape(noccb,nvirb,nvirb_pair) + eris.VVVV[:] = dot(LVV.T, LVV) + # (aa|bb) + eris.ooOO = eris.feri1.create_dataset('ooOO', (nocca,nocca,noccb,noccb), 'f8') + eris.ooVV = eris.feri1.create_dataset('ooVV', (nocca,nocca,nvirb,nvirb), 'f8', chunks=(nocca,nocca,1,nvirb)) + eris.ovOO = eris.feri1.create_dataset('ovOO', 
def impurity_solve(mcc, mo_coeff, uocc_loc, mo_occ, maskact, eris,
                   ccsd_t=False, log=None, verbose_imp=None,
                   max_las_size_ccsd=1000, max_las_size_ccsd_t=1000):
    ''' Solve a single fragment (impurity) problem with unrestricted MP2, CCSD
    and, optionally, (T), returning the LO-projected correlation energies.

    Args:
        mcc:
            Solver object; `ao2mo`, `init_amps` and `kernel` are called on it.
        mo_coeff:
            Pair (alpha, beta) of MO coefficient matrices; MOs are columns.
        uocc_loc:
            Pair (alpha, beta) of matrices; their conjugate transposes are
            passed to the energy routines as the LO projectors.
        mo_occ:
            Pair (alpha, beta) of occupation arrays. An MO counts as occupied
            when its occupation exceeds 1e-10.
        maskact:
            Pair (alpha, beta) of boolean masks selecting the active MOs.
        ccsd_t (bool):
            Also evaluate the (T) correction when True.
        log:
            Logger (or object a logger can be built from). Defaults to `mcc`.
        eris, verbose_imp, max_las_size_ccsd, max_las_size_ccsd_t:
            Accepted for interface compatibility; not read in this function.

    Returns:
        ((elcorr_pt2, elcorr_cc, elcorr_cc_t), frag_msg) where `frag_msg` is a
        one-line summary of the three energies. All three energies are zero
        when neither spin channel has both active occupied and active virtual
        orbitals.
    '''
    log = logger.new_logger(log if log is not None else mcc)
    cput1 = (logger.process_clock(), logger.perf_counter())

    is_occ = [mo_occ[0]>1e-10, mo_occ[1]>1e-10]
    nmo = (mo_occ[0].size, mo_occ[1].size)
    is_act_a, is_act_b = maskact
    is_act = [is_act_a, is_act_b]

    # Count the active occupied / active virtual orbitals of each spin by
    # slicing the coefficient matrix into its four column blocks.
    nact = []
    for spin in range(2):
        act, occ = is_act[spin], is_occ[spin]
        # order: frozen occ, active occ, active vir, frozen vir
        blocks = [mo_coeff[spin][:, sel]
                  for sel in (~act & occ, act & occ, act & ~occ, ~act & ~occ)]
        nact.append((blocks[1].shape[1], blocks[2].shape[1]))
    (nactocca, nactvira), (nactoccb, nactvirb) = nact

    nlo = [u.shape[1] for u in (uocc_loc[0], uocc_loc[1])]
    prjlo = [u.T.conj() for u in (uocc_loc[0], uocc_loc[1])]

    log.debug(' impsol: alpha %d LOs %d/%d MOs %d occ %d vir',
              nlo[0], nactocca+nactvira, nmo[0], nactocca, nactvira)
    log.debug(' impsol: beta %d LOs %d/%d MOs %d occ %d vir',
              nlo[1], nactoccb+nactvirb, nmo[1], nactoccb, nactvirb)

    if nactocca * nactvira != 0 or nactoccb * nactvirb != 0:
        # integrals for the impurity problem
        imp_eris = mcc.ao2mo()
        cput1 = log.timer_debug1('imp sol - eri ', *cput1)

        # MP2: amplitudes, then fragment energy
        t1, t2 = mcc.init_amps(eris=imp_eris)[1:]
        cput1 = log.timer_debug1('imp sol - mp2 amp', *cput1)
        elcorr_pt2 = get_fragment_energy(imp_eris, t1, t2, prjlo)
        cput1 = log.timer_debug1('imp sol - mp2 ene', *cput1)

        # CCSD: start from the MP2 amplitudes
        t1, t2 = mcc.kernel(eris=imp_eris, t1=t1, t2=t2)[1:]
        cput1 = log.timer_debug1('imp sol - cc amp', *cput1)
        elcorr_cc = get_fragment_energy(imp_eris, t1, t2, prjlo)
        cput1 = log.timer_debug1('imp sol - cc ene', *cput1)

        # (T) only on request
        elcorr_cc_t = 0.
        if ccsd_t:
            from pyscf.lno.ulnoccsd_t_slow import kernel as UCCSD_T
            elcorr_cc_t = UCCSD_T(mcc, imp_eris, prjlo, t1=t1, t2=t2)
            cput1 = log.timer_debug1('imp sol - cc (T)', *cput1)
    else:
        # nothing to correlate in either spin channel
        elcorr_pt2 = lib.tag_array(0., spin_comp=np.array((0., 0.)))
        elcorr_cc = lib.tag_array(0., spin_comp=np.array((0., 0.)))
        elcorr_cc_t = 0.

    summary = [f'E_corr(MP2) = {elcorr_pt2:.15g}',
               f'E_corr(CCSD) = {elcorr_cc:.15g}',
               f'E_corr(CCSD(T)) = {elcorr_cc_t:.15g}']
    frag_msg = ' '.join(summary)

    # drop the references to amplitudes and integrals
    t1 = t2 = imp_eris = None

    return (elcorr_pt2, elcorr_cc, elcorr_cc_t), frag_msg
def get_maskact(frozen, nmo):
    ''' Build the per-spin frozen specification and active-orbital mask.

    Each spin channel is delegated to `lnoccsd.get_maskact`.

    Args:
        frozen:
            Frozen-orbital specification indexable by spin (`frozen[0]` for
            alpha, `frozen[1]` for beta). `None` or a single integer is
            applied to both spin channels.
            NOTE(review): this assumes `lnoccsd.get_maskact` accepts `None`
            and integers for one spin channel -- confirm against that helper.
        nmo:
            Length-2 sequence with the number of alpha and beta MOs.

    Returns:
        (frozen, maskact): two new length-2 lists holding, for each spin, the
        values returned by `lnoccsd.get_maskact`. The input `frozen` is left
        untouched.
    '''
    # `ULNOCCSD.impurity_solve` forwards its default `frozen=None` directly, so
    # a spin-independent specification must be broadcast before indexing.
    if frozen is None or isinstance(frozen, (int, np.integer)):
        frozen = (frozen, frozen)

    # Collect results in fresh lists instead of writing back into the caller's
    # container (which also makes tuple input work).
    frozen_by_spin = [None,] * 2
    maskact = [None,] * 2
    for s in range(2):
        frozen_by_spin[s], maskact[s] = lnoccsd.get_maskact(frozen[s], nmo[s])
    return frozen_by_spin, maskact
-0.533833346097 0.040507435953 0.000000000000 + O 1.416468653903 0.111264435953 0.000000000000 + H 1.746241653903 -0.373945564047 -0.758561000000 + H 1.746241653903 -0.373945564047 0.758561000000 + ''' + basis = 'cc-pvdz' + + mol = gto.M(atom=atom, basis=basis, spin=0, verbose=5, max_memory=16000) + mf = scf.UHF(mol).density_fit() + mf.kernel() + + frozen = 2 + # canonical + mmp = mp.UMP2(mf, frozen=frozen) + mmp.kernel() + + mcc = cc.UCCSD(mf, frozen=frozen) + eris = mcc.ao2mo() + mcc.kernel(eris=eris) + eccsd_t = mcc.ccsd_t(eris=eris) + + # PM + orbocca = mf.mo_coeff[0][:,frozen:np.count_nonzero(mf.mo_occ[0])] + orbloca = lo.PipekMezey(mol, orbocca).kernel() + orboccb = mf.mo_coeff[1][:,frozen:np.count_nonzero(mf.mo_occ[1])] + orblocb = lo.PipekMezey(mol, orboccb).kernel() + orbloc = [orbloca, orblocb] + + # LNO + lno_type = ['1h'] * 2 + lno_thresh = [1e-4] * 2 + oa = [[[i],[]] for i in range(orbloca.shape[1])] + ob = [[[],[i]] for i in range(orblocb.shape[1])] + frag_lolist = oa + ob + + mlno = ULNOCCSD_T(mf, orbloc, frag_lolist, lno_type=lno_type, lno_thresh=lno_thresh, frozen=frozen) + mlno.lo_proj_thresh_active = None + mlno.verbose_imp = 4 + mlno.kernel() + ecc = mlno.e_corr_ccsd + ecc_t = mlno.e_corr_ccsd_t + ecc_pt2corrected = mlno.e_corr_ccsd_pt2corrected(mmp.e_corr) + ecc_t_pt2corrected = mlno.e_corr_ccsd_t_pt2corrected(mmp.e_corr) + log = logger.new_logger(mol) + log.info('lno_thresh = %s\n' + ' E_corr(CCSD) = %.10f rel = %6.2f%% ' + 'diff = % .10f', + lno_thresh, ecc, ecc/mcc.e_corr*100, ecc-mcc.e_corr) + log.info(' E_corr(CCSD_T) = %.10f rel = %6.2f%% ' + 'diff = % .10f', + ecc_t, ecc_t/(mcc.e_corr+eccsd_t)*100, + ecc_t-(mcc.e_corr+eccsd_t)) + log.info(' E_corr(CCSD+PT2) = %.10f rel = %6.2f%% ' + 'diff = % .10f', + ecc_pt2corrected, ecc_pt2corrected/mcc.e_corr*100, + ecc_pt2corrected - mcc.e_corr) + log.info(' E_corr(CCSD_T+PT2) = %.10f rel = %6.2f%% ' + 'diff = % .10f', + ecc_t_pt2corrected, ecc_t_pt2corrected/(mcc.e_corr+eccsd_t)*100, + 
def kernel(mcc, eris, prjlo, t1=None, t2=None):
    ''' (T) energy of an unrestricted CCSD solution projected onto a set of LOs.

    Adapted from pyscf.cc.gccsd_t_slow. The four spin blocks (aaa, aab, bbb,
    bba) are evaluated one after another; within a block the outer virtual
    index is distributed over a thread pool.

    Args:
        mcc:
            CC object. `mcc.t1` / `mcc.t2` are used when `t1` or `t2` is None;
            `mcc.stdout` and `mcc.verbose` configure the timing log.
        eris:
            Integral object providing `mo_energy`, `focka`, `fockb`, the
            getters `get_ovvv`, `get_ovVV`, `get_OVvv`, `get_OVVV` and the
            arrays `ovoo`, `ovov`, `ovOO`, `OVoo`, `ovOV`, `OVOO`, `OVOV`.
        prjlo:
            Pair (alpha, beta) of projection matrices. `prjlo[mu,i]` is the
            overlap between the mu-th LO and the i-th occ MO.
        t1, t2:
            Amplitudes as (t1a, t1b) and (t2aa, t2ab, t2bb).

    Returns:
        The accumulated, LO-projected (T) energy.
    '''
    if t1 is None or t2 is None:
        t1, t2 = mcc.t1, mcc.t2

    prjloa, prjlob = prjlo
    t1a, t1b = t1
    t2aa, t2ab, t2bb = t2
    nocca, noccb = t2ab.shape[:2]
    nvira, nvirb = t2ab.shape[2:]
    mo_ea, mo_eb = eris.mo_energy
    eia = mo_ea[:nocca, None] - mo_ea[nocca:]
    eIA = mo_eb[:noccb, None] - mo_eb[noccb:]
    fvo = eris.focka[nocca:, :nocca]
    fVO = eris.fockb[noccb:, :noccb]

    et = 0
    log = lib.logger.Logger(mcc.stdout, mcc.verbose)
    from multiprocessing import pool

    def _threaded_sum(task, ntasks):
        # One short-lived pool per spin block. The context manager shuts the
        # worker threads down as soon as the map has returned, so repeated
        # calls (one per fragment) do not pile up idle threads.
        with pool.ThreadPool() as workers:
            return sum(workers.map(task, range(ntasks)))

    # aaa
    cput0 = (lib.logger.process_clock(), lib.logger.perf_counter())
    bcei = np.array(eris.get_ovvv()).transpose(2, 1, 3, 0)
    bcei -= bcei.transpose(1, 0, 2, 3)
    majk = np.array(eris.ovoo).conj().transpose(2, 1, 3, 0)
    majk -= majk.transpose(0, 1, 3, 2)
    bcjk = np.array(eris.ovov).conj().transpose(1, 3, 0, 2)
    bcjk -= bcjk.transpose(1, 0, 2, 3)

    def t3c_aaa1(a, b, d3):
        t3c = (einsum('jke,cei->ijkc', t2aa[:, :, a, :], bcei[b, :, :, :]) -
               einsum('imc,mjk->ijkc', t2aa[:, :, b], majk[:, a, :, :]))
        t3c = t3c - t3c.transpose(1, 0, 2, 3) - t3c.transpose(2, 1, 0, 3)
        return t3c / d3

    def t3c_aaa2(a, b, d3):
        t3c = (einsum('jkce,ei->ijkc', t2aa, bcei[b, a]) -
               einsum('im,mcjk->ijkc', t2aa[:, :, b, a], majk))
        t3c = t3c - t3c.transpose(1, 0, 2, 3) - t3c.transpose(2, 1, 0, 3)
        return t3c / d3

    def t3d_aaa1(a, b, d3):
        t3d = einsum('i,cjk->ijkc', t1a[:, a], bcjk[b])
        t3d += einsum('i,jkc->ijkc', fvo[a], t2aa[:, :, b])
        t3d = t3d - t3d.transpose(1, 0, 2, 3) - t3d.transpose(2, 1, 0, 3)
        return t3d / d3

    def t3d_aaa2(a, b, d3):
        t3d = einsum('ic,jk->ijkc', t1a, bcjk[b, a])
        t3d += einsum('ci,jk->ijkc', fvo, t2aa[:, :, b, a])
        t3d = t3d - t3d.transpose(1, 0, 2, 3) - t3d.transpose(2, 1, 0, 3)
        return t3d / d3

    # one task per outer virtual index a; the inner loop covers b > a
    def task_a(a):
        et = 0
        for b in range(a+1, nvira):
            d3 = lib.direct_sum(
                'i+j+kc->ijkc', eia[:, a], eia[:, b], eia)
            t3c = 0
            t3c += t3c_aaa1(a, b, d3)
            t3c -= t3c_aaa1(b, a, d3)
            t3c -= t3c_aaa2(a, b, d3)
            t3d = 0
            t3d += t3d_aaa1(a, b, d3)
            t3d -= t3d_aaa1(b, a, d3)
            t3d -= t3d_aaa2(a, b, d3)
            et_ij = einsum('mjkc,njkc,njkc->mn',
                           (t3c+t3d).conj(), d3, t3c) / 9
            et += 2 * einsum('ij,li,lj->', et_ij, prjloa, prjloa)
        return et
    et += _threaded_sum(task_a, nvira)
    cput0 = log.timer_debug1('(T) aaa', *cput0)

    # aab
    bCEi = -np.array(eris.get_ovVV()).transpose(1, 2, 3, 0)
    MajK = -np.array(eris.ovOO).transpose(2, 1, 0, 3)
    baei = bcei
    jKmC = np.array(eris.OVoo).transpose(3, 0, 2, 1)
    bCeK = np.array(eris.get_OVvv()).transpose(2, 1, 3, 0)
    maji = majk
    bCjK = np.array(eris.ovOV).transpose(1, 3, 0, 2)
    baji = np.array(eris.ovov).transpose(1, 3, 0, 2)
    baji -= baji.transpose(0, 1, 3, 2)

    # t3c: connected pieces (the a <-> b exchange is applied in task_a)
    def t3c_aab1(a, b, d3):
        t3c = (einsum('jKE,CEi->ijKC', t2ab[:, :, a, :], bCEi[b]) -
               einsum('iMC,MjK->ijKC', t2ab[:, :, b], MajK[:, a, :, :]))
        t3c = t3c - t3c.transpose(1, 0, 2, 3)
        return t3c / d3

    def t3c_aab2(a, b, d3):
        t3c = (einsum('jKeC,ei->ijKC', t2ab, baei[b, a, :, :]) -
               einsum('mi,jKmC->ijKC', t2aa[:, :, b, a], jKmC))
        t3c = t3c - t3c.transpose(1, 0, 2, 3)
        return t3c / d3

    def t3c_aab3(a, b, d3):
        t3c = (einsum('jie,CeK->ijKC', t2aa[:, :, a, :], bCeK[b]) -
               einsum('mKC,mji->ijKC', -t2ab[:, :, b], maji[:, a, :, :]))
        return -t3c / d3

    # t3d: disconnected pieces
    def t3d_aab1(a, b, d3):
        t3d = einsum('i,CjK->ijKC', t1a[:, a], bCjK[b])
        t3d += einsum('i,jKC->ijKC', fvo[a], t2ab[:, :, b])
        t3d = t3d - t3d.transpose(1, 0, 2, 3)
        return t3d / d3

    def t3d_aab2(a, b, d3):
        t3d = einsum('KC,ji->ijKC', t1b, baji[b, a])
        t3d += einsum('CK,ij->ijKC', fVO, t2aa[:, :, a, b])
        return t3d / d3

    def task_a(a):
        et = 0
        for b in range(a+1, nvira):
            d3 = lib.direct_sum(
                'i+j+kC->ijkC', eia[:, a], eia[:, b], eIA)
            t3c = 0
            t3c += t3c_aab1(a, b, d3)
            t3c -= t3c_aab1(b, a, d3)
            t3c += t3c_aab2(a, b, d3)
            t3c += t3c_aab3(a, b, d3)
            t3c -= t3c_aab3(b, a, d3)
            t3d = 0
            t3d += t3d_aab1(a, b, d3)
            t3d -= t3d_aab1(b, a, d3)
            t3d += t3d_aab2(a, b, d3)
            # project the beta occupied index ...
            et_ij = einsum('ijmC,ijnC,ijnC->mn', (t3c+t3d).conj(), d3, t3c)
            et += 2 * einsum('ij,li,lj->', et_ij, prjlob, prjlob) * 3 / 36 * 4
            # ... and the alpha occupied index
            et_ij = einsum('mjkC,njkC,njkC->mn', (t3c+t3d).conj(), d3, t3c)
            et += 2 * einsum('ij,li,lj->', et_ij, prjloa, prjloa) * 6 / 36 * 4
        return et
    et += _threaded_sum(task_a, nvira)
    cput0 = log.timer_debug1('(T) aab', *cput0)

    # bbb
    bcei = np.array(eris.get_OVVV()).transpose(2, 1, 3, 0)
    bcei -= bcei.transpose(1, 0, 2, 3)
    majk = np.array(eris.OVOO).conj().transpose(2, 1, 3, 0)
    majk -= majk.transpose(0, 1, 3, 2)
    bcjk = np.array(eris.OVOV).conj().transpose(1, 3, 0, 2)
    bcjk -= bcjk.transpose(1, 0, 2, 3)

    def t3c_bbb1(a, b, d3):
        t3c = (einsum('jke,cei->ijkc', t2bb[:, :, a], bcei[b]) -
               einsum('imc,mjk->ijkc', t2bb[:, :, b], majk[:, a]))
        t3c = t3c - t3c.transpose(1, 0, 2, 3) - t3c.transpose(2, 1, 0, 3)
        return t3c / d3

    def t3c_bbb2(a, b, d3):
        t3c = (einsum('jkce,ei->ijkc', t2bb, bcei[b, a]) -
               einsum('im,mcjk->ijkc', t2bb[:, :, b, a], majk))
        t3c = t3c - t3c.transpose(1, 0, 2, 3) - t3c.transpose(2, 1, 0, 3)
        return t3c / d3

    def t3d_bbb1(a, b, d3):
        t3d = einsum('i,cjk->ijkc', t1b[:, a], bcjk[b])
        t3d += einsum('i,jkc->ijkc', fVO[a], t2bb[:, :, b])
        t3d = t3d - t3d.transpose(1, 0, 2, 3) - t3d.transpose(2, 1, 0, 3)
        return t3d / d3

    def t3d_bbb2(a, b, d3):
        t3d = einsum('ic,jk->ijkc', t1b, bcjk[b, a])
        t3d += einsum('ci,jk->ijkc', fVO, t2bb[:, :, b, a])
        t3d = t3d - t3d.transpose(1, 0, 2, 3) - t3d.transpose(2, 1, 0, 3)
        return t3d / d3

    def task_a(a):
        et = 0
        for b in range(a+1, nvirb):
            d3 = lib.direct_sum(
                'i+j+kc->ijkc', eIA[:, a], eIA[:, b], eIA)
            t3c = 0
            t3c += t3c_bbb1(a, b, d3)
            t3c -= t3c_bbb1(b, a, d3)
            t3c -= t3c_bbb2(a, b, d3)
            t3d = 0
            t3d += t3d_bbb1(a, b, d3)
            t3d -= t3d_bbb1(b, a, d3)
            t3d -= t3d_bbb2(a, b, d3)
            et_ij = einsum('mjkc,njkc,njkc->mn',
                           (t3c+t3d).conj(), d3, t3c) / 9
            et += 2 * einsum('ij,li,lj->', et_ij, prjlob, prjlob)
        return et
    et += _threaded_sum(task_a, nvirb)
    cput0 = log.timer_debug1('(T) bbb', *cput0)

    # bba
    bCEi = -np.array(eris.get_OVvv()).transpose(1, 2, 3, 0)
    MajK = -np.array(eris.OVoo).transpose(2, 1, 0, 3)
    baei = bcei
    jKmC = np.array(eris.ovOO).transpose(3, 0, 2, 1)
    bCeK = np.array(eris.get_ovVV()).transpose(2, 1, 3, 0)
    maji = majk
    bCjK = np.array(eris.ovOV).transpose(3, 1, 2, 0)
    baji = np.array(eris.OVOV).transpose(1, 3, 0, 2)
    baji -= baji.transpose(0, 1, 3, 2)

    # t3c: connected pieces (the a <-> b exchange is applied in task_a)
    def t3c_bba1(a, b, d3):
        t3c = (einsum('KjE,CEi->ijKC', t2ab[:, :, :, a], bCEi[b]) -
               einsum('MiC,MjK->ijKC', t2ab[:, :, :, b], MajK[:, a]))
        t3c = t3c - t3c.transpose(1, 0, 2, 3)
        return t3c / d3

    def t3c_bba2(a, b, d3):
        t3c = (einsum('KjCe,ei->ijKC', t2ab, baei[b, a]) -
               einsum('mi,jKmC->ijKC', t2bb[:, :, b, a], jKmC))
        t3c = t3c - t3c.transpose(1, 0, 2, 3)
        return t3c / d3

    def t3c_bba3(a, b, d3):
        t3c = (einsum('jie,CeK->ijKC', t2bb[:, :, a], bCeK[b]) -
               einsum('KmC,mji->ijKC', -t2ab[:, :, :, b], maji[:, a]))
        return -t3c / d3

    # t3d: disconnected pieces
    def t3d_bba1(a, b, d3):
        t3d = einsum('i,CjK->ijKC', t1b[:, a], bCjK[b])
        t3d += einsum('i,KjC->ijKC', fVO[a], t2ab[:, :, :, b])
        t3d = t3d - t3d.transpose(1, 0, 2, 3)
        return t3d / d3

    def t3d_bba2(a, b, d3):
        t3d = einsum('KC,ji->ijKC', t1a, baji[b, a])
        t3d += einsum('CK,ij->ijKC', fvo, t2bb[:, :, a, b])
        return t3d / d3

    def task_a(a):
        et = 0
        for b in range(a+1, nvirb):
            d3 = lib.direct_sum(
                'i+j+kc->ijkc', eIA[:, a], eIA[:, b], eia)
            t3c = 0
            t3c += t3c_bba1(a, b, d3)
            t3c -= t3c_bba1(b, a, d3)
            t3c += t3c_bba2(a, b, d3)
            t3c += t3c_bba3(a, b, d3)
            t3c -= t3c_bba3(b, a, d3)
            t3d = 0
            t3d += t3d_bba1(a, b, d3)
            t3d -= t3d_bba1(b, a, d3)
            t3d += t3d_bba2(a, b, d3)
            # project the alpha occupied index ...
            et_ij = einsum('ijmc,ijnc,ijnc->mn', (t3c+t3d).conj(), d3, t3c)
            et += 2 * einsum('ij,li,lj->', et_ij, prjloa, prjloa) * 3 / 36 * 4
            # ... and the beta occupied index
            et_ij = einsum('mjkc,njkc,njkc->mn', (t3c+t3d).conj(), d3, t3c)
            et += 2 * einsum('ij,li,lj->', et_ij, prjlob, prjlob) * 6 / 36 * 4
        return et
    et += _threaded_sum(task_a, nvirb)
    cput0 = log.timer_debug1('(T) bba', *cput0)

    et *= .25
    return et
Chem. Phys. 135, 104111 (2011) + + - Publication for periodic KLNO by Ye and Berkelbach: + Ye and Berkelbach, J. Chem. Theory Comput. 2024, 20, 20, 8948–8959 +''' + + +import sys +import numpy as np +import h5py + +from pyscf.lib import logger +from pyscf import lib +from pyscf.pbc.df.df import _load3c +from pyscf.pbc.lib.kpts_helper import gamma_point +from pyscf import __config__ + +from pyscf.lno import LNO +from pyscf.pbc.lno.tools import K2SDF, k2s_scf +from pyscf.pbc.lno.make_lno_rdm1 import make_lo_rdm1_occ, make_lo_rdm1_vir + +einsum = lib.einsum + +DEBUG_BLKSIZE = getattr(__config__, 'lno_base_klno_base_DEBUG_BLKSIZE', False) + + +class KLNO(LNO): + r''' Base class for LNO-based methods with k-point mean-field reference. + + This base class provides common functions for constructing KLNO subspace. + Specific LNO-based methods (e.g., KLNO-CCSD, KLNO-CCSD(T)) can be implemented as + derived classes from this base class with appropriately defined method + `impurity_solve`. + + Input: + kmf (PySCF KSCF object): + KSCF mean-field object. + lo_coeff (np.ndarray): + Supercell AO coefficient matrix of LOs. Must span occupied space. + frag_lolist (nested list): + Fragment assignment in terms of LO index. E.g., [[0,2], [1], ...] means + frag 1 consists of LO 0 and 2, frag 2 consists of LO 1, etc. + lno_type (len-2 list): + lno_type = [occ_lno_type, vir_lno_type], where 'occ_lno_type' can be + '1h', '1p', or '2p' and 'vir_lno_type' can be '1p', '1h', '2h'. + Default is ['1h','1h']. + lno_thresh (float of len-2 list): + Thresholds for LNO truncation. Use a len-2 list to specify thresh for + occ and vir separately. Default is [1e-5,1e-6]. + frozen (int or list): + Same as the `frozen` attr in MP2/CCSD etc. modules. 
+ ''' + + def __init__(self, kmf, lo_coeff, frag_lolist, lno_type=None, lno_thresh=None, frozen=None, + mf=None): + if mf is None: mf = k2s_scf(kmf) + LNO.__init__(self, mf, lo_coeff, frag_lolist, lno_type=lno_type, + lno_thresh=lno_thresh, frozen=frozen) + self._kscf = kmf + self.with_df = kmf.with_df + self.unit_cell = kmf.cell + self.kpts = kmf.kpts + + def ao2mo(self): + log = logger.new_logger(self) + + if self.with_df is None: + log.error('DF is not found. Rerun KSCF with DF.') + raise NotImplementedError + else: + cput0 = (logger.process_clock(), logger.perf_counter()) + orbocc, orbvir = self.split_mo_coeff()[1:3] + dsize = 16 # Lov is always complex (but the eri from contracting Lov may be real) + nocc = orbocc.shape[1] + nvir = orbvir.shape[1] + # FIXME: more accurate mem estimate + mem_now = self.max_memory - lib.current_memory()[0] + nkpts = len(self.kpts) + naux = self.with_df.get_naoaux() + nk = nkpts//2+nkpts%2 if gamma_point(self.kpts[0]) and np.isrealobj(orbocc) else nkpts + mem_df = nk*nocc*nvir*naux*dsize/1024**2. 
+ log.debug('ao2mo est mem= %.2f MB avail mem= %.2f MB', mem_df, mem_now) + if ( (self._ovL_to_save is not None) or (self._ovL is not None) or + self.force_outcore_ao2mo or (mem_df > mem_now*0.5) ): + eris = _KLNODFOUTCOREERIS(self.with_df, orbocc, orbvir, self.max_memory, + ovL=self._ovL, ovL_to_save=self._ovL_to_save, + verbose=self.verbose, stdout=self.stdout) + else: + eris = _KLNODFINCOREERIS(self.with_df, orbocc, orbvir, self.max_memory, + verbose=self.verbose, stdout=self.stdout) + eris.build() + log.timer('Integral xform ', *cput0) + + return eris + + def make_lo_rdm1_occ(self, eris, moeocc, moevir, uocc_loc, uvir_loc, occ_lno_type): + return make_lo_rdm1_occ(eris, moeocc, moevir, uocc_loc, uvir_loc, occ_lno_type) + + def make_lo_rdm1_vir(self, eris, moeocc, moevir, uocc_loc, uvir_loc, vir_lno_type): + return make_lo_rdm1_vir(eris, moeocc, moevir, uocc_loc, uvir_loc, vir_lno_type) + + +def _KLNODFINCOREERIS(with_df, orbocc, orbvir, max_memory, verbose=None, stdout=None): + if gamma_point(with_df.kpts[0]) and np.isrealobj(orbocc.dtype): + _ERIS = _KLNODFINCOREERIS_REAL + else: + _ERIS = _KLNODFINCOREERIS_COMPLEX + return _ERIS(with_df, orbocc, orbvir, max_memory, verbose, stdout) + +def _KLNODFOUTCOREERIS(with_df, orbocc, orbvir, max_memory, ovL=None, ovL_to_save=None, + verbose=None, stdout=None): + if gamma_point(with_df.kpts[0]) and np.isrealobj(orbocc.dtype): + _ERIS = _KLNODFOUTCOREERIS_REAL + else: + _ERIS = _KLNODFOUTCOREERIS_COMPLEX + return _ERIS(with_df, orbocc, orbvir, max_memory, ovL, ovL_to_save, verbose, stdout) + + +''' DF ERI for real orbitals +''' +class _KLNODFINCOREERIS_REAL(K2SDF): + def __init__(self, with_df, orbocc, orbvir, max_memory, verbose=None, stdout=None): + K2SDF.__init__(self, with_df) + self.orbocc = orbocc + self.orbvir = orbvir + + self.max_memory = max_memory + self.verbose = verbose + self.stdout = stdout + + self.dtype = np.float64 + self.dsize = 8 + self.dtype_eri, self.dsize_eri = self.get_eri_dtype_dsize(orbocc, 
orbvir) + + self.ovLR = None + self.ovLI = None + + @property + def nocc(self): + return self.orbocc.shape[1] + @property + def nvir(self): + return self.orbvir.shape[1] + + def build(self): + log = logger.new_logger(self) + self.ovLR, self.ovLI = _init_mp_df_eris_real(self, self.orbocc, self.orbvir, + self.max_memory, ovLR=self.ovLR, ovLI=self.ovLI, log=log) + + def get_occ_blk(self, i0,i1): + return np.asarray(self.ovLR[i0:i1]), np.asarray(self.ovLI[i0:i1]) + + def get_vir_blk(self, a0,a1, real_and_imag=False): + return np.asarray(self.ovLR[:,a0:a1], order='C'), np.asarray(self.ovLI[:,a0:a1], order='C') + + def xform_occ(self, u): + assert( u.dtype == np.float64 ) + nocc, nvir, Naux = self.nocc, self.nvir, self.Naux_ibz + nOcc = u.shape[1] + M = (self.max_memory - lib.current_memory()[0])*1e6 / self.dsize + occblksize = min(nocc, max(1, int(np.floor(M*0.5/(nvir*Naux) - nOcc)))) + if DEBUG_BLKSIZE: occblksize = max(1,nocc//2) + OvLR = np.empty((nOcc,nvir,Naux), dtype=np.float64) + OvLI = np.empty((nOcc,nvir,Naux), dtype=np.float64) + for iblk,(i0,i1) in enumerate(lib.prange(0,nocc,occblksize)): + ovLR, ovLI = self.get_occ_blk(i0,i1) + if iblk == 0: + OvLR[:] = lib.einsum('iax,iI->Iax', ovLR, u[i0:i1]) + OvLI[:] = lib.einsum('iax,iI->Iax', ovLI, u[i0:i1]) + else: + OvLR[:] += lib.einsum('iax,iI->Iax', ovLR, u[i0:i1]) + OvLI[:] += lib.einsum('iax,iI->Iax', ovLI, u[i0:i1]) + ovLR = ovLI = None + return OvLR, OvLI + + def xform_vir(self, u): + assert( u.dtype == np.float64 ) + nocc, nvir, Naux = self.nocc, self.nvir, self.Naux_ibz + nVir = u.shape[1] + M = (self.max_memory - lib.current_memory()[0])*1e6 / self.dsize + occblksize = min(nocc, max(1, int(np.floor(M*0.5/(nvir*Naux) - nocc*nVir/float(nvir))))) + if DEBUG_BLKSIZE: occblksize = max(1,nocc//2) + oVLR = np.empty((nocc,nVir,Naux), dtype=np.float64) + oVLI = np.empty((nocc,nVir,Naux), dtype=np.float64) + for i0,i1 in lib.prange(0,nocc,occblksize): + ovLR, ovLI = self.get_occ_blk(i0,i1) + oVLR[i0:i1] = 
class _KLNODFOUTCOREERIS_REAL(_KLNODFINCOREERIS_REAL):
    ''' Variant of `_KLNODFINCOREERIS_REAL` that keeps `ovLR` / `ovLI` in an
    HDF5 file instead of in memory.

    Args (in addition to those of the in-core class):
        ovL (str or None):
            Path of an existing file to read `ovLR` / `ovLI` from. When None
            the integrals are computed in `build`.
        ovL_to_save (str or None):
            Path of the file to write freshly computed integrals to. Any
            non-string value selects a temporary file.
    '''
    def __init__(self, with_df, orbocc, orbvir, max_memory, ovL=None, ovL_to_save=None,
                 verbose=None, stdout=None):
        _KLNODFINCOREERIS_REAL.__init__(self, with_df, orbocc, orbvir, max_memory,
                                        verbose=verbose, stdout=stdout)

        self._ovL = ovL
        self._ovL_to_save = ovL_to_save

    def build(self):
        ''' Compute the integrals into an HDF5 file, or attach to a stored one.

        Raises:
            RuntimeError: if `ovL` is neither None nor a string.
        '''
        log = logger.new_logger(self)
        expected_shape = (self.nocc,self.nvir,self.Naux_ibz)
        expected_dtype = self.dtype
        source = self._ovL

        # only "compute" (None) and "read from path" (str) are supported
        if source is not None and not isinstance(source, str):
            raise RuntimeError

        if source is None:
            target = self._ovL_to_save
            self.feri = h5py.File(target, 'w') if isinstance(target, str) else lib.H5TmpFile()
            log.info('ovL is saved to %s', self.feri.filename)
            # TODO: determine a chunks size
            self.ovLR = self.feri.create_dataset('ovLR', expected_shape, expected_dtype)
            self.ovLI = self.feri.create_dataset('ovLI', expected_shape, expected_dtype)
            _init_mp_df_eris_real(self, self.orbocc, self.orbvir, self.max_memory,
                                  ovLR=self.ovLR, ovLI=self.ovLI, log=log)
            return

        self.feri = h5py.File(source, 'r')
        log.info('ovL is read from %s', self.feri.filename)
        for key in ('ovLR', 'ovLI'):
            # the stored datasets must match the current orbital space
            assert( key in self.feri )
            dataset = self.feri[key]
            assert( dataset.shape == expected_shape )
            assert( dataset.dtype == expected_dtype )
            setattr(self, key, dataset)
nvir = korbvir[0].shape[1] + + if ovLR is None: + ovLR = np.empty((nocc,nvir,Naux), dtype=REAL) + ovLI = np.empty((nocc,nvir,Naux), dtype=REAL) + + cput1 = (logger.process_clock(), logger.perf_counter()) + + mem_avail = max_memory - lib.current_memory()[0] + + if isinstance(ovLR, np.ndarray): + mem_avail -= Naux*nocc*nvir*2 * dsize/1e6 # subtract mem for holding ovLR/I incore + mode = 'incore' + else: + mode = 'outcore' + + # batching aux (OV*3 + Nao_pair) * [X] = M + mem_auxblk = (nao**2+nocc*nvir*3) * dsize/1e6 + aux_blksize = min(naux, max(1, int(np.floor(mem_avail*0.7 / mem_auxblk)))) + if DEBUG_BLKSIZE: aux_blksize = max(1,naux//2) + log.debug('aux blksize for %s ao2mo: %d/%d', mode, aux_blksize, naux) + buf = np.empty(aux_blksize*nocc*nvir, dtype=COMPLEX) + bufR = np.empty(aux_blksize*nocc*nvir, dtype=REAL) + bufI = np.empty(aux_blksize*nocc*nvir, dtype=REAL) + + for qi,q in enumerate(k2sdf.ibz2bz): + nauxq = naux_by_q[q] + if nauxq < naux: + ovLR[:,:,naux*qi+nauxq:naux*(qi+1)] = 0 + ovLI[:,:,naux*qi+nauxq:naux*(qi+1)] = 0 + for p0,p1 in lib.prange(0, nauxq, aux_blksize): + auxslice = (p0,p1) + dp = p1 - p0 + LovR = np.ndarray((dp,nocc,nvir), dtype=REAL, buffer=bufR) + LovI = np.ndarray((dp,nocc,nvir), dtype=REAL, buffer=bufI) + LovR.fill(0) + LovI.fill(0) + for (ki,kj),LpqR,LpqI in k2sdf.loop_ao2mo(q, orbocc, orbvir, buf=buf, + real_and_imag=True, auxslice=auxslice): + LovR += LpqR.reshape(dp,nocc,nvir) + LovI += LpqI.reshape(dp,nocc,nvir) + LpqR = LpqI = None + w = k2sdf.qpts_ibz_weights[qi] + LovR *= w + LovI *= w + b0 = naux*qi + p0 + b1 = b0 + dp + ovLR[:,:,b0:b1] = LovR.transpose(1,2,0) + ovLI[:,:,b0:b1] = LovI.transpose(1,2,0) + LovR = LovI = None + cput1 = log.timer('ao2mo for qidx %d/%d'%(qi+1,nqpts), *cput1) + + buf = bufR = bufI = None + + return ovLR, ovLI + + +''' DF ERI for complex orbitals +''' +class _KLNODFINCOREERIS_COMPLEX(K2SDF): + def __init__(self, with_df, orbocc, orbvir, max_memory, verbose=None, stdout=None): + K2SDF.__init__(self, 
with_df) + self.orbocc = orbocc + self.orbvir = orbvir + + self.max_memory = max_memory + self.verbose = verbose + self.stdout = stdout + + self.dtype = np.complex128 + self.dsize = 16 + self.dtype_eri, self.dsize_eri = self.get_eri_dtype_dsize(orbocc, orbvir) + + self.ovL = None + + @property + def nocc(self): + return self.orbocc.shape[1] + @property + def nvir(self): + return self.orbvir.shape[1] + + def build(self): + log = logger.new_logger(self) + self.ovL = _init_mp_df_eris_complex(self, self.orbocc, self.orbvir, + self.max_memory, ovL=self.ovL, log=log) + + def get_occ_blk(self, q, i0,i1, real_and_imag=False): + if real_and_imag: + out = self.ovL[q,i0:i1] + return np.asarray(out.real, order='C'), np.asarray(out.imag, order='C') + else: + return np.asarray(self.ovL[q,i0:i1], order='C') + + def get_vir_blk(self, q, a0,a1, real_and_imag=False): + if real_and_imag: + out = self.ovL[q,:,a0:a1] + return np.asarray(out.real, order='C'), np.asarray(out.imag, order='C') + else: + return np.asarray(self.ovL[q,:,a0:a1], order='C') + + def xform_occ(self, q, u, real_and_imag=False): + nocc, nvir, naux = self.nocc, self.nvir, self.naux + nOcc = u.shape[1] + M = (self.max_memory - lib.current_memory()[0])*1e6 / self.dsize + occblksize = min(nocc, max(1, int(np.floor(M*0.5/(nvir*naux) - nOcc)))) + if DEBUG_BLKSIZE: occblksize = max(1,nocc//2) + OvL = np.empty((nOcc,nvir,naux), dtype=self.dtype) + for iblk,(i0,i1) in enumerate(lib.prange(0,nocc,occblksize)): + if iblk == 0: + OvL[:] = lib.einsum('iax,iI->Iax', self.get_occ_blk(q,i0,i1), u[i0:i1].conj()) + else: + OvL[:] += lib.einsum('iax,iI->Iax', self.get_occ_blk(q,i0,i1), u[i0:i1].conj()) + if real_and_imag: + return np.asarray(OvL.real, order='C'), np.asarray(OvL.imag, order='C') + else: + return OvL + + def xform_vir(self, q, u, real_and_imag=False): + nocc, nvir, naux = self.nocc, self.nvir, self.naux + nVir = u.shape[1] + M = (self.max_memory - lib.current_memory()[0])*1e6 / self.dsize + occblksize = min(nocc, 
max(1, int(np.floor(M*0.5/(nvir*naux) - nocc*nVir/float(nvir))))) + if DEBUG_BLKSIZE: occblksize = max(1,nocc//2) + oVL = np.empty((nocc,nVir,naux), dtype=self.dtype) + for i0,i1 in lib.prange(0,nocc,occblksize): + oVL[i0:i1] = lib.einsum('iax,aA->iAx', self.get_occ_blk(q,i0,i1), u) + if real_and_imag: + return np.asarray(oVL.real, order='C'), np.asarray(oVL.imag, order='C') + else: + return oVL + + +class _KLNODFOUTCOREERIS_COMPLEX(_KLNODFINCOREERIS_COMPLEX): + def __init__(self, with_df, orbocc, orbvir, max_memory, ovL=None, ovL_to_save=None, + verbose=None, stdout=None): + raise NotImplementedError + _KLNODFINCOREERIS_COMPLEX.__init__(self, with_df, orbocc, orbvir, max_memory, + verbose, stdout) + + self._ovL = ovL + self._ovL_to_save = ovL_to_save + + def build(self): + log = logger.new_logger(self) + if self._ovL is None: + if isinstance(self._ovL_to_save, str): + self.feri = h5py.File(self._ovL_to_save, 'w') + else: + self.feri = lib.H5TmpFile() + log.info('ovL is saved to %s', self.feri.filename) + shape = (len(self.qpts),self.nocc,self.nvir,self.naux) + self.ovL = self.feri.create_dataset('ovL', shape, self.dtype, chunks=(1,*shape[1:])) + _init_mp_df_eris_complex(self, self.orbocc, self.orbvir, self.max_memory, + ovL=self.ovL, log=log) + elif isinstance(self._ovL, str): + self.feri = h5py.File(self._ovL, 'r') + log.info('ovL is read from %s', self.feri.filename) + assert( 'ovL' in self.feri ) + ovL_shape = (self.nocc,self.nvir,self.naux) + assert( self.feri['ovL/0'].shape == ovL_shape ) + self.ovL = self.feri['ovL'] + else: + raise RuntimeError + +def _init_mp_df_eris_complex(k2sdf, orbocc, orbvir, max_memory, ovL=None, log=None): + r''' ovL[q,I,A,L] := (I A | L,q) + = \sum_{k} \sum_{mu,nu} (mu,k nu,k-q | L,q) C(mu,k I).conj() C(nu,k-q A) + ''' + from pyscf.ao2mo import _ao2mo + + if log is None: log = logger.Logger(sys.stdout, 3) + + korbocc = k2sdf.s2k_mo_coeff(orbocc) + korbvir = k2sdf.s2k_mo_coeff(orbvir) + with_df = k2sdf.with_df + kpts = k2sdf.kpts + 
qpts = k2sdf.qpts + kikj_by_q = k2sdf.kikj_by_q + nqpts = len(qpts) + naux_by_q = k2sdf.naux_by_q + naux = k2sdf.naux + + dtype = k2sdf.dtype + dsize = k2sdf.dsize + + nao, nocc = korbocc[0].shape + nvir = korbvir[0].shape[1] + nmo = nocc + nvir + + def fao2mo(j3c, mo, i0, i1, p0, p1, buf): + tao = [] + ao_loc = None + ijslice = (i0,i1,nocc,nmo) + + if dtype == np.float64: + Lpq_ao = np.asarray(j3c[p0:p1].real) + return _ao2mo.nr_e2(Lpq_ao, mo, ijslice, aosym='s2', out=buf) + else: + Lpq_ao = np.asarray(j3c[p0:p1]) + if Lpq_ao[0].size != nao**2: # aosym = 's2' + Lpq_ao = lib.unpack_tril(Lpq_ao).astype(np.complex128) + return _ao2mo.r_e2(Lpq_ao, mo, ijslice, tao, ao_loc, out=buf) + + if ovL is None: + ovL = np.empty((nqpts,nocc,nvir,naux), dtype=dtype) + + cput1 = (logger.process_clock(), logger.perf_counter()) + + mem_avail = max_memory - lib.current_memory()[0] + if isinstance(ovL, np.ndarray): + # subtract mem for holding ovL incore + mem_avail -= nqpts*naux*nocc*nvir * dsize/1e6 + # incore: batching aux (OV + Nao_pair) * [X] = M + mem_auxblk = (nao**2+nocc*nvir) * dsize/1e6 + aux_blksize = min(naux, max(1, int(np.floor(mem_avail*0.7 / mem_auxblk)))) + if DEBUG_BLKSIZE: aux_blksize = max(1,naux//2) + log.debug('aux blksize for incore ao2mo: %d/%d', aux_blksize, naux) + buf = np.empty(aux_blksize*nocc*nvir, dtype=dtype) + + ovL.fill(0) + for q in range(nqpts): + nauxq = naux_by_q[q] + ovLq = ovL[q] + if nauxq < naux: + ovLq[:,:,nauxq:naux] = 0 + for ki,kj in kikj_by_q[q]: + kpti_kptj = np.asarray((kpts[ki],kpts[kj])) + mo = np.asarray(np.hstack((korbocc[ki], korbvir[kj])), order='F') + with _load3c(with_df._cderi, with_df._dataname, kpti_kptj=kpti_kptj) as j3c: + for p0,p1 in lib.prange(0,nauxq,aux_blksize): + out = fao2mo(j3c, mo, 0, nocc, p0, p1, buf) + ovLq[:,:,p0:p1] += out.reshape(p1-p0,nocc,nvir).transpose(1,2,0) + out = None + ovLq /= nqpts**0.5 + ovLq = None + cput1 = log.timer('ao2mo for qidx %d/%d'%(q+1,nqpts), *cput1) + + buf = None + else: + # outcore: 
batching occ [O]XV and aux ([O]V + Nao_pair)*[X] + mem_occblk = naux*nvir * dsize/1e6 + occ_blksize = min(nocc, max(1, int(np.floor(mem_avail*0.6 / mem_occblk)))) + if DEBUG_BLKSIZE: occ_blksize = max(1,nocc//2) + mem_auxblk = (occ_blksize*nvir+nao**2) * dsize/1e6 + aux_blksize = min(naux, max(1, int(np.floor(mem_avail*0.3 / mem_auxblk)))) + if DEBUG_BLKSIZE: aux_blksize = max(1,naux//2) + log.debug('occ blksize for outcore ao2mo: %d/%d', occ_blksize, nocc) + log.debug('aux blksize for outcore ao2mo: %d/%d', aux_blksize, naux) + buf = np.empty(naux*occ_blksize*nvir, dtype=dtype) + buf2 = np.empty(aux_blksize*occ_blksize*nvir, dtype=dtype) + + for q in range(nqpts): + nauxq = naux_by_q[q] + if nauxq < naux: + ovL[q,:,:,nauxq:naux] = 0 + for i0,i1 in lib.prange(0, nocc, occ_blksize): + OvL = np.ndarray((i1-i0,nvir,nauxq), buffer=buf, dtype=dtype) + OvL.fill(0) + for ki,kj in kikj_by_q[q]: + kpti_kptj = np.asarray((kpts[ki],kpts[kj])) + mo = np.asarray(np.hstack((korbocc[ki], korbvir[kj])), order='F') + with _load3c(with_df._cderi, with_df._dataname, kpti_kptj=kpti_kptj) as j3c: + for p0,p1 in lib.prange(0,nauxq,aux_blksize): + out = fao2mo(j3c, mo, i0, i1, p0, p1, buf2) + OvL[:,:,p0:p1] += out.reshape(p1-p0,i1-i0,nvir).transpose(1,2,0) + out = None + ovL[q,i0:i1] = OvL/nqpts**0.5 + OvL = None + cput1 = log.timer('ao2mo for qidx %d/%d'%(q+1,nqpts), *cput1) + + buf = buf2 = None + return ovL diff --git a/pyscf/pbc/lno/klnoccsd.py b/pyscf/pbc/lno/klnoccsd.py new file mode 100644 index 000000000..6ec17efb6 --- /dev/null +++ b/pyscf/pbc/lno/klnoccsd.py @@ -0,0 +1,559 @@ +#!/usr/bin/env python +# Copyright 2014-2021 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Hong-Zhou Ye +# + + +''' KLNO-RCCSD and KLNO-CCSD(T): + + - Original publication of molecular LNO by Kállay and co-workers: + Rolik and Kállay, J. Chem. Phys. 135, 104111 (2011) + + - Publication for periodic KLNO by Ye and Berkelbach: + Ye and Berkelbach, J. Chem. Theory Comput. 2024, 20, 20, 8948–8959 +''' + + +import numpy as np + +from pyscf.pbc.lib.kpts_helper import gamma_point +from pyscf import lib +logger = lib.logger + +# for _contract_vvvv_t2 +from pyscf import __config__ +from pyscf.cc import _ccsd +import ctypes + +from pyscf.lno.lnoccsd import (LNOCCSD, MODIFIED_CCSD, MODIFIED_DFCCSD, _ChemistsERIs, + impurity_solve, get_maskact, _cp) +from pyscf.pbc.lno.klno import KLNO +from pyscf.pbc.lno.tools import K2SDF, zdotCNtoR, zdotNNtoR, k2s_aoint + +FORCE_DFKCC = getattr(__config__, 'lno_cc_kccsd_FORCE_DFKCC', False) # force using DF CC +DEBUG_BLKSIZE = getattr(__config__, 'lno_cc_kccsd_DEBUG_BLKSIZE', False) + + +r''' Beginning of modification of PySCF's (DF)CCSD class for K2S (DF)CCSD + + These functions are modified from pyscf.cc and parallel those in pyscf.cc.lnoccsd. + The major change compared to the latter is using KSCF DF integrals. + + For KSCF that includes the Gamma-point, the two classes + - MODIFIED_K2SCCSD + - MODIFIED_DFK2SCCSD + assume time-reversal symmetry is conserved. 
As a result, the restored supercell + ERIs are real-valued and can be calculated by + (pq|rs) = \sum_{P}^{naux} \sum_{q}^{Nk} (pq|P,q) (rs|P,q).conj() + + For the general case where time-reversal symmetry is not conserved or a twisted + k-point mesh is used, the two classes + - MODIFIED_K2SCCSD_complex + - MODIFIED_DFK2SCCSD_complex + will be used. The restored supercell ERIs are in general complex-valued and + calculated by + (pq|rs) = \sum_{P}^{naux} \sum_{q}^{Nk} (pq|P,q) (rs|P,-q) +''' +from pyscf.cc import ccsd, dfccsd, rccsd +def K2SCCSD(mf, with_df, frozen, mo_coeff, mo_occ): + ''' with_df is KSCF DF object + ''' + import numpy + from pyscf import lib + from pyscf.soscf import newton_ah + from pyscf import scf + + log = logger.new_logger(mf) + + if isinstance(mf, newton_ah._CIAH_SOSCF) or not isinstance(mf, scf.hf.RHF): + mf = scf.addons.convert_to_rhf(mf) + + ''' auto-choose if using DFCCSD (storing Lvv) or CCSD (storing vvvv) by memory + ''' + k2sdf = K2SDF(with_df) + naux = k2sdf.Naux_ibz + maskocc = mo_occ > 1e-10 + frozen, maskact = get_maskact(frozen, len(mo_occ)) + nvir = np.count_nonzero(~maskocc & maskact) + nvir_pair = nvir*(nvir+1)//2 + mem_avail = mf.max_memory - lib.current_memory()[0] + mem_need = (nvir_pair**2 + nvir_pair*naux) * 8/1024**2. 
+ log.debug1('naux= %d nvir_pair= %d mem_avail= %.1f mem_vvvv= %.1f', + naux, nvir_pair, mem_avail, mem_need) + + if gamma_point(with_df.kpts[0]) and np.isrealobj(mo_coeff): + ''' Gamma-inclusive k-point mesh and time-reversal symmetry conserved + ''' + if not FORCE_DFKCC and (naux > nvir_pair or mem_need < mem_avail * 0.7): + log.debug1('Using CCSD') + return MODIFIED_K2SCCSD(mf, with_df, frozen, mo_coeff, mo_occ) + else: + log.debug1('Using DFCCSD') + return MODIFIED_DFK2SCCSD(mf, with_df, frozen, mo_coeff, mo_occ) + else: + raise NotImplementedError + if not FORCE_DFKCC and (naux > nvir_pair or mem_need < mem_avail * 0.7): + log.debug1('Using complex CCSD') + return MODIFIED_K2SCCSD_complex(mf, with_df, frozen, mo_coeff, mo_occ) + else: + log.debug1('Using complex DFCCSD') + raise NotImplementedError('LNO-DFCCSD not implemented for complex orbitals.') + return MODIFIED_DFK2SCCSD_complex(mf, with_df, frozen, mo_coeff, mo_occ) + +class MODIFIED_K2SCCSD(MODIFIED_CCSD): + _keys = {"k2sdf"} + def __init__(self, mf, with_df, frozen, mo_coeff, mo_occ): + MODIFIED_CCSD.__init__(self, mf, frozen, mo_coeff, mo_occ) + self.k2sdf = K2SDF(with_df) + + def ao2mo(self, mo_coeff=None): + return _make_df_eris_outcore(self, mo_coeff) + +def _make_df_eris_outcore(mycc, mo_coeff=None): + from pyscf.ao2mo import _ao2mo + + cput0 = (logger.process_clock(), logger.perf_counter()) + log = logger.Logger(mycc.stdout, mycc.verbose) + eris = _ChemistsERIs() + eris._common_init_(mycc, mo_coeff) + + mo_coeff = eris.mo_coeff + nocc = eris.nocc + nmo = mo_coeff.shape[1] + nvir = nmo - nocc + nvir_pair = nvir*(nvir+1)//2 + + k2sdf = mycc.k2sdf + Naux = k2sdf.Naux_ibz + naux = k2sdf.naux + naux_by_q = k2sdf.naux_by_q + REAL = np.float64 + COMPLEX = np.complex128 + LooR = np.zeros((Naux,nocc,nocc), dtype=REAL) + LooI = np.zeros((Naux,nocc,nocc), dtype=REAL) + LovR = np.zeros((Naux,nocc,nvir), dtype=REAL) + LovI = np.zeros((Naux,nocc,nvir), dtype=REAL) + LvoR = np.zeros((Naux,nvir,nocc), 
dtype=REAL) + LvoI = np.zeros((Naux,nvir,nocc), dtype=REAL) + LvvR = np.zeros((Naux,nvir_pair), dtype=REAL) + LvvI = np.zeros((Naux,nvir_pair), dtype=REAL) + buf = np.empty((naux,nmo,nmo), dtype=COMPLEX) + + for qi,q in enumerate(k2sdf.ibz2bz): + nauxq = naux_by_q[q] + p0 = naux * qi + p1 = p0 + nauxq + for (ki,kj),LpqR,LpqI in k2sdf.loop_ao2mo(q,mo_coeff,mo_coeff,buf=buf,real_and_imag=True): + LpqR = LpqR.reshape(nauxq,nmo,nmo) + LpqI = LpqI.reshape(nauxq,nmo,nmo) + LooR[p0:p1] += LpqR[:,:nocc,:nocc] + LooI[p0:p1] += LpqI[:,:nocc,:nocc] + LovR[p0:p1] += LpqR[:,:nocc,nocc:] + LovI[p0:p1] += LpqI[:,:nocc,nocc:] + LvoR[p0:p1] += LpqR[:,nocc:,:nocc] + LvoI[p0:p1] += LpqI[:,nocc:,:nocc] + LvvR[p0:p1] += lib.pack_tril(LpqR[:,nocc:,nocc:]) + LvvI[p0:p1] += lib.pack_tril(LpqI[:,nocc:,nocc:]) + LpqR = LpqI = None + w = k2sdf.qpts_ibz_weights[qi] + LooR[p0:p1] *= w + LooI[p0:p1] *= w + LovR[p0:p1] *= w + LovI[p0:p1] *= w + LvoR[p0:p1] *= w + LvoI[p0:p1] *= w + LvvR[p0:p1] *= w + LvvI[p0:p1] *= w + + LooR = LooR.reshape(Naux,nocc*nocc) + LooI = LooI.reshape(Naux,nocc*nocc) + LovR = LovR.reshape(Naux,nocc*nvir) + LovI = LovI.reshape(Naux,nocc*nvir) + LvoR = LvoR.reshape(Naux,nocc*nvir) + LvoI = LvoI.reshape(Naux,nocc*nvir) + + eris.feri1 = lib.H5TmpFile() + eris.oooo = eris.feri1.create_dataset('oooo', (nocc,nocc,nocc,nocc), 'f8') + eris.oovv = eris.feri1.create_dataset('oovv', (nocc,nocc,nvir,nvir), 'f8', + chunks=(nocc,nocc,1,nvir)) + eris.ovoo = eris.feri1.create_dataset('ovoo', (nocc,nvir,nocc,nocc), 'f8', + chunks=(nocc,1,nocc,nocc)) + eris.ovvo = eris.feri1.create_dataset('ovvo', (nocc,nvir,nvir,nocc), 'f8', + chunks=(nocc,1,nvir,nocc)) + eris.ovov = eris.feri1.create_dataset('ovov', (nocc,nvir,nocc,nvir), 'f8', + chunks=(nocc,1,nocc,nvir)) + eris.ovvv = eris.feri1.create_dataset('ovvv', (nocc,nvir,nvir_pair), 'f8') + eris.vvvv = eris.feri1.create_dataset('vvvv', (nvir_pair,nvir_pair), 'f8') + eris.oooo[:] = zdotCNtoR(LooR.T, LooI.T, LooR, 
LooI).reshape(nocc,nocc,nocc,nocc) + eris.ovoo[:] = zdotCNtoR(LovR.T, LovI.T, LooR, LooI).reshape(nocc,nvir,nocc,nocc) + eris.oovv[:] = lib.unpack_tril(zdotCNtoR(LooR.T, LooI.T, + LvvR , LvvI)).reshape(nocc,nocc,nvir,nvir) + eris.ovvo[:] = zdotCNtoR(LovR.T, LovI.T, LvoR, LvoI).reshape(nocc,nvir,nvir,nocc) + eris.ovov[:] = zdotCNtoR(LovR.T, LovI.T, LovR, LovI).reshape(nocc,nvir,nocc,nvir) + eris.ovvv[:] = zdotCNtoR(LovR.T, LovI.T, LvvR, LvvI).reshape(nocc,nvir,nvir_pair) + eris.vvvv[:] = zdotCNtoR(LvvR.T, LvvI.T, LvvR, LvvI) + + # def contract1(LbraR, LbraI, LketR, LketI): + # out_shape = (LbraR.shape[1], LketR.shape[1]) + # out = np.zeros(out_shape, dtype=REAL) + # for q1,q2 in enumerate(k2sdf.qconserv): + # i0,i1 = k2sdf.get_auxslice(q1) + # j0,j1 = k2sdf.get_auxslice(q2) + # zdotNNtoR(LbraR[i0:i1].T, LbraI[i0:i1].T, LketR[j0:j1], LketI[j0:j1], 1, out, 1) + # return out + # + # eris.oooo[:] = contract1(LooR, LooI, LooR, LooI).reshape(nocc,nocc,nocc,nocc) + # eris.ovoo[:] = contract1(LovR, LovI, LooR, LooI).reshape(nocc,nvir,nocc,nocc) + # eris.oovv[:] = lib.unpack_tril( + # contract1(LooR, LooI, LvvR, LvvI)).reshape(nocc,nocc,nvir,nvir) + # eris.ovvo[:] = contract1(LovR, LovI, LvoR, LvoI).reshape(nocc,nvir,nvir,nocc) + # eris.ovov[:] = contract1(LovR, LovI, LovR, LovI).reshape(nocc,nvir,nocc,nvir) + # eris.ovvv[:] = contract1(LovR, LovI, LvvR, LvvI).reshape(nocc,nvir,nvir_pair) + # eris.vvvv[:] = contract1(LvvR, LvvI, LvvR, LvvI).reshape(nvir_pair,nvir_pair) + + log.timer('CCSD integral transformation', *cput0) + return eris + + +class MODIFIED_DFK2SCCSD(MODIFIED_DFCCSD): + def __init__(self, mf, with_df, frozen, mo_coeff, mo_occ): + MODIFIED_DFCCSD.__init__(self, mf, frozen, mo_coeff, mo_occ) + self.k2sdf = K2SDF(with_df) + + def ao2mo(self, mo_coeff=None): + return _make_df_eris(self, mo_coeff) + +class _K2SDFChemistsERIs(_ChemistsERIs): + def _contract_vvvv_t2(self, mycc, t2, direct=False, out=None, verbose=None): + assert(not direct) + return 
_contract_vvvv_t2(mycc, self.mol, self.vvLR, self.vvLI, t2, out, verbose) +def _contract_vvvv_t2(mycc, mol, vvLR, vvLI, t2, out=None, verbose=None): + '''Ht2 = np.einsum('ijcd,acdb->ijab', t2, vvvv) + + Args: + vvvv : None or integral object + if vvvv is None, contract t2 to AO-integrals using AO-direct algorithm + ''' + MEMORYMIN = getattr(__config__, 'cc_ccsd_memorymin', 2000) + _dgemm = lib.numpy_helper._dgemm + time0 = logger.process_clock(), logger.perf_counter() + log = logger.new_logger(mol, verbose) + + naux = vvLR.shape[-1] + nvira, nvirb = t2.shape[-2:] + x2 = t2.reshape(-1,nvira,nvirb) + nocc2 = x2.shape[0] + nvir2 = nvira * nvirb + Ht2 = np.ndarray(x2.shape, buffer=out) + Ht2[:] = 0 + + max_memory = max(MEMORYMIN, mycc.max_memory - lib.current_memory()[0]) + def contract_blk_(eri, i0, i1, j0, j1): + ic = i1 - i0 + jc = j1 - j0 + #:Ht2[:,j0:j1] += np.einsum('xef,efab->xab', x2[:,i0:i1], eri) + _dgemm('N', 'N', nocc2, jc*nvirb, ic*nvirb, + x2.reshape(-1,nvir2), eri.reshape(-1,jc*nvirb), + Ht2.reshape(-1,nvir2), 1, 1, i0*nvirb, 0, j0*nvirb) + + if i0 > j0: + #:Ht2[:,i0:i1] += np.einsum('xef,abef->xab', x2[:,j0:j1], eri) + _dgemm('N', 'T', nocc2, ic*nvirb, jc*nvirb, + x2.reshape(-1,nvir2), eri.reshape(-1,jc*nvirb), + Ht2.reshape(-1,nvir2), 1, 1, j0*nvirb, 0, i0*nvirb) + +#TODO: check if vvL can be entirely loaded into memory + nvir_pair = nvirb * (nvirb+1) // 2 + dmax = np.sqrt(max_memory*.7e6/8/nvirb**2/2) + dmax = int(min((nvira+3)//4, max(ccsd.BLKMIN, dmax))) + vvblk = (max_memory*1e6/8 - dmax**2*(nvirb**2*1.5+naux*2))/(naux*2) + vvblk = int(min((nvira+3)//4, max(ccsd.BLKMIN, vvblk/(naux*2)))) + eribuf = np.empty((dmax,dmax,nvir_pair)) + loadbuf = np.empty((dmax,dmax,nvirb,nvirb)) + tril2sq = lib.square_mat_in_trilu_indices(nvira) + + for i0, i1 in lib.prange(0, nvira, dmax): + off0 = i0*(i0+1)//2 + off1 = i1*(i1+1)//2 + vvLR0 = _cp(vvLR[off0:off1]) + vvLI0 = _cp(vvLI[off0:off1]) + for j0, j1 in lib.prange(0, i1, dmax): + ijLR = 
vvLR0[tril2sq[i0:i1,j0:j1] - off0].reshape(-1,naux) + ijLI = vvLI0[tril2sq[i0:i1,j0:j1] - off0].reshape(-1,naux) + eri = np.ndarray(((i1-i0)*(j1-j0),nvir_pair), buffer=eribuf) + for p0, p1 in lib.prange(0, nvir_pair, vvblk): + vvLR1 = _cp(vvLR[p0:p1]) + vvLI1 = _cp(vvLI[p0:p1]) + eri[:,p0:p1] = zdotCNtoR(ijLR, ijLI, vvLR1.T, vvLI1.T) + vvLR1 = vvLI1 = None + ijLR = ijLI = None + + tmp = np.ndarray((i1-i0,nvirb,j1-j0,nvirb), buffer=loadbuf) + _ccsd.libcc.CCload_eri(tmp.ctypes.data_as(ctypes.c_void_p), + eri.ctypes.data_as(ctypes.c_void_p), + (ctypes.c_int*4)(i0, i1, j0, j1), + ctypes.c_int(nvirb)) + contract_blk_(tmp, i0, i1, j0, j1) + time0 = log.timer_debug1('vvvv [%d:%d,%d:%d]'%(i0,i1,j0,j1), *time0) + vvLR0 = vvLI0 = None + return Ht2.reshape(t2.shape) +def _make_df_eris(mycc, mo_coeff=None): + from pyscf.ao2mo import _ao2mo + + eris = _K2SDFChemistsERIs() + eris._common_init_(mycc, mo_coeff) + nocc = eris.nocc + nmo = eris.fock.shape[0] + nvir = nmo - nocc + nvir_pair = nvir*(nvir+1)//2 + mo_coeff = eris.mo_coeff + + k2sdf = mycc.k2sdf + naux_by_q = k2sdf.naux_by_q + naux = k2sdf.naux + Naux = k2sdf.Naux_ibz + REAL = np.float64 + COMPLEX = np.complex128 + + eris.feri = lib.H5TmpFile() + eris.oooo = eris.feri.create_dataset('oooo', (nocc,nocc,nocc,nocc), 'f8') + eris.ovoo = eris.feri.create_dataset('ovoo', (nocc,nvir,nocc,nocc), 'f8', + chunks=(nocc,1,nocc,nocc)) + eris.ovov = eris.feri.create_dataset('ovov', (nocc,nvir,nocc,nvir), 'f8', + chunks=(nocc,1,nocc,nvir)) + eris.ovvo = eris.feri.create_dataset('ovvo', (nocc,nvir,nvir,nocc), 'f8', + chunks=(nocc,1,nvir,nocc)) + eris.oovv = eris.feri.create_dataset('oovv', (nocc,nocc,nvir,nvir), 'f8', + chunks=(nocc,nocc,1,nvir)) + # nrow ~ 4e9/8/blockdim to ensure hdf5 chunk < 4GB + chunks = (min(nvir_pair,int(4e8/k2sdf.blockdim)), min(Naux,k2sdf.blockdim)) + eris.vvLR = eris.feri.create_dataset('vvLR', (nvir_pair,Naux), 'f8', chunks=chunks) + eris.vvLI = eris.feri.create_dataset('vvLI', (nvir_pair,Naux), 'f8', 
chunks=chunks) + + # estimate aux blksize + mem_avail = mycc.max_memory - lib.current_memory()[0] + mem_avail -= Naux*nocc*nmo * 16/1e6 + mem_block = (nmo**2 + nvir_pair) * 16/1e6 + aux_blksize = max(1, min(naux, int(np.round(np.floor(mem_avail*0.7/mem_block))))) + if DEBUG_BLKSIZE: aux_blksize = max(1, naux//2) + + LooR = np.zeros((Naux,nocc,nocc), dtype=REAL) + LooI = np.zeros((Naux,nocc,nocc), dtype=REAL) + LovR = np.zeros((Naux,nocc,nvir), dtype=REAL) + LovI = np.zeros((Naux,nocc,nvir), dtype=REAL) + + buf = np.empty((aux_blksize,nmo,nmo), dtype=COMPLEX) + bufR = np.empty((aux_blksize,nvir_pair), dtype=REAL) + bufI = np.empty((aux_blksize,nvir_pair), dtype=REAL) + + for qi,q in enumerate(k2sdf.ibz2bz): + nauxq = naux_by_q[q] + if nauxq < naux: + eris.vvLR[:,naux*qi+nauxq:naux*(qi+1)] = 0 + eris.vvLI[:,naux*qi+nauxq:naux*(qi+1)] = 0 + for r0,r1 in lib.prange(0,nauxq,aux_blksize): + auxslice = (r0,r1) + dr = r1-r0 + p0 = naux*qi + r0 + p1 = p0 + dr + LvvR = np.ndarray((dr,nvir_pair), dtype=REAL, buffer=bufR) + LvvI = np.ndarray((dr,nvir_pair), dtype=REAL, buffer=bufI) + LvvR.fill(0) + LvvI.fill(0) + for (ki,kj),LpqR,LpqI in k2sdf.loop_ao2mo(q,mo_coeff,mo_coeff,buf=buf, + real_and_imag=True,auxslice=auxslice): + LpqR = LpqR.reshape(dr,nmo,nmo) + LpqI = LpqI.reshape(dr,nmo,nmo) + LooR[p0:p1] += LpqR[:,:nocc,:nocc] + LooI[p0:p1] += LpqI[:,:nocc,:nocc] + LovR[p0:p1] += LpqR[:,:nocc,nocc:] + LovI[p0:p1] += LpqI[:,:nocc,nocc:] + LvvR[:dr] += lib.pack_tril(LpqR[:,nocc:,nocc:]) + LvvI[:dr] += lib.pack_tril(LpqI[:,nocc:,nocc:]) + LpqR = LpqI = None + w = k2sdf.qpts_ibz_weights[qi] + LooR[p0:p1] *= w + LooI[p0:p1] *= w + LovR[p0:p1] *= w + LovI[p0:p1] *= w + LvvR *= w + LvvI *= w + eris.vvLR[:,p0:p1] = LvvR.T + eris.vvLI[:,p0:p1] = LvvI.T + LvvR = LvvI = None + buf = bufR = bufI = None + + LooR = LooR.reshape(Naux,nocc*nocc) + LooI = LooI.reshape(Naux,nocc*nocc) + LovR = LovR.reshape(Naux,nocc*nvir) + LovI = LovI.reshape(Naux,nocc*nvir) + + eris.oooo[:] = zdotCNtoR(LooR.T, 
LooI.T, LooR, LooI).reshape(nocc,nocc,nocc,nocc) + eris.ovoo[:] = zdotCNtoR(LovR.T, LovI.T, LooR, LooI).reshape(nocc,nvir,nocc,nocc) + ovov = zdotCNtoR(LovR.T, LovI.T, LovR, LovI).reshape(nocc,nvir,nocc,nvir) + eris.ovov[:] = ovov + eris.ovvo[:] = ovov.transpose(0,1,3,2) + ovov = None + + mem_now = lib.current_memory()[0] + max_memory = max(0, mycc.max_memory - mem_now) + blksize = max(ccsd.BLKMIN, int((max_memory*.9e6/8-nocc**2*nvir_pair)/(nocc**2+Naux))) + oovv_tril = np.empty((nocc*nocc,nvir_pair)) + for p0, p1 in lib.prange(0, nvir_pair, blksize): + oovv_tril[:,p0:p1] = zdotCNtoR(LooR.T, LooI.T, _cp(eris.vvLR[p0:p1]).T, + _cp(eris.vvLI[p0:p1]).T) + eris.oovv[:] = lib.unpack_tril(oovv_tril).reshape(nocc,nocc,nvir,nvir) + oovv_tril = LooR = LooI = None + + LovR = LovR.reshape(Naux,nocc,nvir) + LovI = LovI.reshape(Naux,nocc,nvir) + vblk = max(nocc, int((max_memory*.15e6/8)/(nocc*nvir_pair))) + vvblk = int(min(nvir_pair, 4e8/nocc, max(4, (max_memory*.8e6/8)/(vblk*nocc+Naux)))) + eris.ovvv = eris.feri.create_dataset('ovvv', (nocc,nvir,nvir_pair), 'f8', + chunks=(nocc,1,vvblk)) + for q0, q1 in lib.prange(0, nvir_pair, vvblk): + vvLR = _cp(eris.vvLR[q0:q1]) + vvLI = _cp(eris.vvLI[q0:q1]) + for p0, p1 in lib.prange(0, nvir, vblk): + tmpLovR = _cp(LovR[:,:,p0:p1]).reshape(Naux,-1) + tmpLovI = _cp(LovI[:,:,p0:p1]).reshape(Naux,-1) + eris.ovvv[:,p0:p1,q0:q1] = zdotCNtoR(tmpLovR.T, tmpLovI.T, vvLR.T, + vvLI.T).reshape(nocc,p1-p0,q1-q0) + vvLR = vvLI = None + return eris + +class MODIFIED_K2SCCSD_complex: + pass +class MODIFIED_DFK2SCCSD_complex: + pass + +class KLNOCCSD(KLNO,LNOCCSD): + def __init__(self, kmf, lo_coeff, frag_lolist, lno_type=None, lno_thresh=None, frozen=None, + mf=None): + KLNO.__init__(self, kmf, lo_coeff, frag_lolist, lno_type, lno_thresh, frozen, mf) + + self.efrag_cc = None + self.efrag_pt2 = None + self.efrag_cc_t = None + self.efrag_cc_spin_comp = None + self.efrag_pt2_spin_comp = None + self.ccsd_t = False + + # args for impurity solver + 
self.kwargs_imp = None + self.verbose_imp = 2 # ERROR and WARNING + + # args for precompute + self._s1e = None + self._h1e = None + self._vhf = None + + def impurity_solve(self, mf, mo_coeff, uocc_loc, eris, frozen=None, log=None): + if log is None: log = logger.new_logger(self) + mo_occ = self.mo_occ + frozen, maskact = get_maskact(frozen, mo_occ.size) + mcc = K2SCCSD(mf, self.with_df, frozen, mo_coeff, mo_occ).set(verbose=self.verbose_imp) + mcc._s1e = self._s1e + mcc._h1e = self._h1e + mcc._vhf = self._vhf + + if self.kwargs_imp is not None: + mcc = mcc.set(**self.kwargs_imp) + + return impurity_solve(mcc, mo_coeff, uocc_loc, mo_occ, maskact, eris, log=log, + ccsd_t=self.ccsd_t, verbose_imp=self.verbose_imp, + max_las_size_ccsd=self._max_las_size_ccsd, + max_las_size_ccsd_t=self._max_las_size_ccsd_t) + +class KLNOCCSD_T(KLNOCCSD): + def __init__(self, *args, **kwargs): + KLNOCCSD.__init__(self, *args, **kwargs) + self.ccsd_t = True + + +if __name__ == '__main__': + from pyscf.pbc import gto, scf, mp + from pyscf import lo + from pyscf.pbc.lno.tools import k2s_scf + from pyscf.pbc.lno.tools import sort_orb_by_cell + from pyscf.lno import LNOCCSD + + atom = ''' + O 0.00000 0.00000 0.11779 + H 0.00000 0.75545 -0.47116 + H 0.00000 -0.75545 -0.47116 + ''' + a = np.eye(3) * 4 + basis = 'cc-pvdz' + kmesh = [3,1,1] + + scaled_center = None + + cell = gto.M(atom=atom, basis=basis, a=a).set(verbose=4) + kpts = cell.make_kpts(kmesh, scaled_center=scaled_center) + + kmf = scf.KRHF(cell, kpts=kpts).density_fit() + kmf.kernel() + + mf = k2s_scf(kmf) + +# KLNO with PM localized orbitals + # PM localization within the BvK supercell + orbocc = mf.mo_coeff[:,mf.mo_occ>1e-6] + mlo = lo.PipekMezey(mf.cell, orbocc) + lo_coeff = mlo.kernel() + while True: # always performing jacobi sweep to avoid trapping in local minimum/saddle point + lo_coeff1 = mlo.stability_jacobi()[1] + if lo_coeff1 is lo_coeff: + break + mlo = lo.PipekMezey(mf.mol, lo_coeff1).set(verbose=4) + mlo.init_guess = 
None + lo_coeff = mlo.kernel() + + # sort LOs by unit cell + s1e = mf.get_ovlp() + Nk = len(kpts) + nlo = lo_coeff.shape[1]//Nk + lo_coeff = sort_orb_by_cell(mf.cell, lo_coeff, Nk, s=s1e) + + frag_lolist = [[i] for i in range(nlo)] + + # Optional: precompute h1e within supercell from K2S transform + h1e = k2s_aoint(cell, kpts, kmf.get_hcore()) + + # KLNOCCSD(T) calculations + # kmlno = KLNOCCSD_T(kmf, lo_coeff, frag_lolist, mf=mf).set(verbose=5) + kmlno = KLNOCCSD(kmf, lo_coeff, frag_lolist, mf=mf).set(verbose=5) + kmlno._h1e = h1e + kmlno.lno_thresh = [1e-4, 1e-5] + kmlno.kernel() + + # Supercell LNOCCSD(T) calculation (the two should match!) + frag_lolist = [[i] for i in range(nlo*Nk)] + # mlno = LNOCCSD_T(mf, lo_coeff, frag_lolist) + mlno = LNOCCSD(mf, lo_coeff, frag_lolist) + mlno._h1e = h1e + mlno.lno_thresh = [1e-4, 1e-5] + mlno.kernel() + + def print_compare(name, ek, es): + print(f'{name:9s} Ecorr: {ek: 14.9f} {es: 14.9f} diff: {es-ek: 14.9f}') + + print() + print('Comparing KLNO with supercell LNO (normalized to per cell):') + print_compare('LNOMP2', kmlno.e_corr_pt2, mlno.e_corr_pt2/Nk) + print_compare('LNOCCSD', kmlno.e_corr_ccsd, mlno.e_corr_ccsd/Nk) + # print_compare('LNOCCSD_T', kmlno.e_corr_ccsd_t, mlno.e_corr_ccsd_t/Nk) diff --git a/pyscf/pbc/lno/make_lno_rdm1.py b/pyscf/pbc/lno/make_lno_rdm1.py new file mode 100644 index 000000000..08cfe619d --- /dev/null +++ b/pyscf/pbc/lno/make_lno_rdm1.py @@ -0,0 +1,962 @@ +''' Make MP2 rdm1 of different flavors + + Args: + eris : + Provide access to the MO basis DF integral `ovL` through the following methods: + get_occ_blk(i0,i1) -> ([i0:i1]v|L) + get_vir_blk(a0,a1) -> (o[a0:a1]|L) + xform_occ(u) -> einsum('iaL,iI->IaL', ovL, u) + xform_vir(u) -> einsum('iaL,aA->iAL', ovL, u) + orbact_data : tuple + moeocc, moevir, uoccact, uviract = orbact_data + where + - `moeocc` and `moevir` are the MO energy for the occupied and virtual MOs + used to obtain `ovL` + - `uoccact` and `uviract` are the overlap matrix between 
canonical and active + orbitals, i.e., uoccact[i,I] = and uviract[a,A] = . + dm_type : str + '1h'/'1p'/'2p' for occ and '1p'/'1h'/'2h' for vir. +''' +import sys +import numpy as np + +from pyscf import lib +from pyscf.lib import logger +from pyscf import __config__ + +from pyscf.lno.make_lno_rdm1 import subspace_eigh, _mp2_rdm1_occblksize, _mp2_rdm1_virblksize +from pyscf.pbc.lno.tools import zdotCNtoR + +DEBUG_BLKSIZE = getattr(__config__, 'lno_base_make_rdm1_k2s_DEBUG_BLKSIZE', False) + + +def make_lo_rdm1_occ(eris, moeocc, moevir, uocc, uvir, dm_type): + isreal = eris.dtype == np.float64 + if dm_type == '1h': + if isreal: + dm = make_lo_rdm1_occ_1h_real(eris, moeocc, moevir, uocc) + else: + dm = make_lo_rdm1_occ_1h_complex(eris, moeocc, moevir, uocc) + elif dm_type == '1p': + if isreal: + dm = make_lo_rdm1_occ_1p_real(eris, moeocc, moevir, uvir) + else: + dm = make_lo_rdm1_occ_1p_complex(eris, moeocc, moevir, uvir) + elif dm_type == '2p': + if isreal: + dm = make_lo_rdm1_occ_2p_real(eris, moeocc, moevir, uvir) + else: + dm = make_lo_rdm1_occ_2p_complex(eris, moeocc, moevir, uvir) + else: + raise RuntimeError('Requested occ LNO type "%s" is unknown.' % dm_type) + dm = _check_dm_imag(eris, dm) + return dm + +def make_lo_rdm1_vir(eris, moeocc, moevir, uocc, uvir, dm_type): + isreal = eris.dtype == np.float64 + if dm_type == '1p': + if isreal: + dm = make_lo_rdm1_vir_1p_real(eris, moeocc, moevir, uvir) + else: + dm = make_lo_rdm1_vir_1p_complex(eris, moeocc, moevir, uvir) + elif dm_type == '1h': + if isreal: + dm = make_lo_rdm1_vir_1h_real(eris, moeocc, moevir, uocc) + else: + dm = make_lo_rdm1_vir_1h_complex(eris, moeocc, moevir, uocc) + elif dm_type == '2h': + if isreal: + dm = make_lo_rdm1_vir_2h_real(eris, moeocc, moevir, uocc) + else: + dm = make_lo_rdm1_vir_2h_complex(eris, moeocc, moevir, uocc) + else: + raise RuntimeError('Requested vir LNO type "%s" is unknown.' 
% dm_type) + dm = _check_dm_imag(eris, dm) + return dm + +''' make lo rdm1 for real orbitals +''' +def make_full_rdm1(eris, moeocc, moevir, with_occ=True, with_vir=True): + r''' Occ-occ and vir-vir blocks of MP2 density matrix + + Math: + dm(i,j) + = 2 * \sum_{kab} t2(ikab).conj() * ( 2*t2(jkab) - t2(jkba) ) + dm(a,b) + = 2 * \sum_{ijc} t2(ijac) * ( 2*t2(ijbc) - t2(ijcb) ).conj() + ''' + assert( with_occ or with_vir ) + + nocc, nvir, naux = eris.nocc, eris.nvir, eris.Naux_ibz + REAL = np.float64 + dsize = 8 + + # determine occblksize + mem_avail = eris.max_memory - lib.current_memory()[0] + M = mem_avail * 0.7 * 1e6/dsize + occblksize, mem_peak = _mp2_rdm1_occblksize(nocc,nvir,naux, 4, 3, M, dsize) + if DEBUG_BLKSIZE: occblksize = max(1,nocc//2) + logger.debug1(eris, 'make_full_rdm1 : nocc = %d nvir = %d naux = %d occblksize = %d ' + 'peak mem = %.2f MB', nocc, nvir, naux, occblksize, mem_peak) + bufsize = occblksize*min(occblksize,nocc)*nvir**2 + buf = np.empty(bufsize, dtype=REAL) + + eov = moeocc[:,None] - moevir + + dmoo = np.zeros((nocc,nocc), dtype=REAL) if with_occ else None + dmvv = np.zeros((nvir,nvir), dtype=REAL) if with_vir else None + for ibatch,(i0,i1) in enumerate(lib.prange(0,nocc,occblksize)): + ivLR, ivLI = eris.get_occ_blk(i0,i1) + ivLR = ivLR.reshape(-1,naux) + ivLI = ivLI.reshape(-1,naux) + eiv = eov[i0:i1] + for jbatch,(j0,j1) in enumerate(lib.prange(0,nocc,occblksize)): + if jbatch == ibatch: + jvLR, jvLI = ivLR, ivLI + ejv = eiv + else: + jvLR, jvLI = eris.get_occ_blk(j0,j1) + jvLR = jvLR.reshape(-1,naux) + jvLI = jvLI.reshape(-1,naux) + ejv = eov[j0:j1] + denom = lib.direct_sum('ia+jb->iajb', eiv, ejv) + t2ijvv = np.ndarray((ivLR.shape[0],jvLR.shape[0]), dtype=REAL, buffer=buf) + zdotCNtoR(ivLR, ivLI, jvLR.T, jvLI.T, cR=t2ijvv) + t2ijvv = t2ijvv.reshape(*denom.shape) + t2ijvv /= denom + jvLR = jvLI = None + denom = None + if with_occ: + dmoo[i0:i1,j0:j1] = 4*lib.einsum('iakc.jbkc->ij', t2ijvv, t2ijvv) + dmoo[i0:i1,j0:j1] -= 
2*lib.einsum('iakc.jckb->ij', t2ijvv, t2ijvv) + if with_vir: + dmvv = 4*lib.einsum('iajc,ibjc->ab', t2ijvv, t2ijvv) + dmvv -= 2*lib.einsum('iajc,icjb->ab', t2ijvv, t2ijvv) + t2ijvv = None + ivLR = ivLI = None + buf = None + + return dmoo, dmvv + +def make_lo_rdm1_occ_1h_real(eris, moeocc, moevir, u): + r''' Occupied MP2 density matrix with one localized hole + + Math: + dm(i,j) + = 2 * \sum_{k'ab} t2(ik'ab) ( 2*t2(jk'ab) - t2(jk'ba) ) + + Args: + eris : ERI object + Provides `ovL` in the canonical MOs. + u : np.ndarray + Overlap between the canonical and localized occupied orbitals, i.e., + u(i,i') = + ''' + nocc, nvir, naux = eris.nocc, eris.nvir, eris.Naux_ibz + REAL = np.float64 + dsize = 8 + assert(u.dtype == REAL) + nOcc = u.shape[1] + + # determine occblksize + mem_avail = eris.max_memory - lib.current_memory()[0] + M = mem_avail * 0.7 * 1e6/dsize + occblksize, mem_peak = _mp2_rdm1_occblksize(nocc,nvir,naux, 4, 3, M, dsize) + if DEBUG_BLKSIZE: occblksize = max(1,nocc//2) + logger.debug1(eris, 'make_lo_rdm1_occ_1h : nocc = %d nvir = %d nOcc = %d naux = %d ' + 'occblksize = %d peak mem = %.2f MB', + nocc, nvir, nOcc, naux, occblksize, mem_peak) + bufsize = occblksize*min(occblksize,nOcc)*nvir**2 + buf1 = np.empty(bufsize, dtype=REAL) + buf2 = np.empty(bufsize, dtype=REAL) + + moeOcc, u = subspace_eigh(np.diag(moeocc), u) + eov = moeocc[:,None] - moevir + eOv = moeOcc[:,None] - moevir + + dm = np.zeros((nocc,nocc), dtype=REAL) + for Kbatch,(K0,K1) in enumerate(lib.prange(0,nOcc,occblksize)): + KvLR, KvLI = eris.xform_occ(u[:,K0:K1]) + KvLR = KvLR.reshape(-1,naux) + KvLI = KvLI.reshape(-1,naux) + eKv = eOv[K0:K1] + for ibatch,(i0,i1) in enumerate(lib.prange(0,nocc,occblksize)): + eiv = eov[i0:i1] + eivKv = lib.direct_sum('ia+Kb->iaKb', eiv, eKv) + ivLR, ivLI = eris.get_occ_blk(i0,i1) + ivLR = ivLR.reshape(-1,naux) + ivLI = ivLI.reshape(-1,naux) + t2ivKv = np.ndarray((ivLR.shape[0],KvLR.shape[0]), dtype=REAL, buffer=buf1) + zdotCNtoR(ivLR, ivLI, KvLR.T, KvLI.T, 
def make_lo_rdm1_occ_1p_real(eris, moeocc, moevir, u):
    r''' Occupied MP2 density matrix with one localized particle

    Math (A: virtual orbital taken from the columns of `u` after
    `subspace_eigh`; b: canonical virtual orbital):
        dm(i,j)
            = - \sum_{A,k,b} [ 2*t2(iA,kb)*t2(jA,kb) - t2(iA,kb)*t2(kA,jb)
                               - t2(kA,ib)*t2(jA,kb) + 2*t2(kA,ib)*t2(kA,jb) ]

    Args:
        eris : ERI object
            Provides `ovL` in the canonical MOs.
        moeocc, moevir : np.ndarray
            Occupied / virtual orbital energies.
        u : np.ndarray
            Overlap between the canonical and localized virtual orbitals, i.e.,
                u(a,a') = <a|a'>
            Must have dtype float64.

    Returns:
        dm : np.ndarray of shape (nocc,nocc), dtype float64.
    '''
    nocc, nvir, naux = eris.nocc, eris.nvir, eris.Naux_ibz
    REAL = np.float64
    dsize = 8
    assert(u.dtype == REAL)
    nVir = u.shape[1]

    # determine virblksize from the memory that is still available
    mem_avail = eris.max_memory - lib.current_memory()[0]
    M = mem_avail * 0.7 * 1e6/dsize
    virblksize, mem_peak = _mp2_rdm1_virblksize(nocc,nvir,naux, 4, 3, M, dsize)
    if DEBUG_BLKSIZE:
        virblksize = max(1,nvir//2)
    logger.debug1(eris, 'make_lo_rdm1_occ_1p : nocc = %d nvir = %d nVir = %d naux = %d '
                  'virblksize = %d peak mem = %.2f MB',
                  nocc, nvir, nVir, naux, virblksize, mem_peak)
    work = np.empty(virblksize*min(virblksize,nVir)*nocc**2, dtype=REAL)

    moeVir, u = subspace_eigh(np.diag(moevir), u)
    eov = moeocc[:,None] - moevir
    eoV = moeocc[:,None] - moeVir

    dm = np.zeros((nocc,nocc), dtype=REAL)
    for A0,A1 in lib.prange(0,nVir,virblksize):
        oALR, oALI = eris.xform_vir(u[:,A0:A1])
        oALR = oALR.reshape(-1,naux)
        oALI = oALI.reshape(-1,naux)
        eoA = eoV[:,A0:A1]
        for b0,b1 in lib.prange(0,nvir,virblksize):
            denom = lib.direct_sum('iA+jb->iAjb', eoA, eov[:,b0:b1])

            obLR, obLI = eris.get_vir_blk(b0,b1)
            obLR = obLR.reshape(-1,naux)
            obLI = obLI.reshape(-1,naux)
            # amplitudes t2[i,A,j,b] are written into the shared work buffer
            t2 = np.ndarray((oALR.shape[0],obLR.shape[0]), dtype=REAL, buffer=work)
            zdotCNtoR(oALR, oALI, obLR.T, obLI.T, cR=t2)
            t2 = t2.reshape(*denom.shape)
            t2 /= denom
            del denom, obLR, obLI

            direct_ik = lib.einsum('iAkb,jAkb->ij', t2, t2)
            dm -= 2 * direct_ik
            dm += lib.einsum('iAkb,kAjb->ij', t2, t2)
            dm += lib.einsum('kAib,jAkb->ij', t2, t2)
            direct_ki = lib.einsum('kAib,kAjb->ij', t2, t2)
            dm -= 2 * direct_ki

            del t2
        del oALR, oALI
    work = None

    return dm
def make_lo_rdm1_vir_1p_real(eris, moeocc, moevir, u):
    r''' Virtual MP2 density matrix with one localized particle

    Math:
        dm(a,b)
            = \sum_{ijc'} 2 * t2(ijac') * ( 2 * t2(ijbc') - t2(jibc') )

    Args:
        eris : ERI object
            Provides `ovL` in the canonical MOs.
        moeocc, moevir : np.ndarray
            Occupied / virtual orbital energies.
        u : np.ndarray
            Overlap between the canonical and localized virtual orbitals, i.e.,
                u(a,a') = <a|a'>
            Must have dtype float64.

    Returns:
        dm : np.ndarray of shape (nvir,nvir), dtype float64.
    '''
    nocc, nvir, naux = eris.nocc, eris.nvir, eris.Naux_ibz
    REAL = np.float64
    dsize = 8
    assert(u.dtype == REAL)
    nVir = u.shape[1]

    # determine virblksize
    mem_avail = eris.max_memory - lib.current_memory()[0]
    M = mem_avail * 0.7 * 1e6/dsize
    virblksize, mem_peak = _mp2_rdm1_virblksize(nocc,nvir,naux, 4, 3, M, dsize)
    if DEBUG_BLKSIZE: virblksize = max(1,nvir//2)
    logger.debug1(eris, 'make_lo_rdm1_vir_1p : nocc = %d nvir = %d nVir = %d naux = %d '
                  'virblksize = %d peak mem = %.2f MB',
                  nocc, nvir, nVir, naux, virblksize, mem_peak)
    # One amplitude block is (nocc*|A|) x (nocc*|a|) with |A| <= min(virblksize,nVir)
    # and |a| <= min(virblksize,nvir). Sizing the buffers with the full `nvir`
    # would allocate far more than is ever used and defeat the batching.
    bufsize = min(virblksize,nVir)*min(virblksize,nvir)*nocc**2
    buf1 = np.empty(bufsize, dtype=REAL)
    buf2 = np.empty(bufsize, dtype=REAL)

    moeVir, u = subspace_eigh(np.diag(moevir), u)
    eov = moeocc[:,None] - moevir
    eoV = moeocc[:,None] - moeVir

    def _t2_block(v0, v1, buf):
        # t2[i,A,j,v] for the current A batch and canonical virtuals v0:v1, stored in `buf`
        denom = lib.direct_sum('iA+jb->iAjb', eoA, eov[:,v0:v1])
        ovLR, ovLI = eris.get_vir_blk(v0,v1)
        ovLR = ovLR.reshape(-1,naux)
        ovLI = ovLI.reshape(-1,naux)
        t2 = np.ndarray((oALR.shape[0], ovLR.shape[0]), dtype=REAL, buffer=buf)
        zdotCNtoR(oALR, oALI, ovLR.T, ovLI.T, cR=t2)
        t2 = t2.reshape(*denom.shape)
        t2 /= denom
        return t2

    # TODO: can we batch over occ index?
    dm = np.zeros((nvir,nvir), dtype=REAL)
    for Abatch,(A0,A1) in enumerate(lib.prange(0,nVir,virblksize)):
        oALR, oALI = eris.xform_vir(u[:,A0:A1])
        oALR = oALR.reshape(-1,naux)
        oALI = oALI.reshape(-1,naux)
        eoA = eoV[:,A0:A1]
        for abatch,(a0,a1) in enumerate(lib.prange(0,nvir,virblksize)):
            t2oAoa = _t2_block(a0, a1, buf1)
            for bbatch,(b0,b1) in enumerate(lib.prange(0,nvir,virblksize)):
                if abatch == bbatch:
                    # diagonal block: reuse the amplitudes already in buf1
                    t2oAob = t2oAoa
                else:
                    t2oAob = _t2_block(b0, b1, buf2)

                dm[a0:a1,b0:b1] += 4 * lib.einsum('iAja,iAjb->ab', t2oAoa, t2oAob)
                dm[a0:a1,b0:b1] -= 2 * lib.einsum('iAja,jAib->ab', t2oAoa, t2oAob)

                t2oAob = None
            t2oAoa = None
        oALR = oALI = None
    buf1 = buf2 = None

    return dm
def make_lo_rdm1_vir_2h_real(eris, moeocc, moevir, u):
    r''' Virtual MP2 density matrix with two localized holes

    Math:
        dm(a,b)
            = 2 * \sum_{i'j'c} t2(i'j'ac) ( 2*t2(i'j'bc) - t2(i'j'cb) )

    Args:
        eris : ERI object
            Provides `ovL` in the canonical MOs.
        moeocc, moevir : np.ndarray
            Occupied / virtual orbital energies.
        u : np.ndarray
            Overlap between the canonical and localized occupied orbitals, i.e.,
                u(i,i') = <i|i'>
            Must have dtype float64.

    Returns:
        dm : np.ndarray of shape (nvir,nvir), dtype float64.
    '''
    REAL = np.float64
    dsize = 8
    nocc, nvir, naux = eris.nocc, eris.nvir, eris.Naux_ibz
    assert(u.dtype == REAL)
    nOcc = u.shape[1]

    # batch size over the localized occupied orbitals
    mem_avail = eris.max_memory - lib.current_memory()[0]
    M = mem_avail * 0.7 * 1e6/dsize
    Occblksize, mem_peak = _mp2_rdm1_occblksize(nOcc,nvir,naux, 4, 3, M, dsize)
    if DEBUG_BLKSIZE:
        Occblksize = max(1,nOcc//2)
    logger.debug1(eris, 'make_lo_rdm1_vir_2h: nocc = %d nvir = %d nOcc = %d naux = %d '
                  'Occblksize = %d peak mem = %.2f MB',
                  nocc, nvir, nOcc, naux, Occblksize, mem_peak)
    work = np.empty((Occblksize*nvir)**2, dtype=REAL)

    moeOcc, u = subspace_eigh(np.diag(moeocc), u)
    eOv = moeOcc[:,None] - moevir

    def _xform(p0, p1):
        # transformed integrals for localized occupied orbitals p0:p1, flattened over (occ,vir)
        LR, LI = eris.xform_occ(u[:,p0:p1])
        return LR.reshape(-1,naux), LI.reshape(-1,naux)

    dm = np.zeros((nvir,nvir), dtype=REAL)
    for I0,I1 in lib.prange(0,nOcc,Occblksize):
        IvLR, IvLI = _xform(I0, I1)
        eIv = eOv[I0:I1]
        for J0,J1 in lib.prange(0,nOcc,Occblksize):
            if (J0,J1) == (I0,I1):
                # same batch: reuse what was already transformed
                eJv, JvLR, JvLI = eIv, IvLR, IvLI
            else:
                eJv = eOv[J0:J1]
                JvLR, JvLI = _xform(J0, J1)

            denom = lib.direct_sum('Ia+Jb->IaJb', eIv, eJv)
            t2 = np.ndarray((IvLR.shape[0], JvLR.shape[0]), dtype=REAL, buffer=work)
            zdotCNtoR(IvLR, IvLI, JvLR.T, JvLI.T, cR=t2)
            t2 = t2.reshape(*denom.shape)
            t2 /= denom
            del denom, JvLR, JvLI

            dm += 4 * lib.einsum('IaJc,IbJc->ab', t2, t2)
            dm -= 2 * lib.einsum('IaJc,IcJb->ab', t2, t2)

            del t2
        del IvLR, IvLI
    work = None

    return dm
density matrix with one localized hole + + Math: + dm(i,j) + = 2 * \sum_{k'ab} t2(ik'ab) ( 2*t2(jk'ab) - t2(jk'ba) ) + + Args: + eris : ERI object + Provides `ovL` in the canonical MOs. + u : np.ndarray + Overlap between the canonical and localized occupied orbitals, i.e., + u(i,i') = + ''' + nocc, nvir, naux = eris.nocc, eris.nvir, eris.naux + nqpts = len(eris.qpts) + dtype = eris.dtype + dsize = eris.dsize + nOcc = u.shape[1] + + # determine occblksize + mem_avail = eris.max_memory - lib.current_memory()[0] + M = mem_avail * 0.7 * 1e6/dsize + occblksize, mem_peak = _mp2_rdm1_occblksize(nocc,nvir,naux, nqpts+2, 3, M, dsize) + if DEBUG_BLKSIZE: occblksize = max(1,nocc//2) + logger.debug1(eris, 'make_lo_rdm1_occ_1h : nocc = %d nvir = %d nOcc = %d naux = %d ' + 'occblksize = %d peak mem = %.2f MB', + nocc, nvir, nOcc, naux, occblksize, mem_peak) + + moeOcc, u = subspace_eigh(np.diag(moeocc), u) + eov = moeocc[:,None] - moevir + eOv = moeOcc[:,None] - moevir + + dm = np.zeros((nocc,nocc), dtype=dtype) + for Kbatch,(K0,K1) in enumerate(lib.prange(0,nOcc,occblksize)): + KvL = [eris.xform_occ(q, u[:,K0:K1]) for q in range(nqpts)] + eKv = eOv[K0:K1] + for ibatch,(i0,i1) in enumerate(lib.prange(0,nocc,occblksize)): + eiv = eov[i0:i1] + eiKvv = lib.direct_sum('ia+Kb->iKab', eiv, eKv) + t2iKvv = 0 + for q1,q2 in enumerate(eris.qconserv): + t2iKvv += lib.einsum('iax,Kbx->iKab', eris.get_occ_blk(q1,i0,i1), KvL[q2]) + conj_(t2iKvv) + t2iKvv /= eiKvv + eiKvv = None + for jbatch,(j0,j1) in enumerate(lib.prange(0,nocc,occblksize)): + if jbatch == ibatch: + t2jKvv = t2iKvv + else: + ejv = eov[j0:j1] + ejKvv = lib.direct_sum('ia+Kb->iKab', ejv, eKv) + t2jKvv = 0 + for q1,q2 in enumerate(eris.qconserv): + t2jKvv += lib.einsum('iax,Kbx->iKab', eris.get_occ_blk(q1,j0,j1), KvL[q2]) + conj_(t2jKvv) + t2jKvv /= ejKvv + ejKvv = None + + dm[i0:i1,j0:j1] -= 4 * lib.einsum('iKab,jKab->ij', np.conj(t2iKvv), t2jKvv) + dm[i0:i1,j0:j1] += 2 * lib.einsum('iKab,jKba->ij', np.conj(t2iKvv), t2jKvv) + 
def make_lo_rdm1_occ_1p_complex(eris, moeocc, moevir, u):
    r''' Occupied MP2 density matrix with one localized particle

    Math (A: virtual orbital taken from the columns of `u` after
    `subspace_eigh`; b: canonical virtual orbital):
        dm(i,j)
            = - \sum_{A,k,b} [ 2*t2(ikAb).conj()*t2(jkAb) - t2(ikAb).conj()*t2(kjAb)
                               - t2(kiAb).conj()*t2(jkAb) + 2*t2(kiAb).conj()*t2(kjAb) ]

    Args:
        eris : ERI object
            Provides `ovL` in the canonical MOs, per q-point.
        moeocc, moevir : np.ndarray
            Occupied / virtual orbital energies.
        u : np.ndarray
            Overlap between the canonical and localized virtual orbitals, i.e.,
                u(a,a') = <a|a'>

    Returns:
        dm : np.ndarray of shape (nocc,nocc), dtype `eris.dtype`.
    '''
    nocc, nvir, naux = eris.nocc, eris.nvir, eris.naux
    nqpts = len(eris.qpts)
    dtype = eris.dtype
    dsize = eris.dsize
    nVir = u.shape[1]

    # determine virblksize
    mem_avail = eris.max_memory - lib.current_memory()[0]
    M = mem_avail * 0.7 * 1e6/dsize
    virblksize, mem_peak = _mp2_rdm1_virblksize(nocc,nvir,naux, nqpts+1, 2, M, dsize)
    if DEBUG_BLKSIZE:
        virblksize = max(1,nvir//2)
    logger.debug1(eris, 'make_lo_rdm1_occ_1p : nocc = %d nvir = %d nVir = %d naux = %d '
                  'virblksize = %d peak mem = %.2f MB',
                  nocc, nvir, nVir, naux, virblksize, mem_peak)

    moeVir, u = subspace_eigh(np.diag(moevir), u)
    eov = moeocc[:,None] - moevir
    eoV = moeocc[:,None] - moeVir

    dm = np.zeros((nocc,nocc), dtype=dtype)
    for A0,A1 in lib.prange(0,nVir,virblksize):
        # transformed integrals for this batch of rotated virtuals, one per q-point
        oAL = [eris.xform_vir(q,u[:,A0:A1]) for q in range(nqpts)]
        eoA = eoV[:,A0:A1]
        for b0,b1 in lib.prange(0,nvir,virblksize):
            denom = lib.direct_sum('iA+jb->ijAb', eoA, eov[:,b0:b1])
            t2 = 0
            for q1,q2 in enumerate(eris.qconserv):
                t2 += lib.einsum('iAx,jbx->ijAb', oAL[q1], eris.get_vir_blk(q2,b0,b1))
            conj_(t2)
            t2 /= denom
            del denom

            # the conjugate is the same for all four terms, so form it once
            t2c = np.conj(t2)
            dm -= 2 * lib.einsum('ikAb,jkAb->ij', t2c, t2)
            dm += lib.einsum('ikAb,kjAb->ij', t2c, t2)
            dm += lib.einsum('kiAb,jkAb->ij', t2c, t2)
            dm -= 2 * lib.einsum('kiAb,kjAb->ij', t2c, t2)

            del t2, t2c
        oAL = None

    return dm
def make_lo_rdm1_vir_1p_complex(eris, moeocc, moevir, u):
    r''' Virtual MP2 density matrix with one localized particle

    Math:
        dm(a,b)
            = \sum_{ijc'} 2 * t2(ijac') * ( 2 * t2(ijbc') - t2(jibc') ).conj()

    Args:
        eris : ERI object
            Provides `ovL` in the canonical MOs, per q-point.
        moeocc, moevir : np.ndarray
            Occupied / virtual orbital energies.
        u : np.ndarray
            Overlap between the canonical and localized virtual orbitals, i.e.,
                u(a,a') = <a|a'>

    Returns:
        dm : np.ndarray of shape (nvir,nvir), dtype `eris.dtype`.
    '''
    nocc, nvir, naux = eris.nocc, eris.nvir, eris.naux
    nqpts = len(eris.qpts)
    dtype = eris.dtype
    dsize = eris.dsize
    nVir = u.shape[1]

    # determine virblksize
    mem_avail = eris.max_memory - lib.current_memory()[0]
    M = mem_avail * 0.7 * 1e6/dsize
    virblksize, mem_peak = _mp2_rdm1_virblksize(nocc,nvir,naux, nqpts+2, 3, M, dsize)
    if DEBUG_BLKSIZE:
        virblksize = max(1,nvir//2)
    logger.debug1(eris, 'make_lo_rdm1_vir_1p : nocc = %d nvir = %d nVir = %d naux = %d '
                  'virblksize = %d peak mem = %.2f MB',
                  nocc, nvir, nVir, naux, virblksize, mem_peak)

    moeVir, u = subspace_eigh(np.diag(moevir), u)
    eov = moeocc[:,None] - moevir
    eoV = moeocc[:,None] - moeVir

    def _t2_block(v0, v1):
        # amplitudes t2[i,j,A,v] for the current A batch and canonical virtuals v0:v1
        denom = lib.direct_sum('iA+jb->ijAb', eoA, eov[:,v0:v1])
        amp = 0
        for q1,q2 in enumerate(eris.qconserv):
            amp += lib.einsum('iAx,jbx->ijAb', oAL[q1], eris.get_vir_blk(q2,v0,v1))
        conj_(amp)
        amp /= denom
        return amp

    # TODO: can we batch over occ index?
    dm = np.zeros((nvir,nvir), dtype=dtype)
    for A0,A1 in lib.prange(0,nVir,virblksize):
        oAL = [eris.xform_vir(q, u[:,A0:A1]) for q in range(nqpts)]
        eoA = eoV[:,A0:A1]
        for a0,a1 in lib.prange(0,nvir,virblksize):
            t2ooAa = _t2_block(a0, a1)
            for b0,b1 in lib.prange(0,nvir,virblksize):
                # the diagonal batch reuses the amplitudes that were just built
                t2ooAb = t2ooAa if (b0,b1) == (a0,a1) else _t2_block(b0, b1)

                dm[a0:a1,b0:b1] += 4 * lib.einsum('ijAa,ijAb->ab', t2ooAa, np.conj(t2ooAb))
                dm[a0:a1,b0:b1] -= 2 * lib.einsum('ijAa,jiAb->ab', t2ooAa, np.conj(t2ooAb))

                del t2ooAb
            del t2ooAa
        oAL = None

    return dm
def conj_(a):
    ''' Complex-conjugate the array `a` in place; nothing is returned.
    '''
    np.conjugate(a, out=a)
class WaterDimer(unittest.TestCase):
    ''' k-point LNO-CCSD vs. supercell LNO-CCSD for a periodic water dimer.

    The fixture runs a 3x1x1 k-point density-fitted KRHF and converts it to a
    supercell SCF object with `k2s_scf`, so both LNO drivers start from the
    same mean-field solution.
    '''
    @classmethod
    def setUpClass(cls):
        cell = gto.Cell()
        cell.verbose = 4
        cell.output = '/dev/null'
        cell.atom = '''
        O   -1.485163346097   -0.114724564047    0.000000000000
        H   -1.868415346097    0.762298435953    0.000000000000
        H   -0.533833346097    0.040507435953    0.000000000000
        O    1.416468653903    0.111264435953    0.000000000000
        H    1.746241653903   -0.373945564047   -0.758561000000
        H    1.746241653903   -0.373945564047    0.758561000000
        '''
        cell.a = np.eye(3) * 5
        cell.basis = 'cc-pvdz'
        cell.precision = 1e-10
        cell.build()

        kmesh = [3,1,1]
        kpts = cell.make_kpts(kmesh)
        nkpts = len(kpts)
        scell = pbctools.super_cell(cell, kmesh)

        kmf = scf.KRHF(cell, kpts=kpts).density_fit().run()
        smf = k2s_scf(kmf)

        cls.cell = cell
        cls.kmf = kmf
        cls.scell = scell
        cls.smf = smf
        cls.frozen = 2 * nkpts

    @classmethod
    def tearDownClass(cls):
        cls.cell.stdout.close()
        cls.scell.stdout.close()
        del cls.cell, cls.kmf, cls.frozen
        del cls.scell, cls.smf

    # NOTE: the disabled test below refers to `self.mf` and `self.ecano`, which
    # this fixture does not define; it needs porting before it can be enabled.
    # def test_lno_pm_by_thresh(self):
    #     cell = self.cell
    #     mf = self.mf
    #     frozen = self.frozen
    #
    #     # PM localization
    #     orbocc = mf.mo_coeff[:,frozen:np.count_nonzero(mf.mo_occ)]
    #     mlo = lo.PipekMezey(cell, orbocc)
    #     lo_coeff = mlo.kernel()
    #     while True: # always performing jacobi sweep to avoid trapping in local minimum/saddle point
    #         lo_coeff1 = mlo.stability_jacobi()[1]
    #         if lo_coeff1 is lo_coeff:
    #             break
    #         mlo = lo.PipekMezey(mf.cell, lo_coeff1).set(verbose=4)
    #         mlo.init_guess = None
    #         lo_coeff = mlo.kernel()
    #
    #     # Fragment list: for PM, every orbital corresponds to a fragment
    #     frag_lolist = [[i] for i in range(lo_coeff.shape[1])]
    #
    #     gamma = 10
    #     threshs = [1e-5,1e-6,1e-100]
    #     refs = [
    #         [-0.4044781783,-0.4231598372,-0.4292049721],
    #         [-0.4058765086,-0.4244510794,-0.4307864928],
    #         self.ecano
    #     ]
    #     for thresh,ref in zip(threshs,refs):
    #         # mcc = LNOCCSD_T(mf, lo_coeff, frag_lolist, frozen=frozen).set(verbose=5)
    #         mcc = LNOCCSD(mf, lo_coeff, frag_lolist, frozen=frozen).set(verbose=5)
    #         mcc.lno_thresh = [thresh*gamma,thresh]
    #         mcc.kernel()
    #         emp2 = mcc.e_corr_pt2
    #         eccsd = mcc.e_corr_ccsd
    #         eccsd_t = mcc.e_corr_ccsd_t
    #         # print('[%s],' % (','.join([f'{x:.10f}' for x in [emp2,eccsd,eccsd_t]])))
    #         self.assertAlmostEqual(emp2, ref[0], 6)
    #         self.assertAlmostEqual(eccsd, ref[1], 6)
    #         # self.assertAlmostEqual(eccsd_t, ref[2], 6)

    def test_lno_iao_by_thresh(self):
        ''' k-point and supercell LNO-CCSD must agree in MP2 and CCSD energies. '''
        cell = self.cell
        kmf = self.kmf
        smf = self.smf
        frozen = self.frozen
        kpts = kmf.kpts
        nkpts = len(kpts)

        # IAO localization in supercell
        kocc_coeff = [kmf.mo_coeff[k][:,kmf.mo_occ[k]>1e-6] for k in range(nkpts)]
        lo_coeff = k2s_iao(cell, kocc_coeff, kpts, orth=True)

        # k-point LNO: only need to compute fragments within a unit cell
        cell_iao = lo.iao.reference_mol(cell)
        frag_lolist = autofrag_iao(cell_iao)

        # Supercell LNO:
        # In principle, one needs to treat all fragments within the supercell. But here
        # the supercell SCF object is from `k2s_scf`. As a result, the MOs, the IAOs and
        # the fragments are all translationally invariant. We only need to treat fragments
        # from the first unit cell, i.e., same as k-point LNO.
        sfrag_lolist = frag_lolist

        gamma = 10
        threshs = [1e-5,1e-6,1e-100]
        for thresh in threshs:
            # subTest: a failure at one threshold does not hide the others
            with self.subTest(thresh=thresh):
                # mcc = LNOCCSD_T(mf, lo_coeff, frag_lolist, frozen=frozen).set(verbose=5)
                kmcc = KLNOCCSD(kmf, lo_coeff, frag_lolist, mf=smf, frozen=frozen).set(verbose=5)
                kmcc.lno_thresh = [thresh*gamma,thresh]
                kmcc.kernel()
                kemp2 = kmcc.e_corr_pt2
                keccsd = kmcc.e_corr_ccsd
                keccsd_t = kmcc.e_corr_ccsd_t   # only used by the disabled (T) check

                # the supercell run uses the fragment list prepared for it
                mcc = LNOCCSD(smf, lo_coeff, sfrag_lolist, frozen=frozen).set(verbose=5)
                mcc.lno_thresh = [thresh*gamma,thresh]
                mcc.kernel()

                semp2 = mcc.e_corr_pt2
                seccsd = mcc.e_corr_ccsd
                seccsd_t = mcc.e_corr_ccsd_t    # only used by the disabled (T) check
                # print('[%s],' % (','.join([f'{x:.10f}' for x in [emp2,eccsd,eccsd_t]])))
                self.assertAlmostEqual(kemp2, semp2, 6)
                self.assertAlmostEqual(keccsd, seccsd, 6)
                # self.assertAlmostEqual(keccsd_t, seccsd_t, 6)
def k2s_scf(kmf, fock_imag_tol=1e-6):
    ''' Build a supercell RHF object from a k-point SCF object.

    The k-point overlap, core Hamiltonian and Fock matrices are transformed to
    the supercell AO basis with `_k2s_aoint`. The Fock matrix is rebuilt from
    the k-point MO coefficients and energies, and the supercell MOs come from
    diagonalizing the transformed Fock matrix.

    Args:
        kmf : k-point SCF object
            Must provide `cell`, `kpts`, `mo_coeff`, `mo_energy`, `e_tot`,
            `get_ovlp()` and `get_hcore()`.
        fock_imag_tol : float
            Currently unused; kept for interface compatibility.

    Returns:
        A `pyscf.pbc.scf.RHF` object on the supercell with `mo_coeff`,
        `mo_energy`, `mo_occ`, `e_tot` and `converged` set, and with
        `get_ovlp`/`get_hcore` overridden to return the transformed matrices.
    '''
    from pyscf.scf.hf import eig
    from pyscf.pbc import scf

    cell = kmf.cell
    kpts = kmf.kpts
    Nk = len(kpts)
    # NOTE(review): this holds for every Nk up to rounding, so it does not
    # actually restrict the k-mesh; kept as is to avoid changing behavior.
    assert( abs((Nk**(1./3.))**3.-Nk) < 0.1 )

    kmesh = k2gamma.kpts_to_kmesh(cell, kpts-kpts[0])
    scell, phase = k2gamma.get_phase(cell, kpts, kmesh)

    kmo_coeff = kmf.mo_coeff
    kmo_energy = kmf.mo_energy
    ks1e = kmf.get_ovlp()
    kh1e = kmf.get_hcore()

    # F(k) = S C diag(e) C^H S for each k-point
    ksc = [np.dot(s1e,mo_coeff) for s1e,mo_coeff in zip(ks1e,kmo_coeff)]
    kfock = np.asarray([np.dot(sc*moe,sc.T.conj()) for sc,moe in zip(ksc,kmo_energy)])

    s1e = _k2s_aoint(ks1e, kpts, phase, 's1e')
    h1e = _k2s_aoint(kh1e, kpts, phase, 'h1e')
    fock = _k2s_aoint(kfock, kpts, phase, 'fock')

    mo_energy, mo_coeff = eig(fock, s1e)

    mf = scf.RHF(scell, kpt=kpts[0])
    mf.mo_coeff = mo_coeff
    mf.mo_energy = mo_energy
    mf.mo_occ = mf.get_occ()
    mf.e_tot = kmf.e_tot * Nk
    mf.converged = True
    # Accept keyword arguments as well (e.g. `cell=`, `kpt=`), so that callers
    # using the regular SCF method signature do not hit a TypeError.
    mf.get_ovlp = lambda *args, **kwargs: s1e
    mf.get_hcore = lambda *args, **kwargs: h1e

    return mf
def k2s_aoint(cell, kpts, kA, name='aoint', phase=None):
    r''' U(R,k) * C(k,\mu,\nu) * U(S,k).conj() -> C(R\mu,S\nu)

    Args:
        cell : pyscf.pbc.gto.Cell object
            Unit cell. Only used when `phase` is not given.
        kpts : np.ndarray
            The k-points.
        kA : sequence of np.ndarray
            AO matrices, one per k-point.
        name : str
            Label for the integral, forwarded to `_k2s_aoint`, which uses it
            in its warning message.
        phase : np.ndarray, optional
            Phase factors U(R,k). Computed via `k2gamma` when omitted.

    Returns:
        The matrix in the supercell AO basis, as returned by `_k2s_aoint`.
    '''
    if phase is None:
        kmesh = k2gamma.kpts_to_kmesh(cell, kpts-kpts[0])
        scell, phase = k2gamma.get_phase(cell, kpts, kmesh)
    # forward `name`; otherwise the caller's label is silently replaced by the default
    return _k2s_aoint(kA, kpts, phase, name)
This may lead to error.', + name, sAi) + sA = np.asarray(sA.real, order='C') + + return sA + + +class K2SDF(lib.StreamObject): + def __init__(self, with_df, time_reversal_symmetry=True): + self.with_df = with_df + self.cell = with_df.cell + self.kpts = with_df.kpts + self.qpts = self.kpts - self.kpts[0] + + self.kikj_by_q = get_kikj_by_q(self.cell, self.kpts, self.qpts) + self.qconserv = get_qconserv(self.cell, self.qpts) + + nqpts = len(self.qpts) + if gamma_point(self.kpts[0]) and time_reversal_symmetry: # time reversal symmetry + find = np.zeros(len(self.qpts), dtype=bool) + ibz2bz = [] + qpts_ibz_weights = [] + for q1,q2 in enumerate(self.qconserv): + if find[q1] or find[q2]: continue + ibz2bz.append(min(q1,q2)) + qpts_ibz_weights.append(1. if q1==q2 else 2**0.5) + find[q2] = find[q2] = True + self.ibz2bz = np.asarray(ibz2bz, dtype=int) + self.qpts_ibz = self.qpts[self.ibz2bz] + self.qpts_ibz_weights = np.asarray(qpts_ibz_weights) / nqpts**0.5 + else: + self.ibz2bz = np.arange(nqpts) + self.qpts_ibz = self.qpts + self.qpts_ibz_weights = np.ones(nqpts) / nqpts**0.5 + + self.kmesh = k2gamma.kpts_to_kmesh(self.cell, self.qpts) + self.scell, self.phase = k2gamma.get_phase(self.cell, self.kpts, self.kmesh) + + self._naux = None + self.blockdim = with_df.blockdim + + @property + def naux_by_q(self): + return self.get_naoaux() + @property + def naux(self): + return self.get_naoaux().max() + @property + def Naux(self): + return self.naux*len(self.qpts) + @property + def Naux_ibz(self): + return self.naux*len(self.qpts_ibz) + def get_naoaux(self): + if self._naux is None: + with_df = self.with_df + kpts = self.kpts + nqpts = len(self.qpts) + naux = np.zeros(nqpts, dtype=int) + for q in range(nqpts): + ki,kj = self.kikj_by_q[q][0] + kpti_kptj = np.asarray((kpts[ki],kpts[kj])) + with _load3c(with_df._cderi, with_df._dataname, kpti_kptj=kpti_kptj) as j3c: + naux[q] = j3c.shape[0] + self._naux = naux + return self._naux + + def get_auxslice(self, q): + p0 = 
self._naux[:q].sum() + return p0, p0+self._naux[q] + + def s2k_mo_coeff(self, mo_coeff): + nao = self.cell.nao_nr() + Nk = len(self.qpts) + Nao,Nmo = mo_coeff.shape + assert(Nao == nao*Nk) + kmo_coeff = lib.einsum('Rk,Rpi->kpi', self.phase.conj(), mo_coeff.reshape(Nk,nao,Nmo)) + return kmo_coeff + + def loop(self, q, auxslice=None): + if auxslice is None: auxslice = (0,self._naux[q]) + p0,p1 = auxslice + with_df = self.with_df + kpts = self.kpts + for ki,kj in self.kikj_by_q[q]: + kpti_kptj = np.asarray((kpts[ki],kpts[kj])) + with _load3c(with_df._cderi, with_df._dataname, kpti_kptj=kpti_kptj) as j3c: + yield (ki,kj), np.asarray(j3c[p0:p1], order='C') + + def loop_ao2mo(self, q, mo1, mo2, buf=None, real_and_imag=False, auxslice=None): + ''' Loop over all Lpq[k1,k2] s.t. kpts[k] = -kpts[k1] + kpts[k2] + ''' + kmo1 = self.s2k_mo_coeff(mo1) + kmo2 = self.s2k_mo_coeff(mo2) + tao = [] + ao_loc = None + nao = self.cell.nao_nr() + for (ki,kj),Lpq_ao in self.loop(q, auxslice=auxslice): + mo = np.asarray(np.hstack((kmo1[ki], kmo2[kj])), order='F') + nmo1 = kmo1[ki].shape[1] + nmo2 = kmo2[kj].shape[1] + ijslice = (0,nmo1,nmo1,nmo1+nmo2) + if Lpq_ao[1].size != nao**2: # aosym = 's2' + Lpq_ao = lib.unpack_tril(Lpq_ao).astype(np.complex128) + Lpq = _ao2mo.r_e2(Lpq_ao, mo, ijslice, tao, ao_loc, out=buf) + if real_and_imag: + yield (ki,kj), np.asarray(Lpq.real), np.asarray(Lpq.imag) + else: + yield (ki,kj), Lpq + Lpq_ao = Lpq = None + + def get_eri_dtype_dsize(self, *arrs): + ''' Get ERI dtype/size given arrays involved (e.g., mo_coeff). + + Explanation: + While the 3c DF integrals is complex due to using kpts, the resulting + ERI can be real if (a) kpt mesh is Gamma-inclusive and (b) MOs are real. 
+ ''' + return _guess_dtype_dsize(self.kpts, *arrs) + +def get_kikj_by_q(cell, kpts, qpts): + ''' Find map such that dot(qpts[q] - kpts[ki] + kpts[kj], alat) = 2pi * n + + Returns: + kikj_by_q (nested list): + Usage: + for q,qpt in enumerate(qpts): + for ki,kj in kikj_by_q[q]: + kpti,kptj = kpts[ki], kpts[kj] + ''' + nkpts = len(kpts) + nqpts = len(qpts) + kptijs = -kpts[:,None,:]+kpts + qkikjs = lib.direct_sum('ax+bcx->abcx', qpts, kptijs) + x = cell.get_scaled_kpts(qkikjs.reshape(-1,3)).reshape(nqpts,nkpts**2,3) + qs, kijs = np.where(abs(x - x.round()).sum(axis=-1) < KPT_DIFF_TOL) + kijs_by_q = [kijs[qs==q] for q in range(nqpts)] + kikjs_by_q = [] + for q in range(nqpts): + kijs = kijs_by_q[q] + k1s = kijs // nkpts + k2s = kijs % nkpts + kikjs = np.vstack((k1s,k2s)).T + kikjs_by_q.append(kikjs) + return kikjs_by_q + +def get_qconserv(cell, qpts): + ''' Find map such that dot(qpts[q1] + qpts[q2], alat) = 2pi * n + + Returns: + qconserv (list): + Usage: + for q1 in range(nqpts): + q2 = qconserv[q1] + ''' + nqpts = len(qpts) + qiqjs = lib.direct_sum('ax+bx->abx', qpts, qpts) + x = cell.get_scaled_kpts(qiqjs.reshape(-1,3)).reshape(nqpts,nqpts,3) + q1s, q2s = np.where(abs(x-x.round()).sum(axis=-1) < KPT_DIFF_TOL) + assert( len(np.unique(q1s)) == nqpts ) + assert( len(q1s) == nqpts ) + qconserv = q2s[np.argsort(q1s)] + return qconserv + +def zdotCNtoR(aR, aI, bR, bI, alpha=1, cR=None, beta=0): + '''c = (a.conj()*b).real''' + cR = lib.ddot(aR, bR, alpha, cR, beta) + cR = lib.ddot(aI, bI, alpha, cR, 1 ) + return cR +def zdotNNtoR(aR, aI, bR, bI, alpha=1, cR=None, beta=0): + '''c = (a.conj()*b).real''' + cR = lib.ddot(aR, bR, alpha, cR, beta) + cR = lib.ddot(aI, bI, -alpha, cR, 1 ) + return cR + +def _guess_dtype_dsize(kpts, *arrs): + dtype = np.float64 if gamma_point(kpts[0]) else np.complex128 + dtype = np.result_type(dtype, *arrs) + dsize = 8 if dtype == np.float64 else 16 + return dtype, dsize + + +def sort_orb_by_cell(scell, lo_coeff, Ncell, s=None, kpt=np.zeros(3), 
pop_method='meta-lowdin', + Q_tol=1e-3): + r''' Reorder LOs by unit cells in a supercel. + + Args: + scell (pyscf.pbc.gto.Cell object): + Supercell. + lo_coeff (np.ndarray): + AO coefficient matrix of LOs in supercell. + Ncell (int): + Number of cell in supercell. + s (np.ndarray): + AO overlap matrix in supercell. If not given, `s` is calculated by + s = scell.pbc_intor('int1e_ovlp', kpts=kpt) + where `kpt` needs to be specified (vide infra). + kpt (np.ndarray): + k-point for calculating the AO overlap matrix. Default is Gamma point. + pop_method (str): + Population method for assigning LOs to cells. Default is 'meta-lowdin'. + Q_tol (float): + Tolerance for determining degenerate LOs based on the `Q` value defined as: + Q[alpha,i] := ( \sum_{A} pop[alpha,i,A]^2 )^0.5 + where `pop[alpha,i,A]` denotes the population of a LO `phi_alpha` on atom + `A` in cell `i`. `Q[alpha,i]` \in [0,1] serves as a 'fingerprint' of a LO. + The higher `Q` is, the more localized the corresponding LO is, and vice versa. + Finding equal values in the `Q` matrix allows us to find + 1. degenerate LOs in the same cell (e.g., the 4 sp^3 C-C LOs in diamond) + 2. LOs in different cells related by lattice translation. + This argument `Q_tol` determines how close is deemed "equal". + + Returns: + lo_coeff (np.ndarray): + Same LO coefficient matrix but reordered by cell. Let Nlo = Ncell*nlo. + Then lo_coeff[:,i*nlo:(i+1)*nlo] gives the LOs in i-th cell. 
+ ''' + from pyscf.lo.orth import orth_ao + + Natm = scell.natm + natm = Natm//Ncell + assert( natm*Ncell == Natm ) + Nlo = lo_coeff.shape[1] + nlo = Nlo//Ncell + assert( nlo*Ncell == Nlo ) + + if s is None: s = scell.pbc_intor('int1e_ovlp', kpts=kpt) + csc = reduce(np.dot, (lo_coeff.conj().T, s, orth_ao(scell, pop_method, 'ANO', s=s))) + pop_by_atom = np.asarray([lib.einsum('ix,ix->i', csc[:,p0:p1], csc[:,p0:p1].conj()) + for i, (b0, b1, p0, p1) in enumerate(scell.offset_nr_by_atom())]) + Q_by_cell = np.asarray([(pop_by_atom[i*natm:(i+1)*natm]**2.).sum(axis=0)**0.5 + for i in range(Ncell)]) + + order0 = np.argsort(Q_by_cell[0])[::-1] + assigned = np.zeros(Nlo, dtype=bool) + reorder = [[] for i in range(Ncell)] + for i in order0: + if assigned[i]: + continue + Q_ref = Q_by_cell[0,i] + mask = np.repeat(~assigned, Ncell).reshape(-1,Ncell).T # LOs not already assigned + idxcell, idxlo = np.where((abs(Q_by_cell-Q_ref)RpSq', phase, kiao_coeff, + phase.conj()).reshape(nkpts*nao,nkpts*niao) + + if gamma_point(kpts[0]): + iao_coeff_imag = abs(iao_coeff.imag).max() + if iao_coeff_imag > 1e-10: + log.warn('Discard large imag part in k2s_iao: %6.2e.', iao_coeff_imag) + iao_coeff = iao_coeff.real + + return iao_coeff