diff --git a/examples/pwscf/README.md b/examples/pwscf/README.md
new file mode 100644
index 000000000..32b4cd025
--- /dev/null
+++ b/examples/pwscf/README.md
@@ -0,0 +1,35 @@
+# Plane-Wave Mode
+
+The `pyscf.pbc.pwscf` module provides experimental support for Hartree-Fock (HF),
+density functional theory (DFT), second-order Møller-Plesset perturbation theory (MP2),
+and coupled cluster singles and doubles (CCSD) in a plane-wave basis.
+The CCECP and GTH pseudopotentials are supported for these methods,
+and SG15 pseudopotentials are supported for HF and DFT calculations.
+Occupation smearing and symmetry reduction of k-point meshes are implemented for HF and DFT.
+
+## Feature Overview
+
+The following self-consistent field (SCF) calculations are supported:
+* Hartree-Fock (Restricted and Unrestricted)
+* Kohn-Sham DFT (Restricted and Unrestricted), with LDA, GGA, MGGA, and global hybrid functionals
+
+Currently, the Davidson algorithm is implemented for the effective Hamiltonian diagonalization.
+There are two mixing schemes for the effective potential, "Simple" and "Anderson" (DIIS).
+Symmetry reduction of k-points is supported for SCF calculations, along with occupation
+smearing. The default plane-wave basis set and integration grid are determined by `cell.mesh`,
+but these can be customized using the energy cutoffs `ecut_wf` and `ecut_rho` or by setting
+meshes directly using `PWKSCF.set_meshes()`.
+
+The following post-SCF calculations are supported:
+* MP2 (Restricted and Unrestricted)
+* CCSD (Restricted only)
+
+K-point symmetry and occupation smearing are currently not supported for post-SCF
+methods. The `PWKSCF.get_cpw_virtual()` method can be used to create virtual
+molecular orbitals in a GTO basis for use in post-SCF calculations.
+
+Plane-wave calculations can be performed with GTH or CCECP pseudopotentials
+(or all-electron for very small atoms). There is also basic support for SG15
+norm-conserving pseudopotentials. The post-SCF methods have been tested with GTH
+and CCECP but not SG15, while the SCF methods have been tested with GTH, CCECP, and SG15.
+
diff --git a/examples/pwscf/al.py b/examples/pwscf/al.py
new file mode 100644
index 000000000..b7f99d285
--- /dev/null
+++ b/examples/pwscf/al.py
@@ -0,0 +1,38 @@
+from pyscf.pbc import gto
+from pyscf.pbc.pwscf.smearing import smearing_
+from pyscf.pbc.pwscf import kpt_symm
+import numpy as np
+
+"""
+Simple examples of running DFT for FCC Al. Uses
+k-point symmetry and smearing.
+"""
+
+cell = gto.Cell()
+cell.a = 4.0 * np.eye(3)  # cubic lattice vectors, edge length 4.0
+x = 2.0  # half the cell edge; offsets of the face-centered atoms
+cell.atom = f"""
+Al 0 0 0
+Al 0 {x} {x}
+Al {x} 0 {x}
+Al {x} {x} 0
+"""
+cell.pseudo = 'gth-pade'
+cell.basis = 'gth-szv'
+cell.verbose = 4
+cell.space_group_symmetry = True  # both symmetry flags are set before build()
+cell.symmorphic = True
+cell.build()
+# 4x4x4 k-mesh requested with time-reversal and space-group symmetry enabled
+kpts = cell.make_kpts(
+    [4, 4, 4],
+    time_reversal_symmetry=True,
+    space_group_symmetry=True
+)
+kmf = kpt_symm.KsymAdaptedPWKRKS(cell, kpts, ecut_wf=40)
+kmf = smearing_(kmf, sigma=0.01, method='gauss')  # the returned object is used below
+kmf.xc = "PBE"
+kmf.nvir = 2
+kmf.conv_tol = 1e-7
+kmf.kernel()
+kmf.dump_scf_summary()
diff --git a/examples/pwscf/kccsd.py b/examples/pwscf/kccsd.py
new file mode 100644
index 000000000..3c42ea26b
--- /dev/null
+++ b/examples/pwscf/kccsd.py
@@ -0,0 +1,34 @@
+import numpy as np
+from pyscf.pbc import cc
+from pyscf.pbc.pwscf.pw_helper import gtomf2pwmf
+from pyscf.pbc.pwscf.kccsd_rhf import PWKRCCSD
+
+"""
+Simple CCSD calculation
+"""
+
+a0 = 1.78339987
+atom = "C 0 0 0; C %.10f %.10f %.10f" % (a0*0.5, a0*0.5, a0*0.5)
+a = np.asarray([
+        [0., a0, a0],
+        [a0, 0., a0],
+        [a0, a0, 0.]])
+
+from pyscf.pbc import gto, scf, pwscf
+cell = gto.Cell(atom=atom, a=a, basis="gth-szv", pseudo="gth-pade",
+                ke_cutoff=50)
+cell.build()
+cell.verbose = 5
+
+kpts = cell.make_kpts([2,1,1])
+
+mf = scf.KRHF(cell, kpts)
+mf.kernel()
+
+mcc = cc.kccsd_rhf.RCCSD(mf)
+mcc.kernel()
+
+pwmf = gtomf2pwmf(mf)
+pwmcc = PWKRCCSD(pwmf).kernel()
+
+assert(np.abs(mcc.e_corr - pwmcc.e_corr) < 1e-5)
\ No newline at end of file
diff --git a/examples/pwscf/kmp2.py b/examples/pwscf/kmp2.py
new file mode 100644
index 000000000..c551478ed
--- /dev/null
+++ b/examples/pwscf/kmp2.py
@@ -0,0 +1,40 @@
+from pyscf.pbc import gto, pwscf
+from pyscf.pbc.pwscf.kmp2 import PWKRMP2
+import numpy as np
+
+"""
+Simple MP2 calculation
+"""
+
+atom = "H 0 0 0; H 0.9 0 0"
+a = np.eye(3) * 3
+basis = "gth-szv"
+pseudo = "gth-pade"
+
+ke_cutoff = 50
+
+cell = gto.Cell(atom=atom, a=a, basis=basis, pseudo=pseudo,
+                ke_cutoff=ke_cutoff)
+cell.build()
+cell.verbose = 6
+
+nk = 2
+kmesh = [nk] * 3
+kpts = cell.make_kpts(kmesh)
+nkpts = len(kpts)  # not used again in this script
+# Plane-wave HF; nvir = 20 equals the largest entry of nvir_lst used below.
+pwmf = pwscf.PWKRHF(cell, kpts)
+pwmf.nvir = 20
+pwmf.kernel()
+# Hard-coded reference correlation energies, keyed by the virtual count as a string.
+es = {"5": -0.01363871, "10": -0.01873622, "20": -0.02461560}
+# kernel() receives all three virtual-space sizes in a single call.
+pwmp = PWKRMP2(pwmf)
+pwmp.kernel(nvir_lst=[5,10,20])
+pwmp.dump_mp2_summary()
+nvir_lst = pwmp.mp2_summary["nvir_lst"]
+ecorr_lst = pwmp.mp2_summary["e_corr_lst"]
+for nvir,ecorr in zip(nvir_lst,ecorr_lst):
+    err = abs(ecorr - es["%d"%nvir])  # deviation from the tabulated value
+    print(err)
+    assert(err < 1e-5)
diff --git a/examples/pwscf/kpt_symm.py b/examples/pwscf/kpt_symm.py
new file mode 100644
index 000000000..2c5f8d73e
--- /dev/null
+++ b/examples/pwscf/kpt_symm.py
@@ -0,0 +1,56 @@
+from pyscf.pbc import gto
+from pyscf.pbc.pwscf.khf import PWKRHF
+from pyscf.pbc.pwscf.kpt_symm import KsymAdaptedPWKRHF
+import numpy as np
+import time
+
+"""
+Demonstrate the speedup of symmetry-adapted HF over
+non-symmetry-adapted HF.
+"""
+
+cell = gto.Cell(
+    atom = "C 0 0 0; C 0.89169994 0.89169994 0.89169994",
+    a = np.asarray([
+            [0.       , 1.78339987, 1.78339987],
+            [1.78339987, 0.        , 1.78339987],
+            [1.78339987, 1.78339987, 0.        ]]),
+    basis="gth-szv",
+    ke_cutoff=50,
+    pseudo="gth-pade",
+    verbose=4,
+    space_group_symmetry=True,
+    symmorphic=True,
+)
+cell.build()
+
+kmesh = [2, 2, 2]
+center = [0, 0, 0]
+kpts = cell.make_kpts(kmesh)
+skpts = cell.make_kpts(
+    kmesh,
+    scaled_center=center,
+    space_group_symmetry=True,
+    time_reversal_symmetry=True,
+)
+
+mf = PWKRHF(cell, kpts, ecut_wf=40)
+mf.nvir = 4
+t0 = time.monotonic()
+mf.kernel()
+t1 = time.monotonic()
+
+mf2 = KsymAdaptedPWKRHF(cell, skpts, ecut_wf=20)
+mf2.damp_type = "simple"
+mf2.damp_factor = 0.7
+mf2.nvir = 4
+t2 = time.monotonic()
+mf2.kernel()
+t3 = time.monotonic()
+
+print(mf.e_tot, mf2.e_tot)
+mf.dump_scf_summary()
+mf2.dump_scf_summary()
+print("nkpts in BZ and IBZ", skpts.nkpts, skpts.nkpts_ibz)
+print("Runtime without symmmetry", t1 - t0)
+print("Runtime with symmetry", t3 - t2)
diff --git a/examples/pwscf/li.py b/examples/pwscf/li.py
new file mode 100644
index 000000000..29086a6a1
--- /dev/null
+++ b/examples/pwscf/li.py
@@ -0,0 +1,61 @@
+from pyscf.pbc import gto
+from pyscf.gto.basis import parse_cp2k_pp
+from pyscf.pbc.pwscf import kpt_symm, khf, smearing
+import numpy as np
+
+"""
+Simple examples of running HF and DFT for BCC Li.
+Both calculations run with smearing.
+The DFT calculation handles a larger k-mesh with
+k-point symmetry.
+"""
+
+cell = gto.Cell()
+a = 3.4393124531669552
+cell.a = a * np.eye(3)
+x = 0.5 * a  # second atom sits at the body center of the cubic cell
+cell.atom = f"""
+Li 0   0   0
+Li {x} {x} {x}
+"""
+cell.pseudo = {'Li': parse_cp2k_pp.parse("""
+#PSEUDOPOTENTIAL
+Li GTH2-HF-q1
+    1    0    0    0
+    0.75910286326041       2   -1.83343584669401    0.32295157976066
+       2
+    0.66792517034256       1    1.83367870276199
+    1.13098354939590       1   -0.00004141168540
+""")}
+cell.basis = 'gth-szv'
+cell.verbose = 4
+cell.space_group_symmetry = True
+cell.symmorphic = True
+cell.mesh = [10, 10, 10]
+cell.build()
+
+# Center at the Baldereschi point
+kpts = cell.make_kpts(
+    [2, 2, 2],
+    scaled_center=[1.0/6, 1.0/6, 0.5]
+)
+kmf = khf.PWKRHF(cell, kpts, ecut_wf=40)
+kmf = smearing.smearing_(kmf, sigma=0.02, method='gauss')
+kmf.xc = "PBE"  # NOTE(review): xc is set on the HF object; it looks unused here -- confirm
+kmf.conv_tol = 1e-7
+kmf.conv_tol_grad = 2e-3
+ehf = kmf.kernel()
+# KS-DFT (PBE) on a 4x4x4 mesh with k-point symmetry and a smaller smearing width.
+kpts = cell.make_kpts(
+    [4, 4, 4],
+    scaled_center=[1.0/6, 1.0/6, 0.5],
+    time_reversal_symmetry=True,
+    space_group_symmetry=True
+)
+kmf = kpt_symm.KsymAdaptedPWKRKS(cell, kpts, ecut_wf=40)
+kmf = smearing.smearing_(kmf, sigma=0.01, method='gauss')
+kmf.xc = "PBE"
+kmf.conv_tol = 1e-7
+kmf.conv_tol_grad = 2e-3
+ehf = kmf.kernel()  # NOTE(review): reuses the name ehf although this is the KS-DFT run
+
diff --git a/examples/pwscf/set_meshes.py b/examples/pwscf/set_meshes.py
new file mode 100644
index 000000000..d91b8c5c6
--- /dev/null
+++ b/examples/pwscf/set_meshes.py
@@ -0,0 +1,32 @@
+from pyscf.pbc import gto
+from pyscf.pbc.pwscf.krks import PWKRKS
+import numpy as np
+
+"""
+This example demonstrates converging the energy with respect to
+the mesh size. Note that set_meshes() sets wf_mesh and xc_mesh separately.
+"""
+
+cell = gto.Cell(
+    atom = "C 0 0 0; C 0.89169994 0.89169994 0.89169994",
+    a = np.asarray([
+            [0.       , 1.78339987, 1.78339987],
+            [1.78339987, 0.        , 1.78339987],
+            [1.78339987, 1.78339987, 0.        ]]),
+    basis="gth-szv",
+    ke_cutoff=50,
+    pseudo="gth-pade",
+)
+
+kmesh = [2, 1, 1]
+kpts = cell.make_kpts(kmesh)
+
+mf = PWKRKS(cell, kpts)
+
+# defaults
+print(cell.mesh, mf.wf_mesh, mf.xc_mesh)
+mf.kernel()
+
+mf.set_meshes(wf_mesh=[15, 15, 15], xc_mesh=[33, 33, 33])
+print(mf.wf_mesh, mf.xc_mesh)
+mf.kernel()
diff --git a/examples/pwscf/sg15.py b/examples/pwscf/sg15.py
new file mode 100644
index 000000000..1074bc12a
--- /dev/null
+++ b/examples/pwscf/sg15.py
@@ -0,0 +1,82 @@
+from pyscf.pbc import gto
+from pyscf.pbc.pwscf.krks import PWKRKS
+# from pyscf.pbc.pwscf.kpt_symm import KsymAdaptedPWKRKS as PWKRKS
+from pyscf.pbc.pwscf.ncpp_cell import NCPPCell
+import numpy as np
+
+"""
+This example demonstrates the convergence of the total
+energy with respect to plane-wave energy cutoff for
+GTH and SG15 pseudopotentials. The SG15 converges
+faster, especially up to a 1000 eV cutoff (36.76 Ha),
+because these potentials were designed to converge more
+quickly.
+
+NOTE: Before using this example, you must set
+pbc_pwscf_ncpp_cell_sg15_path in your pyscf config file.
+"""
+
+kwargs = dict(
+    atom = "C 0 0 0; C 0.89169994 0.89169994 0.89169994",
+    a = np.asarray([
+            [0.       , 1.78339987, 1.78339987],
+            [1.78339987, 0.        , 1.78339987],
+            [1.78339987, 1.78339987, 0.        ]]),
+    basis="gth-szv",
+    ke_cutoff=50,
+    pseudo="gth-pade",
+    verbose=0,
+)
+
+cell = gto.Cell(**kwargs)
+cell.build()
+# Reuse the same settings for NCPPCell, with the "pseudo" entry removed.
+kwargs.pop("pseudo")
+nccell = NCPPCell(**kwargs)
+nccell.build()
+
+kmesh = [2, 2, 2]
+kpts = cell.make_kpts(kmesh)
+# ens1 collects the GTH total energies, ens2 the SG15 ones (one entry per cutoff).
+ens1 = []
+ens2 = []
+# A larger set of ecuts below is provided in case it's useful.
+# ecuts = [18.38235294, 22.05882353, 25.73529412, 29.41176471, 33.08823529,
+#          36.76470588, 44.11764706, 55.14705882, 73.52941176, 91.91176471]
+ecuts = [18.38235294, 25.73529412, 33.08823529, 36.76470588, 55.14705882]
+for ecut in ecuts:
+    print("ECUT", ecut)
+    # Run the GTH calculations
+    mf = PWKRKS(cell, kpts, xc="PBE", ecut_wf=ecut)
+    mf.damp_type = "simple"
+    mf.damp_factor = 0.7
+    mf.nvir = 4 # converge first 4 virtual bands
+    mf.kernel()
+    ens1.append(mf.e_tot)
+
+    # Run the SG15 calculations
+    mf2 = PWKRKS(nccell, kpts, xc="PBE", ecut_wf=ecut)
+    mf2.damp_type = "simple"
+    mf2.damp_factor = 0.7
+    mf2.nvir = 4 # converge first 4 virtual bands
+    mf2.init_pp()  # NOTE(review): init_pp()/init_jk() are called only for the NCPPCell run -- confirm why
+    mf2.init_jk()
+    mf2.kernel()
+    ens2.append(mf2.e_tot)
+    print(mf.e_tot, mf2.e_tot)
+    print()
+
+print()
+print("GTH Total Energies (Ha)")
+print(ens1)
+print("Energy cutoffs (Ha)")
+print(ecuts[:-1])  # the largest cutoff is the reference, so it is left out here
+print("Differences vs Max Cutoff (Ha)")
+print(np.array(ens1[:-1]) - ens1[-1])
+print()
+print("SG15 Total Energies (Ha)")
+print(ens2)
+print("Energy cutoffs (Ha)")
+print(ecuts[:-1])
+print("Differences vs Max Cutoff (Ha)")
+print(np.array(ens2[:-1]) - ens2[-1])
diff --git a/pyscf/lib/CMakeLists.txt b/pyscf/lib/CMakeLists.txt
index 3736e2d39..f05c7b3b7 100644
--- a/pyscf/lib/CMakeLists.txt
+++ b/pyscf/lib/CMakeLists.txt
@@ -199,4 +199,5 @@ set_target_properties (clib_csf PROPERTIES
     OUTPUT_NAME "csf")
 
 add_subdirectory(sfnoci)
+add_subdirectory(pwscf)
 
diff --git a/pyscf/lib/pwscf/CMakeLists.txt b/pyscf/lib/pwscf/CMakeLists.txt
new file mode 100644
index 000000000..c76eeb36e
--- /dev/null
+++ b/pyscf/lib/pwscf/CMakeLists.txt
@@ -0,0 +1,23 @@
+# Copyright 2014-2018 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_library(pwscf SHARED pwscf.c)
+# OpenMP flags are applied at compile and link time; output goes to PROJECT_SOURCE_DIR.
+set_target_properties(pwscf PROPERTIES
+  LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}
+  COMPILE_FLAGS ${OpenMP_C_FLAGS}
+  LINK_FLAGS ${OpenMP_C_FLAGS})
+
+target_link_libraries(pwscf cgto cint cvhf ${BLAS_LIBRARIES})
+
diff --git a/pyscf/lib/pwscf/pwscf.c b/pyscf/lib/pwscf/pwscf.c
new file mode 100644
index 000000000..842725585
--- /dev/null
+++ b/pyscf/lib/pwscf/pwscf.c
@@ -0,0 +1,168 @@
+/* Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+  
+   Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+ 
+        http://www.apache.org/licenses/LICENSE-2.0
+ 
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+ *
+ * Author: Hong-Zhou Ye <hzyechem@gmail.com>
+ * Author: Kyle Bystrom <kylebystrom@gmail.com>
+ */
+
+#include <stdio.h>
+#include <math.h>
+#include <complex.h>
+#include "config.h"
+
+
+// Modified spherical Bessel function of the first kind, order 0:
+//     out[i] = i0(xs[i]) = sinh(x) / x        for i in [0, n)
+// The closed form is 0/0 (NaN) at x == 0, so the limit i0(0) = 1 is
+// returned explicitly there. out must hold at least n doubles.
+void fast_SphBsli0(double * xs, int n, double * out)
+{
+    int i;
+#pragma omp parallel for schedule(static)
+    for (i = 0; i < n; ++i) {
+        const double x = xs[i];
+        out[i] = (x == 0.) ? 1. : sinh(x) / x;
+    }
+}
+
+// Modified spherical Bessel function of the first kind, order 1:
+//     out[i] = i1(xs[i]) = (x cosh(x) - sinh(x)) / x^2    for i in [0, n)
+// Returns the limit i1(0) = 0 at x == 0 instead of 0/0 (NaN). For tiny
+// nonzero |x| the closed form still suffers cancellation in the numerator.
+void fast_SphBsli1(double * xs, int n, double * out)
+{
+    int i;
+#pragma omp parallel for schedule(static)
+    for (i = 0; i < n; ++i) {
+        const double x = xs[i];
+        out[i] = (x == 0.) ? 0. : (x*cosh(x) - sinh(x)) / (x*x);
+    }
+}
+
+// Modified spherical Bessel function of the first kind, order 2:
+//     out[i] = i2(xs[i]) = ((x^2 + 3) sinh(x) - 3 x cosh(x)) / x^3
+// Returns the limit i2(0) = 0 at x == 0 instead of 0/0 (NaN). For tiny
+// nonzero |x| the closed form still suffers cancellation in the numerator.
+void fast_SphBsli2(double * xs, int n, double * out)
+{
+    int i;
+#pragma omp parallel for schedule(static)
+    for (i = 0; i < n; ++i) {
+        const double x = xs[i];
+        out[i] = (x == 0.) ? 0. : ((x*x+3.)*sinh(x) - 3.*x*cosh(x)) / (x*x*x);
+    }
+}
+
+// Modified spherical Bessel function of the first kind, order 3:
+//     out[i] = i3(xs[i]) = ((x^3 + 15 x) cosh(x) - (6 x^2 + 15) sinh(x)) / x^4
+// Returns the limit i3(0) = 0 at x == 0 instead of 0/0 (NaN). For tiny
+// nonzero |x| the closed form still suffers cancellation in the numerator.
+void fast_SphBsli3(double * xs, int n, double * out)
+{
+    int i;
+#pragma omp parallel for schedule(static)
+    for (i = 0; i < n; ++i) {
+        const double x = xs[i];
+        out[i] = (x == 0.) ? 0. : ((x*x*x+15.*x)*cosh(x) -
+                (6.*x*x+15.)*sinh(x)) / (x*x*x*x);
+    }
+}
+
+// Evaluate the order-l function for all n entries of xs (n: size of xs; l: order).
+// Orders 0-3 are dispatched; for any other l nothing is written to out.
+void fast_SphBslin(double * xs, int n, int l, double * out)
+{
+    switch (l) {
+    case 0: fast_SphBsli0(xs, n, out); break;
+    case 1: fast_SphBsli1(xs, n, out); break;
+    case 2: fast_SphBsli2(xs, n, out); break;
+    case 3: fast_SphBsli3(xs, n, out); break;
+    default: break;
+    }
+}
+
+inline static int modulo(int i, int j) {  // remainder of i mod j, in [0, j) for j > 0
+    return (i % j + j) % j;
+}
+
+// Flat offset of the grid point c.(xi,yi,zi)+shift, wrapped periodically into N.
+inline static size_t rotated_index(const int *c, const size_t *N,
+                                   const int *shift, int xi, int yi, int zi)
+{
+    int o[3], d;  // c is read as a row-major 3x3 matrix
+    for (d = 0; d < 3; d++)
+        o[d] = modulo(c[3*d] * xi + c[3*d+1] * yi + c[3*d+2] * zi + shift[d], N[d]);
+    return o[2] + N[2] * (o[1] + N[1] * o[0]);  // last index varies fastest
+}
+
+// Accumulate a rotated, weighted copy of fin into fout:
+//     fout[rot(x, y, z)] += wt * fin[x, y, z]
+// fin and fout are flat arrays of shape (n[0], n[1], n[2]) with the last
+// index varying fastest; no translation is applied (the shift is zero).
+//
+// Parameters:
+//   fin  : input values, n[0]*n[1]*n[2] doubles
+//   fout : output accumulator of the same size (updated in place)
+//   n    : grid dimensions
+//   c    : 9 integers, the 3x3 rotation matrix in row-major order
+//   wt   : scalar weight applied to fin
+//
+// The updates are unsynchronized: the code assumes that each coord in fin
+// maps to 1 coord in fout, which should always be the case.
+// Otherwise there will be race conditions.
+void add_rotated_realspace_func(const double *fin, double *fout, const int *n,
+                                const int *c, const double wt)
+{
+    const size_t N[3] = {n[0], n[1], n[2]};
+    const int shift[3] = {0, 0, 0};
+    int xi;
+#pragma omp parallel for schedule(static)
+    for (xi = 0; xi < n[0]; xi++) {
+        int yi, zi;
+        // running flat index of (xi, yi, zi) in fin
+        size_t src = xi * N[1] * N[2];
+        for (yi = 0; yi < n[1]; yi++) {
+            for (zi = 0; zi < n[2]; zi++, src++) {
+                const size_t dst = rotated_index(c, N, shift, xi, yi, zi);
+                fout[dst] += wt * fin[src];
+            }
+        }
+    }
+}
+
+
+// Scatter fin into fout through a rotation plus integer shift:
+//     fout[rot(x, y, z) + shift] = fin[x, y, z]   (indices wrapped modulo n)
+// Both arrays are flat complex arrays of shape (n[0], n[1], n[2]); c is the
+// row-major 3x3 integer matrix and shift the per-axis integer offset.
+// Entries of fout that no input point maps to are left untouched.
+void get_rotated_complex_func(const double complex *fin,
+                              double complex *fout, const int *n,
+                              const int *c, const int *shift) {
+    const size_t N[3] = {n[0], n[1], n[2]};
+    int xi;
+#pragma omp parallel for schedule(static)
+    for (xi = 0; xi < n[0]; xi++) {
+        int yi, zi;
+        size_t src = xi * N[1] * N[2];  // flat index of (xi, 0, 0)
+        for (yi = 0; yi < n[1]; yi++) {
+            for (zi = 0; zi < n[2]; zi++, src++) {
+                // plain assignment: existing contents of fout are overwritten
+                fout[rotated_index(c, N, shift, xi, yi, zi)] = fin[src];
+            }
+        }
+    }
+}
+
diff --git a/pyscf/pbc/pwscf/__init__.py b/pyscf/pbc/pwscf/__init__.py
new file mode 100644
index 000000000..4e1963d9b
--- /dev/null
+++ b/pyscf/pbc/pwscf/__init__.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Hong-Zhou Ye <hzyechem@gmail.com>
+#
+
+'''Plane wave-based Hartree-Fock, DFT, MP2 and CCSD for periodic systems
+'''
+
+from pyscf.pbc.pwscf import khf, kuhf
+
+PWKRHF = KRHF = khf.PWKRHF
+PWKUHF = KUHF = kuhf.PWKUHF
+
+from pyscf.pbc.pwscf import krks, kuks
+
+PWKRKS = KRKS = krks.PWKRKS
+PWKUKS = KUKS = kuks.PWKUKS
+
+from pyscf.pbc.pwscf import kmp2, kump2
+PWKRMP2 = KRMP2 = PWKMP2 = KMP2 = kmp2.PWKRMP2
+PWKUMP2 = KUMP2 = kump2.PWKUMP2
+
+from pyscf.pbc.pwscf import kccsd_rhf
+PWKRCCSD = KRCCSD = PWKCCSD = KCCSD = kccsd_rhf.PWKRCCSD
diff --git a/pyscf/pbc/pwscf/ao2mo/__init__.py b/pyscf/pbc/pwscf/ao2mo/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/pyscf/pbc/pwscf/ao2mo/molint.py b/pyscf/pbc/pwscf/ao2mo/molint.py
new file mode 100644
index 000000000..a7a64bc7b
--- /dev/null
+++ b/pyscf/pbc/pwscf/ao2mo/molint.py
@@ -0,0 +1,246 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Hong-Zhou Ye <hzyechem@gmail.com>
+#
+
+
+""" Generating MO integrals
+"""
+
+import time
+import h5py
+import tempfile
+import numpy as np
+
+from pyscf import lib
+from pyscf.lib import logger
+from pyscf.pbc import tools
+
+from pyscf.pbc.pwscf.pw_helper import get_kcomp, set_kcomp, wf_ifft
+
+dot = lib.dot
+einsum = np.einsum
+
+
+def get_molint(mf_or_fchk, kpts, nvir=None, erifile=None, dataname="eri_mo"):
+    """Placeholder: not implemented yet; the body is `pass`, so None is returned.
+    Args:
+        mf_or_fchk : PWSCF object or str (presumably a chkfile path -- TODO confirm)
+    """
+    pass
+
+
+def kconserv(kpt123, reduce_latvec, kdota):
+    tmp = dot(kpt123.reshape(1,-1), reduce_latvec) + kdota
+    return np.where(abs(tmp - np.rint(tmp)).sum(axis=1)<1e-6)[0][0]
+
+
+def get_molint_from_C(cell, C_ks, kpts, mo_slices=None, exxdiv=None,
+                      erifile=None, dataname="eris", basis_ks=None):
+    """
+    Args:
+        C_ks : list or h5py group
+            If list, the MO coeff for the k-th kpt is C_ks[k]
+            If h5py, the MO coeff for the k-th kpt is C_ks["%d"%k][()]
+            Note: this function assumes that MOs from different kpts are appropriately padded.
+        mo_slices
+        erifile: str, h5py File or h5py Group
+            The file to store the ERIs. If not given, the ERIs are held in memory.
+    """
+    cput0 = (logger.process_clock(), logger.perf_counter())
+
+    nkpts = len(kpts)
+    if basis_ks is None:
+        mesh = cell.mesh
+    else:
+        mesh = basis_ks[0].mesh
+    coords = cell.get_uniform_grids(mesh=mesh)
+    ngrids = coords.shape[0]
+    fac = ngrids**3. / cell.vol / nkpts
+
+    reduce_latvec = cell.lattice_vectors() / (2*np.pi)
+    kdota = dot(kpts, reduce_latvec)
+
+    swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+    fswap = lib.H5TmpFile(swapfile.name)
+    swapfile = None
+
+    C_ks_R = fswap.create_group("C_ks_R")
+    for k in range(nkpts):
+        C_k = get_kcomp(C_ks, k)
+        basis = None if basis_ks is None else basis_ks[k]
+        # C_k = tools.ifft(C_k, mesh)
+        C_k = wf_ifft(C_k, mesh, basis=basis)
+        set_kcomp(C_k, C_ks_R, k)
+        C_k = None
+
+    dtype = np.complex128
+    if mo_slices is None:
+        nmo = get_kcomp(C_ks, 0, load=False).shape[0]
+        mo_slices = [(0,nmo)] * 4
+    nmos = [mo_slice[1]-mo_slice[0] for mo_slice in mo_slices]
+    buf = np.empty(nmos[0]*nmos[1]*ngrids, dtype=dtype)
+    mo_ranges = [list(range(mo_slice[0],mo_slice[1])) for mo_slice in mo_slices]
+
+    if erifile is None:
+        incore = True
+        deri = np.zeros((nkpts,nkpts,nkpts,*nmos), dtype=dtype)
+    elif isinstance(erifile, (str, h5py.Group)):
+        incore = False
+        if isinstance(erifile, str):
+            if h5py.is_hdf5(erifile):
+                feri = h5py.File(erifile, "a")
+            else:
+                feri = h5py.File(erifile, "w")
+        else:
+            assert(isinstance(erifile, h5py.Group))
+            feri = erifile
+        if dataname in feri: del feri[dataname]
+        deri = feri.create_dataset(dataname, (nkpts,nkpts,nkpts,*nmos),
+                                   dtype=dtype)
+        buf2 = np.empty(nmos, dtype=dtype)
+    else:
+        raise RuntimeError
+
+    cput1 = logger.timer(cell, 'initialize pwmolint', *cput0)
+
+    tick = np.zeros(2)
+    tock = np.zeros(2)
+    tspans = np.zeros((4,2))
+    tcomps = ["init", "v_ks_R", "eri", "tot"]
+
+    for k1 in range(nkpts):
+        kpt1 = kpts[k1]
+        p0,p1 = mo_slices[0]
+        C_k1_R = get_kcomp(C_ks_R, k1, occ=mo_ranges[0])
+        for k2 in range(nkpts):
+            tick[:] = (logger.process_clock(), logger.perf_counter())
+
+            kpt2 = kpts[k2]
+            kpt12 = kpt2 - kpt1
+            q0,q1 = mo_slices[1]
+            C_k2_R = get_kcomp(C_ks_R, k2, occ=mo_ranges[1])
+            coulG_k12 = tools.get_coulG(cell, kpt12, exx=exxdiv, mesh=mesh)
+# FIXME: batch appropriately
+            v_pq_k12 = np.ndarray((nmos[0],nmos[1],ngrids), dtype=dtype,
+                                  buffer=buf)
+            for p in range(p0,p1):
+                ip = p - p0
+                v_pq_k12[ip] = tools.ifft(tools.fft(C_k1_R[ip].conj() * C_k2_R,
+                                          mesh) * coulG_k12, mesh)
+
+            tock[:] = (logger.process_clock(), logger.perf_counter())
+            tspans[1] += tock - tick
+
+            for k3 in range(nkpts):
+                kpt3 = kpts[k3]
+                kpt123 = kpt12 - kpt3
+                k4 = kconserv(kpt123, reduce_latvec, kdota)
+                kpt4 = kpts[k4]
+                kpt1234 = kpt123 + kpt4
+                phase = np.exp(1j*lib.dot(coords,
+                               kpt1234.reshape(-1,1))).reshape(-1)
+
+                r0,r1 = mo_slices[2]
+                C_k3_R = get_kcomp(C_ks_R, k3, occ=mo_ranges[2])
+                s0,s1 = mo_slices[3]
+                C_k4_R = get_kcomp(C_ks_R, k4, occ=mo_ranges[3]) * phase
+
+                if incore:
+                    vpqrs = deri[k1,k2,k3]
+                else:
+                    vpqrs = np.ndarray(nmos, dtype=dtype, buffer=buf2)
+                for r in range(r0,r1):
+                    ir = r - r0
+                    rho_rs_k34 = C_k3_R[ir].conj() * C_k4_R
+                    vpqrs[:,:,ir] = dot(v_pq_k12.reshape(-1,ngrids),
+                                        rho_rs_k34.T).reshape(nmos[0],nmos[1],nmos[-1])
+                vpqrs *= fac
+                if not incore:
+                    deri[k1,k2,k3,:] = vpqrs
+                    vpqrs = None
+            tick[:] = (logger.process_clock(), logger.perf_counter())
+            tspans[2] += tick - tock
+
+        tock[:] = (logger.process_clock(), logger.perf_counter())
+        cput1 = logger.timer(cell, 'kpt %d (%6.3f %6.3f %6.3f)'%(k1,*kpt1),
+                             *cput1)
+
+    fswap.close()
+
+    cput1 = logger.timer(cell, 'pwmolint', *cput0)
+    tspans[3] = np.asarray(cput1) - np.asarray(cput0)
+
+# dump timing
+    def write_time(comp, t_comp, t_tot):
+        tc, tw = t_comp
+        tct, twt = t_tot
+        rc = tc / tct * 100
+        rw = tw / twt * 100
+        fmtstr = 'CPU time for %10s %9.2f  ( %6.2f%% ), wall time %9.2f  ( %6.2f%% )'
+        logger.debug1(cell, fmtstr, comp.ljust(10), tc, rc, tw, rw)
+
+    t_tot = tspans[-1]
+    for icomp,comp in enumerate(tcomps):
+        write_time(comp, tspans[icomp], t_tot)
+
+    return deri
+
+
+if __name__ == "__main__":
+    from pyscf.pbc import pwscf, gto
+
+    atom = "H 0 0 0; H 0.9 0 0"
+    a = np.eye(3) * 3
+    basis = "gth-szv"
+    pseudo = "gth-pade"
+
+    ke_cutoff = 30
+
+    cell = gto.Cell(atom=atom, a=a, basis=basis, pseudo=pseudo,
+                    ke_cutoff=ke_cutoff)
+    cell.build()
+    cell.verbose = 5
+
+    kmesh = [2,1,1]
+    kpts = cell.make_kpts(kmesh)
+    nkpts = len(kpts)
+
+    nvir = 5
+    chkfile = "mf.chk"
+    mf = pwscf.KRHF(cell, kpts)
+    mf.nvir = nvir
+    # mf.init_guess = "chk"
+    mf.chkfile = chkfile
+    mf.kernel()
+
+    mmp = pwscf.KMP2(mf)
+    mmp.kernel()
+
+    fchk = h5py.File(chkfile, "r")
+    C_ks = fchk["mo_coeff"]
+
+    swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+    erifile = swapfile.name
+    swapfile = None
+
+    no = cell.nelectron // 2
+    nmo = no + nvir
+    mo_slices = [(0,no),(no,nmo),(0,no),(no,nmo)]
+    feri = get_molint_from_C(cell, C_ks, mo_slices, kpts, exxdiv=None,
+                             erifile=erifile, dataname="eris")
+
+    fchk.close()
diff --git a/pyscf/pbc/pwscf/chkfile.py b/pyscf/pbc/pwscf/chkfile.py
new file mode 100644
index 000000000..b8edbfb85
--- /dev/null
+++ b/pyscf/pbc/pwscf/chkfile.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Hong-Zhou Ye <hzyechem@gmail.com>
+#
+
+import h5py
+import numpy as np
+from pyscf.lib.chkfile import load, save, save_mol
+from pyscf.pbc.lib.chkfile import load_cell
+from pyscf.pbc.pwscf.pw_helper import get_kcomp
+
+
+def load_scf(chkfile):
+    """Return (cell, scf) loaded from `chkfile` via load_cell and load(chkfile, 'scf')."""
+    return load_cell(chkfile), load(chkfile, 'scf')
+
+def dump_scf(mol, chkfile, e_tot, mo_energy, mo_occ, mo_coeff,
+             overwrite_mol=True):
+    if h5py.is_hdf5(chkfile) and not overwrite_mol:
+        with h5py.File(chkfile, 'a') as fh5:
+            if 'mol' not in fh5:
+                fh5['mol'] = mol.dumps()
+    else:
+        save_mol(mol, chkfile)
+
+    scf_dic = {'e_tot'    : e_tot,
+               'mo_energy': mo_energy,
+               'mo_occ'   : mo_occ,}
+    save(chkfile, 'scf', scf_dic)
+
+    # save mo_coeff only if incore mode
+    if mo_coeff is not None:
+        with h5py.File(chkfile, "a") as f:
+            if "mo_coeff" in f: del f["mo_coeff"]
+            C_ks = f.create_group("mo_coeff")
+
+            if isinstance(mo_energy[0], np.ndarray):
+                nkpts = len(mo_coeff)
+                for k in range(nkpts):
+                    C_ks["%d"%k] = get_kcomp(mo_coeff, k)
+            else:
+                ncomp = len(mo_energy)
+                nkpts = len(mo_energy[0])
+                for comp in range(ncomp):
+                    C_ks_comp = C_ks.create_group("%d"%comp)
+                    mo_coeff_comp = get_kcomp(mo_coeff, comp, load=False)
+                    for k in range(nkpts):
+                        C_ks_comp["%d"%k] = get_kcomp(mo_coeff_comp, k)
+
+
+def load_mo_coeff(C_ks):
+    if isinstance(C_ks["0"], h5py.Group):
+        ncomp = len(C_ks)
+        mo_coeff = [load_mo_coeff(C_ks["%d"%comp]) for comp in range(ncomp)]
+    else:
+        nkpts = len(C_ks)
+        mo_coeff = [C_ks["%d"%k][()] for k in range(nkpts)]
+
+    return mo_coeff
diff --git a/pyscf/pbc/pwscf/jk.py b/pyscf/pbc/pwscf/jk.py
new file mode 100644
index 000000000..66e8bfed6
--- /dev/null
+++ b/pyscf/pbc/pwscf/jk.py
@@ -0,0 +1,464 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Hong-Zhou Ye <hzyechem@gmail.com>
+#
+
+'''J/K builder for PW-SCF
+'''
+
+import tempfile
+import numpy as np
+
+from pyscf.pbc import tools
+from pyscf.pbc.pwscf.pw_helper import (get_kcomp, set_kcomp, acc_kcomp,
+                                       scale_kcomp, wf_fft, wf_ifft)
+from pyscf.pbc.lib.kpts_helper import member, is_zero
+from pyscf import lib
+from pyscf import __config__
+
+
+# Occupation threshold: the routines in this module select the orbitals whose
+# occupation number is strictly greater than this value as "occupied".
+THR_OCC = 1e-10
+
+
+def _mul_by_occ_(C_k, mocc_k, occ=None):
+    """Scale the rows of ``C_k`` in place by the square root of occupations.
+
+    ``occ`` selects which entries of ``mocc_k`` are used (one per row of
+    ``C_k``); when omitted, the entries above ``THR_OCC`` are taken.
+    """
+    # TODO this won't work with some types of smearing
+    # because it assumes positive occupations
+    idx = np.where(mocc_k > THR_OCC)[0].tolist() if occ is None else occ
+    sqrt_occ = np.sqrt(mocc_k[idx])
+    C_k[:] *= sqrt_occ[:, None]
+
+
+def get_rho_R(C_ks, mocc_ks, mesh, basis_ks=None):
+    """
+    Accumulate the real-space density from the occupied orbitals of every
+    k-point (orbitals with occupation above ``THR_OCC``).
+
+    Normalization is (1.0 / nkpts) * ng * rho_R.sum() = nelec
+    """
+    rho_R = 0.
+    for k in range(len(C_ks)):
+        mocc_k = mocc_ks[k]
+        occ = np.where(mocc_k > THR_OCC)[0].tolist()
+        Co_k = get_kcomp(C_ks, k, occ=occ)
+        basis = basis_ks[k] if basis_ks is not None else None
+        # occupied orbitals on the real-space grid, weighted by sqrt(occ)
+        Co_k_R = wf_ifft(Co_k, mesh, basis)
+        _mul_by_occ_(Co_k_R, mocc_k, occ)
+        rho_R += np.einsum("ig,ig->g", Co_k_R.conj(), Co_k_R).real
+    return rho_R
+
+
+def apply_j_kpt(C_k, mesh, vj_R, C_k_R=None, basis=None):
+    """Apply the local potential ``vj_R`` to the orbitals ``C_k``.
+
+    The orbitals are taken on the real-space grid (``C_k_R`` when supplied,
+    otherwise ``wf_ifft(C_k)``), multiplied by ``vj_R`` and transformed back
+    with ``wf_fft``.
+    """
+    if C_k_R is None:
+        C_k_R = wf_ifft(C_k, mesh, basis)
+    vC_R = C_k_R * vj_R
+    return wf_fft(vC_R, mesh, basis)
+
+
+def apply_k_kpt(cell, C_k, kpt1, C_ks, mocc_ks, kpts, mesh, Gv,
+                C_k_R=None, C_ks_R=None, exxdiv=None,
+                basis=None, basis_ks=None):
+    r""" Apply the EXX operator to given MOs
+
+    Math:
+        Cbar_k(G) = \sum_{j,k'} \sum_{G'} rho_{jk',ik}(G') v(k-k'+G') C_k(G-G')
+    Code:
+        rho_r = C_ik_r * C_jk'_r.conj()
+        rho_G = FFT(rho_r)
+        coulG = get_coulG(k-k')
+        v_r = iFFT(rho_G * coulG)
+        Cbar_ik_G = FFT(v_r * C_jk'_r)
+
+    Args:
+        C_k: orbitals at ``kpt1`` the operator is applied to.
+        C_ks, mocc_ks, kpts: orbitals, occupations and k-points defining the
+            operator; only orbitals with occupation above ``THR_OCC`` enter.
+        C_k_R, C_ks_R: optional precomputed real-space versions of ``C_k``
+            and of the orbitals in ``C_ks``; when ``C_ks_R`` is given,
+            ``C_ks`` is not read.
+        exxdiv: accepted for interface compatibility; not used here
+            (``get_coulG`` is called with ``exx=False``).
+        basis, basis_ks: passed through to ``wf_ifft`` / ``wf_fft``.
+
+    Returns:
+        ``wf_fft`` of the accumulated real-space result, scaled by
+        ``ngrids**2 / (cell.vol * nkpts)``.
+    """
+    ngrids = Gv.shape[0]
+    nkpts = len(kpts)
+    fac = ngrids**2./(cell.vol*nkpts)
+    if basis_ks is None:
+        basis_ks = [None] * len(mocc_ks)
+
+    if C_k_R is None: C_k_R = wf_ifft(C_k, mesh, basis=basis)
+    # The accumulation happens on the real-space grid, so the buffer must
+    # follow C_k_R (as apply_k_s1 does), not the G-space array C_k whose
+    # shape can differ when a reduced ``basis`` is used.
+    Cbar_k = np.zeros_like(C_k_R)
+
+    for k2 in range(nkpts):
+        kpt2 = kpts[k2]
+        coulG = tools.get_coulG(cell, kpt1-kpt2, exx=False, mesh=mesh, Gv=Gv)
+
+        occ = np.where(mocc_ks[k2]>THR_OCC)[0]
+        no_k2 = occ.size
+        if C_ks_R is None:
+            Co_k2 = get_kcomp(C_ks, k2, occ=occ)
+            Co_k2_R = wf_ifft(Co_k2, mesh, basis=basis_ks[k2])
+            Co_k2 = None
+        else:
+            Co_k2_R = get_kcomp(C_ks_R, k2, occ=occ)
+        # weight occupied orbitals by sqrt(occ); each appears twice below
+        _mul_by_occ_(Co_k2_R, mocc_ks[k2], occ)
+        for j in range(no_k2):
+            Cj_k2_R = Co_k2_R[j]
+            vij_R = tools.ifft(
+                tools.fft(C_k_R * Cj_k2_R.conj(), mesh) * coulG, mesh)
+            Cbar_k += vij_R * Cj_k2_R
+
+    Cbar_k = wf_fft(Cbar_k, mesh, basis=basis) * fac
+
+    return Cbar_k
+
+
+def apply_k_kpt_support_vec(C_k, W_k):
+    """Return ``(C_k . W_k^H) . W_k`` using the support vectors ``W_k``."""
+    overlap = lib.dot(C_k, W_k.conj().T)
+    return lib.dot(overlap, W_k)
+
+
+def apply_k_s1(cell, C_ks, mocc_ks, kpts, Ct_ks, ktpts, mesh, Gv, out=None,
+               outcore=False, basis_ks=None):
+    """Apply the exchange operator defined by ``C_ks`` to target orbitals.
+
+    Args:
+        C_ks, mocc_ks, kpts: orbitals, occupations and k-points defining the
+            operator; only orbitals with occupation above ``THR_OCC`` enter.
+        Ct_ks, ktpts: target orbitals and their k-points.
+        out: container receiving one result per target k-point (a new list
+            is created when None).
+        outcore: keep the real-space orbitals in a temporary HDF5 file
+            instead of in memory.
+        basis_ks: per-k-point basis passed to ``wf_ifft`` / ``wf_fft``.
+            NOTE(review): it is indexed with the k-point index for ``C_ks``
+            and with the target index for ``Ct_ks`` — confirm that the two
+            sets share the same ordering whenever a basis is supplied.
+
+    Returns:
+        ``out``, holding for each target k-point the ``wf_fft`` of the
+        accumulated result scaled by ``ngrids**2 / (cell.vol * nkpts)``.
+    """
+    nkpts = len(kpts)
+    nktpts = len(ktpts)
+    ngrids = np.prod(mesh)
+    fac = ngrids**2./(cell.vol*nkpts)
+    occ_ks = [np.where(mocc_ks[k] > THR_OCC)[0] for k in range(nkpts)]
+    if basis_ks is None:
+        # must be long enough for both the k-point and the target loops
+        basis_ks = [None] * max(len(C_ks), nktpts)
+
+    if out is None: out = [None] * nktpts
+
+# swap file to hold FFTs
+    if outcore:
+        swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+        fswap = lib.H5TmpFile(swapfile.name)
+        swapfile = None
+        Co_ks_R = fswap.create_group("Co_ks_R")
+        Ct_ks_R = fswap.create_group("Ct_ks_R")
+    else:
+        Co_ks_R = [None] * nkpts
+        Ct_ks_R = [None] * nktpts
+
+    # occupied orbitals, weighted by sqrt(occ), on the real-space grid
+    for k in range(nkpts):
+        Co_k = get_kcomp(C_ks, k, occ=occ_ks[k])
+        _mul_by_occ_(Co_k, mocc_ks[k], occ_ks[k])
+        set_kcomp(wf_ifft(Co_k, mesh, basis_ks[k]), Co_ks_R, k)
+        Co_k = None
+
+    # target orbitals on the real-space grid
+    for k in range(nktpts):
+        Ct_k = get_kcomp(Ct_ks, k)
+        set_kcomp(wf_ifft(Ct_k, mesh, basis_ks[k]), Ct_ks_R, k)
+        Ct_k = None
+
+    for k1,kpt1 in enumerate(ktpts):
+        Ct_k1_R = get_kcomp(Ct_ks_R, k1)
+        Ctbar_k1 = np.zeros_like(Ct_k1_R)
+        for k2,kpt2 in enumerate(kpts):
+            coulG = tools.get_coulG(cell, kpt1-kpt2, exx=False, mesh=mesh,
+                                    Gv=Gv)
+            Co_k2_R = get_kcomp(Co_ks_R, k2)
+            # Co_k2_R stores only the occupied orbitals, so its rows are
+            # numbered 0..n_occ-1; the original orbital indices in
+            # occ_ks[k2] are not valid row indices in general.
+            for j in range(len(occ_ks[k2])):
+                Cj_k2_R = Co_k2_R[j]
+                vij_R = tools.ifft(tools.fft(Ct_k1_R * Cj_k2_R.conj(), mesh) *
+                                   coulG, mesh)
+                Ctbar_k1 += vij_R * Cj_k2_R
+
+        Ctbar_k1 = wf_fft(Ctbar_k1, mesh, basis_ks[k1]) * fac
+        set_kcomp(Ctbar_k1, out, k1)
+        Ctbar_k1 = None
+
+    return out
+
+
+def apply_k_s2(cell, C_ks, mocc_ks, kpts, mesh, Gv, out=None, outcore=False,
+               basis_ks=None):
+    """Apply the exchange operator built from ``C_ks`` to ``C_ks`` itself.
+
+    For occupied-occupied pairs each pair potential is computed once and
+    accumulated into the results of both orbitals of the pair (directly for
+    the first, complex-conjugated for the second); occupied-virtual
+    contributions are handled in a separate loop.
+
+    Args:
+        C_ks, mocc_ks, kpts: orbitals, occupations and k-points; orbitals
+            with occupation above ``THR_OCC`` are counted as occupied.
+        out: container receiving one result per k-point (a new list is
+            created when None).
+        outcore: keep the real-space orbitals in a temporary HDF5 file
+            instead of in memory.
+        basis_ks: per-k-point basis passed to ``wf_ifft`` / ``wf_fft``.
+
+    Returns:
+        ``out``, holding for each k-point the ``wf_fft`` of the accumulated
+        result scaled by ``ngrids**2 / (cell.vol * nkpts)``.
+
+    Raises:
+        NotImplementedError: if, at some k-point, the occupied orbitals are
+            not the leading ones.
+    """
+    nkpts = len(kpts)
+    ngrids = np.prod(mesh)
+    fac = ngrids**2./(cell.vol*nkpts)
+    occ_ks = [np.where(mocc_ks[k] > THR_OCC)[0] for k in range(nkpts)]
+    if basis_ks is None:
+        basis_ks = [None] * len(C_ks)
+
+    if out is None: out = [None] * nkpts
+
+    # number of orbitals per k-point; C_ks is either a list or keyed by "%d"
+    if isinstance(C_ks, list):
+        n_ks = [C_ks[k].shape[0] for k in range(nkpts)]
+    else:
+        n_ks = [C_ks["%d"%k].shape[0] for k in range(nkpts)]
+    no_ks = [np.sum(mocc_ks[k]>THR_OCC) for k in range(nkpts)]
+    n_max = np.max(n_ks)
+    no_max = np.max(no_ks)
+
+    # TODO: non-aufbau configurations
+    # the slicing below ([:no_k]) requires the occupied orbitals to come first
+    for k in range(nkpts):
+        if np.sum(mocc_ks[k][:no_ks[k]]>THR_OCC) != no_ks[k]:
+            raise NotImplementedError("Non-aufbau configurations are not supported.")
+
+    if outcore:
+        swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+        fswap = lib.H5TmpFile(swapfile.name)
+        swapfile = None
+        C_ks_R = fswap.create_group("C_ks_R")
+    else:
+        C_ks_R = [None] * nkpts
+
+    # real-space orbitals, plus zero-initialised accumulators in ``out``
+    for k in range(nkpts):
+        C_k = get_kcomp(C_ks, k)
+        set_kcomp(wf_ifft(C_k, mesh, basis_ks[k]), C_ks_R, k)
+        set_kcomp(np.zeros((C_k.shape[0], np.prod(mesh)), dtype=np.complex128),
+                  out, k)
+        C_k = None
+
+    dtype = np.complex128
+
+    # scratch buffers reused for every k-point pair
+    buf1 = np.empty(n_max*ngrids, dtype=dtype)
+    buf2 = np.empty(no_max*ngrids, dtype=dtype)
+    for k1, kpt1 in enumerate(kpts):
+        C_k1_R = get_kcomp(C_ks_R, k1)
+        no_k1 = no_ks[k1]
+        n_k1 = n_ks[k1]
+        Cbar_k1 = np.ndarray((n_k1,ngrids), dtype=dtype, buffer=buf1)
+        Cbar_k1.fill(0)
+
+        mocc_k1 = mocc_ks[k1][:no_k1]
+        for k2, kpt2 in enumerate(kpts):
+            # without virtuals at k1, pairs with k2 > k1 are covered when the
+            # roles of k1 and k2 are swapped
+            if n_k1 == no_k1 and k2 > k1: continue
+
+            C_k2_R = get_kcomp(C_ks_R, k2)
+            no_k2 = no_ks[k2]
+
+            coulG = tools.get_coulG(cell, kpt1-kpt2, exx=False, mesh=mesh,
+                                    Gv=Gv)
+            mocc_k2 = mocc_ks[k2][:no_k2]
+
+            # o --> o
+            if k2 <= k1:
+                Cbar_k2 = np.ndarray((no_k2,ngrids), dtype=dtype, buffer=buf2)
+                Cbar_k2.fill(0)
+
+                for i in range(no_k1):
+                    # same k-point: j runs up to i (diagonal included) for
+                    # the k1 update, and strictly below i for the k2 update
+                    jmax = i+1 if k2 == k1 else no_k2
+                    jmax2 = i if k2 == k1 else no_k2
+                    vji_R = tools.ifft(tools.fft(C_k2_R[:jmax].conj() *
+                                       C_k1_R[i], mesh) * coulG, mesh)
+                    Cbar_k1[i] += np.sum(
+                        vji_R * C_k2_R[:jmax] * mocc_k2[:jmax, None], axis=0
+                    )
+                    if jmax2 > 0:
+                        # reuse the same pair potential (conjugated) for the
+                        # contribution of orbital i at k1 to orbitals at k2
+                        Co_k1_R_i = C_k1_R[i] * mocc_k1[i]
+                        Cbar_k2[:jmax2] += vji_R[:jmax2].conj() * Co_k1_R_i
+
+                acc_kcomp(Cbar_k2, out, k2, occ=occ_ks[k2])
+
+            # o --> v
+            if n_k1 > no_k1:
+                for j in range(no_ks[k2]):
+                    vij_R = tools.ifft(tools.fft(C_k1_R[no_k1:] *
+                                                 C_k2_R[j].conj(), mesh) *
+                                       coulG, mesh)
+                    Cbar_k1[no_k1:] += vij_R  * C_k2_R[j] * mocc_k2[j]
+
+        acc_kcomp(Cbar_k1, out, k1)
+
+    # back to the plane-wave representation, with the overall prefactor
+    for k in range(nkpts):
+        set_kcomp(wf_fft(get_kcomp(out, k), mesh, basis_ks[k]) * fac, out, k)
+
+    return out
+
+
+def apply_k(cell, C_ks, mocc_ks, kpts, mesh, Gv, Ct_ks=None, ktpts=None,
+            exxdiv=None, out=None, outcore=False, basis_ks=None):
+    """Dispatch to ``apply_k_s1`` or ``apply_k_s2``.
+
+    With target orbitals ``Ct_ks`` the operator built from ``C_ks`` is
+    applied to them (``apply_k_s1``); without, it is applied to ``C_ks``
+    itself (``apply_k_s2``). ``exxdiv`` is accepted but not forwarded.
+    """
+    if Ct_ks is not None:
+        return apply_k_s1(cell, C_ks, mocc_ks, kpts, Ct_ks, ktpts, mesh, Gv,
+                          out, outcore, basis_ks=basis_ks)
+
+    return apply_k_s2(cell, C_ks, mocc_ks, kpts, mesh, Gv, out, outcore,
+                      basis_ks=basis_ks)
+
+
+def jk(mf, with_jk=None, ace_exx=True, outcore=False, mesh=None,
+       basis_ks=None):
+    """Attach a J/K builder to ``mf`` and return ``mf``.
+
+    A caller-supplied ``with_jk`` is attached unchanged. Otherwise a new
+    ``PWJK`` is created from ``mf.cell``, ``mf.kpts`` and ``mf.exxdiv`` and
+    configured with ``ace_exx`` and ``outcore``.
+    """
+    if with_jk is not None:
+        mf.with_jk = with_jk
+        return mf
+
+    builder = PWJK(mf.cell, mf.kpts, exxdiv=mf.exxdiv,
+                   mesh=mesh, basis_ks=basis_ks)
+    builder.ace_exx = ace_exx
+    builder.outcore = outcore
+    mf.with_jk = builder
+    return mf
+
+
+def get_ace_support_vec(cell, C1_ks, mocc1_ks, k1pts, C2_ks=None, k2pts=None,
+                        out=None, mesh=None, Gv=None, exxdiv=None, method="cd",
+                        outcore=False, basis_ks=None):
+    """
+    Compute the ACE support vectors for orbitals given by C2_ks and the
+    corresponding k-points given by k2pts, using the Fock matrix obtained from
+    C1_ks, mocc1_ks, k1pts. If C2_ks and/or k2pts are not provided, their
+    values will be set to the C1_ks and/or k1pts. The results are saved to out
+    and returned. If out is not provided, a new list with one entry per
+    k-point in k2pts is created.
+
+    mesh and Gv default to cell.mesh and cell.get_Gv(mesh). With outcore set,
+    the intermediate exchange-applied orbitals are held in a temporary HDF5
+    group that is deleted before returning.
+    """
+    from pyscf.pbc.pwscf.pseudo import get_support_vec
+    if mesh is None: mesh = cell.mesh
+    if Gv is None: Gv = cell.get_Gv(mesh)
+
+    if outcore:
+        swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+        fswap = lib.H5TmpFile(swapfile.name)
+        dname0 = "W_ks"
+        W_ks = fswap.create_group(dname0)
+    else:
+        W_ks = None
+
+    W_ks = apply_k(cell, C1_ks, mocc1_ks, k1pts, mesh, Gv,
+                   Ct_ks=C2_ks, ktpts=k2pts, exxdiv=exxdiv, out=W_ks,
+                   outcore=outcore, basis_ks=basis_ks)
+
+    if C2_ks is None: C2_ks = C1_ks
+    if k2pts is None: k2pts = k1pts
+    nk2pts = len(k2pts)
+
+    # Same default as apply_k_s1/apply_k_s2: without it the None default
+    # would be handed to set_kcomp as the output container.
+    if out is None: out = [None] * nk2pts
+
+    for k in range(nk2pts):
+        C_k = get_kcomp(C2_ks, k)
+        W_k = get_kcomp(W_ks, k)
+        W_k = get_support_vec(C_k, W_k, method=method)
+        set_kcomp(W_k, out, k)
+        W_k = None
+
+    if outcore:
+        del fswap[dname0]
+
+    return out
+
+
+class PWJK:
+    """Coulomb (J) and exchange (K) helper for plane-wave orbitals.
+
+    Keyword options read from ``kwargs``: ``ace_exx`` (default True),
+    ``outcore`` (default False) and ``basis_ks`` (default None).
+    ``exx_W_ks`` holds the exchange data prepared by
+    ``update_k_support_vec`` and is created lazily.
+    """
+
+    def __init__(self, cell, kpts, mesh=None, exxdiv=None, **kwargs):
+        self.cell = cell
+        self.kpts = kpts
+        self.mesh = cell.mesh if mesh is None else mesh
+        self.Gv = cell.get_Gv(self.mesh)
+        self.exxdiv = exxdiv
+
+        # kwargs
+        self.ace_exx = kwargs.get("ace_exx", True)
+        self.outcore = kwargs.get("outcore", False)
+        self.basis_ks = kwargs.get("basis_ks", None)
+
+        # the following are not input options
+        self.exx_W_ks = None
+
+    def __init_exx(self):
+        """Create the container for exchange data (HDF5 group or dict)."""
+        if not self.outcore:
+            self.exx_W_ks = {}
+            return
+        self.swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+        self.fswap = lib.H5TmpFile(self.swapfile.name)
+        self.exx_W_ks = self.fswap.create_group("exx_W_ks")
+
+    def get_Gv(self, mesh):
+        """Return the G-vectors for ``mesh``, reusing the cached ones when
+        ``mesh`` equals ``self.mesh``."""
+        mesh_diff = np.asarray(mesh) - np.asarray(self.mesh)
+        if not is_zero(mesh_diff):
+            return self.cell.get_Gv(mesh)
+        return self.Gv
+
+    def get_rho_R(self, C_ks, mocc_ks, mesh=None, Gv=None, ncomp=1):
+        """Real-space density, summed over ``ncomp`` components."""
+        if mesh is None:
+            mesh = self.mesh
+        if Gv is None:
+            Gv = self.get_Gv(mesh)
+
+        if ncomp == 1:
+            return get_rho_R(C_ks, mocc_ks, mesh, self.basis_ks)
+
+        rho_R = 0.
+        for comp in range(ncomp):
+            C_ks_comp = get_kcomp(C_ks, comp, load=False)
+            rho_R += get_rho_R(C_ks_comp, mocc_ks[comp], mesh,
+                               self.basis_ks)
+        return rho_R
+
+    def get_vj_R_from_rho_R(self, rho_R, mesh=None, Gv=None):
+        """Real-space Coulomb potential generated by the density ``rho_R``."""
+        if mesh is None:
+            mesh = self.mesh
+        if Gv is None:
+            Gv = self.get_Gv(mesh)
+        cell = self.cell
+        nkpts = len(self.kpts)
+        ngrids = Gv.shape[0]
+        fac = ngrids**2 / (cell.vol*nkpts)
+        rho_G = tools.fft(rho_R, mesh)
+        vj_G = rho_G * tools.get_coulG(cell, Gv=Gv)
+        return tools.ifft(vj_G, mesh).real * fac
+
+    def get_vj_R(self, C_ks, mocc_ks, mesh=None, Gv=None, ncomp=1):
+        """Real-space Coulomb potential of the density of ``C_ks``."""
+        if mesh is None:
+            mesh = self.mesh
+        if Gv is None:
+            Gv = self.get_Gv(mesh)
+        rho_R = self.get_rho_R(C_ks, mocc_ks, mesh, Gv, ncomp)
+        return self.get_vj_R_from_rho_R(rho_R, mesh, Gv)
+
+    def update_k_support_vec(self, C_ks, mocc_ks, kpts, Ct_ks=None,
+                             mesh=None, Gv=None, exxdiv=None, comp=None):
+        """Refresh the stored exchange data for the given orbitals.
+
+        With ``ace_exx`` the ACE support vectors are stored; otherwise the
+        real-space occupied orbitals are. ``comp`` (None or int) selects the
+        sub-container ``"<comp>"``, which is created on first use.
+        """
+        if self.exx_W_ks is None:
+            self.__init_exx()
+
+        if mesh is None:
+            mesh = self.mesh
+
+        nkpts = len(kpts)
+
+        if comp is None:
+            out = self.exx_W_ks
+        elif isinstance(comp, int):
+            keycomp = "%d" % comp
+            if keycomp not in self.exx_W_ks:
+                if self.outcore:
+                    self.exx_W_ks.create_group(keycomp)
+                else:
+                    self.exx_W_ks[keycomp] = {}
+            out = self.exx_W_ks[keycomp]
+        else:
+            raise RuntimeError("comp must be None or int")
+
+        if not self.ace_exx:
+            # store ifft of Co_ks
+            for k in range(nkpts):
+                occ = np.where(mocc_ks[k]>THR_OCC)[0]
+                Co_k = get_kcomp(C_ks, k, occ=occ)
+                basis = self.basis_ks[k] if self.basis_ks is not None else None
+                set_kcomp(wf_ifft(Co_k, mesh, basis), out, k)
+            return
+
+        get_ace_support_vec(self.cell, C_ks, mocc_ks, kpts,
+                            C2_ks=Ct_ks, k2pts=kpts, out=out,
+                            mesh=mesh, Gv=Gv, exxdiv=exxdiv,
+                            method="cd", outcore=self.outcore,
+                            basis_ks=self.basis_ks)
+
+    def apply_j_kpt(self, C_k, mesh=None, vj_R=None, C_k_R=None, basis=None):
+        """Apply the Coulomb potential (``self.vj_R`` by default) to ``C_k``."""
+        if mesh is None:
+            mesh = self.mesh
+        if vj_R is None:
+            vj_R = self.vj_R
+        return apply_j_kpt(C_k, mesh, vj_R, C_k_R=C_k_R, basis=basis)
+
+    def apply_k_kpt(self, C_k, kpt, mesh=None, Gv=None, exxdiv=None, comp=None,
+                    basis=None):
+        """Apply the stored exchange operator to ``C_k`` at k-point ``kpt``."""
+        if comp is None:
+            W_ks = self.exx_W_ks
+        elif isinstance(comp, int):
+            W_ks = get_kcomp(self.exx_W_ks, comp, load=False)
+        else:
+            raise RuntimeError("comp must be None or int.")
+
+        if self.ace_exx:
+            k = member(kpt, self.kpts)[0]
+            return apply_k_kpt_support_vec(C_k, get_kcomp(W_ks, k))
+
+        cell = self.cell
+        kpts = self.kpts
+        if mesh is None:
+            mesh = self.mesh
+        if Gv is None:
+            Gv = self.get_Gv(mesh)
+        if exxdiv is None:
+            exxdiv = self.exxdiv
+        # every stored orbital is given occupation 2
+        mocc_ks = [np.ones(get_kcomp(W_ks, k, load=False).shape[0])*2
+                   for k in range(len(kpts))]
+        return apply_k_kpt(cell, C_k, kpt, None, mocc_ks, kpts, mesh, Gv,
+                           C_ks_R=W_ks, exxdiv=exxdiv, basis=basis,
+                           basis_ks=self.basis_ks)
diff --git a/pyscf/pbc/pwscf/kccsd_rhf.py b/pyscf/pbc/pwscf/kccsd_rhf.py
new file mode 100644
index 000000000..4a49af389
--- /dev/null
+++ b/pyscf/pbc/pwscf/kccsd_rhf.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Hong-Zhou Ye <hzyechem@gmail.com>
+#
+
+""" Restricted CCSD in plane-wave basis
+"""
+
+import h5py
+import numpy as np
+
+from pyscf.pbc import cc
+from pyscf.pbc.mp.kmp2 import (get_nocc, get_nmo, get_frozen_mask,
+                               padded_mo_energy, padding_k_idx)
+from pyscf.pbc.pwscf.pw_helper import get_kcomp
+from pyscf.pbc.pwscf.ao2mo.molint import get_molint_from_C
+from pyscf.pbc.pwscf.khf import THR_OCC
+from pyscf import lib
+from pyscf.lib import logger
+
+
+def padded_mo_coeff(mp, mo_coeff):
+    """Return per-k-point coefficient arrays padded to ``mp.nmo`` rows.
+
+    For each k-point the rows selected by the frozen mask are copied into a
+    zero array at the row positions given by the "joint" padding convention.
+    """
+    frozen_mask = get_frozen_mask(mp)
+    padding_convention = padding_k_idx(mp, kind="joint")
+
+    padded = []
+    for k in range(mp.nkpts):
+        ncol = mo_coeff[k].shape[1]
+        C_pad = np.zeros((mp.nmo, ncol), dtype=mo_coeff[k].dtype)
+        rows_cols = np.ix_(padding_convention[k], np.arange(ncol))
+        C_pad[rows_cols] = mo_coeff[k][frozen_mask[k], :]
+        padded.append(C_pad)
+
+    return padded
+
+
+class PWKRCCSD:
+    """
+    Restricted CCSD in plane-wave basis.
+
+    Attribute `frozen` is the same as in GTO CCSD.
+
+    Correlation energy can be accessed from the `e_corr` property.
+    Other CCSD results can be accessed from the underlying
+    `mcc` (Periodic RCCSD, which contains e_tot, t-amplitudes, etc.)
+    after `kernel` is called.
+    """
+
+    def __init__(self, mf, frozen=None):
+        self._scf = mf
+        self.mo_occ = mf.mo_occ
+        self.mo_energy = mf.mo_energy
+        self.frozen = frozen
+        self.kpts = mf.kpts
+        self.mcc = None
+
+        # not input options
+        self._nmo = None
+        self._nocc = None
+        self.nkpts = len(self.kpts)
+
+    def kernel(self, eris=None):
+        """Run CCSD and return the underlying periodic RCCSD object.
+
+        ``eris`` defaults to ``self.ao2mo()``. The inner solver is created
+        with the same ``frozen`` setting as this object, so that its orbital
+        counts match the integrals, which are built with ``self.frozen``.
+        """
+        cput0 = (logger.process_clock(), logger.perf_counter())
+        if eris is None: eris = self.ao2mo()
+        cput0 = logger.timer(self._scf, 'CCSD init eri', *cput0)
+        self.mcc = cc.kccsd_rhf.RCCSD(self._scf, frozen=self.frozen)
+        self.mcc.kernel(eris=eris)
+
+        return self.mcc
+
+    def ao2mo(self):
+        """Build the integral container used by ``kernel``."""
+        return _ERIS(self)
+
+    @property
+    def e_corr(self):
+        """Correlation energy of the last ``kernel`` run.
+
+        Raises RuntimeError if ``kernel`` has not been called yet.
+        """
+        if self.mcc is None:
+            raise RuntimeError("kernel must be called first.")
+        return self.mcc.e_corr
+
+# mimic KMP2
+    @property
+    def nmo(self):
+        return self.get_nmo()
+    @nmo.setter
+    def nmo(self, n):
+        self._nmo = n
+
+    @property
+    def nocc(self):
+        return self.get_nocc()
+    @nocc.setter
+    def nocc(self, n):
+        self._nocc = n
+
+    # orbital counting and frozen-mask helpers shared with KMP2
+    get_nocc = get_nocc
+    get_nmo = get_nmo
+    get_frozen_mask = get_frozen_mask
+
+
+class _ERIS:
+    """Integral container handed to the periodic RCCSD solver.
+
+    Holds ``e_hf``, the padded ``mo_energy``, a diagonal ``fock`` and the
+    ``oooo`` ... ``vvvv`` integral blocks.
+    """
+    def __init__(self, cc):
+        mf = cc._scf
+        kpts = mf.kpts
+        nkpts = len(kpts)
+
+        # orbitals are read back from the SCF checkpoint file
+        with h5py.File(mf.chkfile, "r") as fchk:
+            mo_coeff = [get_kcomp(fchk["mo_coeff"], k) for k in range(nkpts)]
+
+# padding
+        mo_coeff = padded_mo_coeff(cc, mo_coeff)
+        mo_energy = padded_mo_energy(cc, mf.mo_energy)
+        mo_occ = padded_mo_energy(cc, mf.mo_occ)
+
+        self.e_hf = mf.e_tot
+        self.mo_energy = np.asarray(mo_energy)
+# remove ewald correction
+        moe_noewald = self.mo_energy.copy()
+        for k in range(nkpts):
+            occupied = mo_occ[k] > THR_OCC
+            moe_noewald[k, occupied] += mf._madelung
+        self.fock = np.asarray(
+            [np.diag(row.astype(np.complex128)) for row in moe_noewald]
+        )
+
+        molint = get_molint_from_C(
+            mf.cell, mo_coeff, kpts, basis_ks=mf._basis_data
+        )
+        eris = molint.transpose(0,2,1,3,5,4,6)
+
+        # occupied / virtual blocks, split at the number of occupied orbitals
+        no = cc.nocc
+        o, v = slice(None, no), slice(no, None)
+        self.oooo = eris[:,:,:,o,o,o,o]
+        self.ooov = eris[:,:,:,o,o,o,v]
+        self.oovv = eris[:,:,:,o,o,v,v]
+        self.ovov = eris[:,:,:,o,v,o,v]
+        self.voov = eris[:,:,:,v,o,o,v]
+        self.vovv = eris[:,:,:,v,o,v,v]
+        self.vvvv = eris[:,:,:,v,v,v,v]
+
+        eris = None
+
+
+# Self-check: CCSD correlation energy from a GTO mean-field run directly
+# versus the same mean field converted with gtomf2pwmf and run via PWKRCCSD.
+if __name__ == "__main__":
+    # two-carbon cell
+    a0 = 1.78339987
+    atom = "C 0 0 0; C %.10f %.10f %.10f" % (a0*0.5, a0*0.5, a0*0.5)
+    a = np.asarray([
+            [0., a0, a0],
+            [a0, 0., a0],
+            [a0, a0, 0.]])
+
+    from pyscf.pbc import gto, scf, pwscf
+    cell = gto.Cell(atom=atom, a=a, basis="gth-szv", pseudo="gth-pade",
+                    ke_cutoff=50)
+    cell.build()
+    cell.verbose = 5
+
+    kpts = cell.make_kpts([2,1,1])
+
+    # reference: GTO-basis KRHF followed by periodic RCCSD
+    mf = scf.KRHF(cell, kpts)
+    mf.kernel()
+
+    mcc = cc.kccsd_rhf.RCCSD(mf)
+    mcc.kernel()
+
+    # same calculation through the plane-wave wrapper; kernel() returns the
+    # underlying RCCSD object
+    from pyscf.pbc.pwscf.pw_helper import gtomf2pwmf
+    pwmf = gtomf2pwmf(mf)
+    pwmcc = PWKRCCSD(pwmf).kernel()
+
+    # the two correlation energies must agree to 1e-5
+    assert(np.abs(mcc.e_corr - pwmcc.e_corr) < 1e-5)
diff --git a/pyscf/pbc/pwscf/khf.py b/pyscf/pbc/pwscf/khf.py
new file mode 100644
index 000000000..82f15cff7
--- /dev/null
+++ b/pyscf/pbc/pwscf/khf.py
@@ -0,0 +1,2036 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Hong-Zhou Ye <hzyechem@gmail.com>
+#
+
+""" Hartree-Fock in the Plane Wave Basis
+"""
+
+
+import os
+import sys
+import copy
+import h5py
+import tempfile
+import numpy as np
+import scipy.linalg
+
+from pyscf import lib
+from pyscf import __config__
+from pyscf.scf import hf as mol_hf
+from pyscf.pbc.scf import khf as pbc_hf
+from pyscf.scf import chkfile as mol_chkfile
+from pyscf.pbc.pwscf import chkfile
+from pyscf.pbc import gto, scf, tools
+from pyscf.pbc.pwscf import pw_helper
+from pyscf.pbc.pwscf.pw_helper import get_kcomp, set_kcomp
+from pyscf.pbc.pwscf import pseudo as pw_pseudo
+from pyscf.pbc.pwscf import jk as pw_jk
+from pyscf.lib import logger
+import pyscf.lib.parameters as param
+
+from pyscf.pbc.lib.kpts_helper import member
+
+
+# TODO APIs for getting CPW and CPW virtuals
+
+# Occupation threshold; kccsd_rhf imports it to select orbitals with
+# occupation above this value.
+# NOTE(review): pwscf/jk.py defines its own THR_OCC = 1e-10 — confirm that
+# the two different thresholds are intentional.
+THR_OCC = 1E-3
+
+
+def kernel_doubleloop(mf, C0=None,
+                      nbandv=0, nbandv_extra=1,
+                      conv_tol=1.E-6,
+                      conv_tol_davidson=1.E-6, conv_tol_band=1e-4,
+                      max_cycle=100, max_cycle_davidson=10, verbose_davidson=0,
+                      ace_exx=True, damp_type="anderson", damp_factor=0.3,
+                      dump_chk=True, conv_check=True, callback=None, **kwargs):
+    '''Kernel function for SCF in a PW basis
+
+    Note:
+        This double-loop implementation follows closely the implementation in Quantum ESPRESSO.
+
+    Args:
+        C0 (list of numpy arrays):
+            A list of nkpts numpy arrays, each of size nocc(k) * Npw.
+        nbandv (int):
+            How many virtual bands to compute? Default is zero.
+        nbandv_extra (int):
+            How many extra virtual bands to include to facilitate the
+            convergence of the davidson algorithm for the highest few
+            virtual bands? Default is 1.
+    '''
+    log = logger.Logger(mf.stdout, mf.verbose)
+    cput0 = (logger.process_clock(), logger.perf_counter())
+
+    cell = mf.cell
+
+    nbando, nbandv_tot, nband, nband_tot = mf.get_nband(nbandv, nbandv_extra)
+    log.info("Num of occ bands= %s", nbando)
+    log.info("Num of vir bands= %s", nbandv)
+    log.info("Num of all bands= %s", nband)
+    log.info("Num of extra vir bands= %s", nbandv_extra)
+
+    # init guess and SCF chkfile
+    tick = np.asarray([logger.process_clock(), logger.perf_counter()])
+
+    if mf.outcore:
+        swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+        fswap = lib.H5TmpFile(swapfile.name)
+        swapfile = None
+        C_ks = fswap.create_group("C_ks")
+    else:
+        fswap = None
+        C_ks = None
+    C_ks, mocc_ks = mf.get_init_guess(nvir=nbandv_tot, C0=C0, out=C_ks)
+
+    tock = np.asarray([logger.process_clock(), logger.perf_counter()])
+    mf.scf_summary["t-init"] = tock - tick
+
+    single_loop = False
+    chg_conv_tol = 0.1
+    chg_conv_tol_band = None
+    chg_conv_tol_davidson = 0.001
+    chg_max_cycle = max_cycle
+    if mf.istype("KRKS") or mf.istype("KUKS"):
+        if not mf._numint.libxc.is_hybrid_xc(mf.xc):
+            if mf.with_pp.pptype != "ccecp":
+                single_loop = True
+                chg_conv_tol = conv_tol
+                chg_conv_tol_band = conv_tol_band
+                chg_conv_tol_davidson = 0.01 * conv_tol
+                chg_max_cycle = 4 * max_cycle
+
+    # init E
+    mesh = mf.wf_mesh
+    Gv = cell.get_Gv(mesh)
+    vj_R = mf.get_vj_R(C_ks, mocc_ks, mesh=mesh, Gv=Gv)
+    mf.update_pp(C_ks)
+    mf.update_k(C_ks, mocc_ks)
+    # charge SCF with very looooooose convergence threshold
+    log.debug("Init charge cycle")
+    chg_scf_conv, fc_init, vj_R, C_ks, moe_ks, mocc_ks, e_tot = \
+                        mf.kernel_charge(
+                            C_ks, mocc_ks, nband, mesh=mesh, Gv=Gv,
+                            max_cycle=chg_max_cycle, conv_tol=chg_conv_tol,
+                            max_cycle_davidson=max_cycle_davidson,
+                            conv_tol_davidson=chg_conv_tol_davidson,
+                            verbose_davidson=verbose_davidson,
+                            damp_type=damp_type, damp_factor=damp_factor,
+                            vj_R=vj_R, single_loop=single_loop,
+                            conv_tol_band=chg_conv_tol_band)
+
+    if single_loop:
+        scf_conv = chg_scf_conv
+        # remove extra virtual bands before return
+        remove_extra_virbands(C_ks, moe_ks, mocc_ks, nbandv_extra)
+        if dump_chk:
+            mf.dump_chk(locals())
+        if callable(callback):
+            callback(locals())
+        if mf.outcore:
+            C_ks = chkfile.load_mo_coeff(C_ks)
+            fswap.close()
+        cput1 = (logger.process_clock(), logger.perf_counter())
+        mf.scf_summary["t-tot"] = np.asarray(cput1) - np.asarray(cput0)
+        log.debug('    CPU time for %s %9.2f sec, wall time %9.2f sec',
+                  "scf_cycle", *mf.scf_summary["t-tot"])
+        # A post-processing hook before return
+        mf.post_kernel(locals())
+        return scf_conv, e_tot, moe_ks, C_ks, mocc_ks
+
+    log.info('init E= %.15g', e_tot)
+    if mf.exxdiv == "ewald":
+        moe_ks = ewald_correction(moe_ks, mocc_ks, mf.madelung)
+    mf.dump_moe(moe_ks, mocc_ks, nband=nband)
+
+    scf_conv = False
+
+    if mf.max_cycle <= 0:
+        remove_extra_virbands(C_ks, moe_ks, mocc_ks, nbandv_extra)
+        return scf_conv, e_tot, moe_ks, C_ks, mocc_ks
+
+    if dump_chk and mf.chkfile:
+        # Explicit overwrite the mol object in chkfile
+        # Note in pbc.scf, mf.mol == mf.cell, cell is saved under key "mol"
+        mol_chkfile.save_mol(cell, mf.chkfile)
+
+    cput1 = log.timer('initialize pwscf', *cput0)
+
+    fc_tot = fc_init
+    fc_this = 0
+    chg_conv_tol = 0.1
+    de = None
+    for cycle in range(max_cycle):
+
+        last_hf_e = e_tot
+        last_hf_moe = moe_ks
+
+        if cycle == 0:
+            # update coulomb potential, support vecs for PP & EXX
+            vj_R = mf.get_vj_R(C_ks, mocc_ks, mesh=mesh, Gv=Gv)
+            mf.update_pp(C_ks)
+            mf.update_k(C_ks, mocc_ks)
+
+        if cycle > 0:
+            chg_conv_tol = min(chg_conv_tol, max(conv_tol, 0.1*abs(de)))
+        conv_tol_davidson = max(conv_tol*0.1, chg_conv_tol*0.01)
+        log.debug("  Performing charge SCF with conv_tol= %.3g "
+                  "conv_tol_davidson= %.3g", chg_conv_tol, conv_tol_davidson)
+
+        # charge SCF
+        chg_scf_conv, fc_this, vj_R, C_ks, moe_ks, mocc_ks, e_tot = \
+                            mf.kernel_charge(
+                                C_ks, mocc_ks, nband, mesh=mesh, Gv=Gv,
+                                max_cycle=max_cycle, conv_tol=chg_conv_tol,
+                                max_cycle_davidson=max_cycle_davidson,
+                                conv_tol_davidson=conv_tol_davidson,
+                                verbose_davidson=verbose_davidson,
+                                damp_type=damp_type, damp_factor=damp_factor,
+                                vj_R=vj_R,
+                                last_hf_e=e_tot)
+        fc_tot += fc_this
+        if not chg_scf_conv:
+            log.warn("  Charge SCF not converged.")
+
+        if mf.exxdiv == "ewald":
+            moe_ks = ewald_correction(moe_ks, mocc_ks, mf.madelung)
+        de_band = get_band_err(moe_ks, last_hf_moe, nband, joint=True)
+        de = e_tot - last_hf_e
+
+        # update coulomb potential, support vecs for PP & EXX
+        vj_R = mf.get_vj_R(C_ks, mocc_ks)
+        mf.update_pp(C_ks)
+        mf.update_k(C_ks, mocc_ks)
+
+        # ACE error
+        err_R = get_ace_error(mf, C_ks, moe_ks, mocc_ks, nband=nband,
+                              mesh=mesh, Gv=Gv, vj_R=vj_R)
+
+        log.info('cycle= %d E= %.15g  delta_E= %4.3g  |dEbnd|= %4.3g  '
+                 'R= %4.3g  %d FC (%d tot)', cycle+1, e_tot, de, de_band,
+                 err_R, fc_this, fc_tot)
+        mf.dump_moe(moe_ks, mocc_ks, nband=nband)
+
+        if callable(mf.check_convergence):
+            scf_conv = mf.check_convergence(locals())
+        elif abs(de) < conv_tol and abs(de_band) < conv_tol_band:
+            scf_conv = True
+
+        if dump_chk:
+            mf.dump_chk(locals())
+
+        if callable(callback):
+            callback(locals())
+
+        cput1 = log.timer('cycle= %d'%(cycle+1), *cput1)
+
+        if scf_conv:
+            break
+
+    if scf_conv and conv_check:
+        # An extra diagonalization, to remove level shift
+        last_hf_e = e_tot
+        last_hf_moe = moe_ks
+
+        chg_conv_tol = min(chg_conv_tol, max(conv_tol, 0.1*abs(de)))
+        conv_tol_davidson = max(conv_tol*0.1, chg_conv_tol*0.01)
+        log.debug("  Performing charge SCF with conv_tol= %.3g"
+                  " conv_tol_davidson= %.3g", chg_conv_tol, conv_tol_davidson)
+
+        chg_scf_conv, fc_this, vj_R, C_ks, moe_ks, mocc_ks, e_tot = \
+                            mf.kernel_charge(
+                                C_ks, mocc_ks, nband, mesh=mesh, Gv=Gv,
+                                max_cycle=max_cycle, conv_tol=chg_conv_tol,
+                                max_cycle_davidson=max_cycle_davidson,
+                                conv_tol_davidson=conv_tol_davidson,
+                                verbose_davidson=verbose_davidson,
+                                damp_type=damp_type, damp_factor=damp_factor,
+                                last_hf_e=e_tot)
+        fc_tot += fc_this
+
+        if mf.exxdiv == "ewald":
+            moe_ks = ewald_correction(moe_ks, mocc_ks, mf.madelung)
+        de_band = get_band_err(moe_ks, last_hf_moe, nband, joint=True)
+        de = e_tot - last_hf_e
+
+        # update coulomb potential, support vecs for PP & EXX
+        vj_R = mf.get_vj_R(C_ks, mocc_ks)
+        mf.update_pp(C_ks)
+        mf.update_k(C_ks, mocc_ks)
+
+        # ACE error
+        err_R = get_ace_error(mf, C_ks, moe_ks, mocc_ks, nband=nband,
+                              mesh=mesh, Gv=Gv, vj_R=vj_R)
+
+        log.info('Extra cycle  E= %.15g  delta_E= %4.3g  dEbnd= %4.3g  '
+                 'R= %4.3g  %d FC (%d tot)', e_tot, de, de_band, err_R,
+                 fc_this, fc_tot)
+        mf.dump_moe(moe_ks, mocc_ks, nband=nband)
+
+        if callable(mf.check_convergence):
+            scf_conv = mf.check_convergence(locals())
+        elif abs(de) < conv_tol and abs(de_band) < conv_tol_band:
+            scf_conv = True
+
+    # remove extra virtual bands before return
+    remove_extra_virbands(C_ks, moe_ks, mocc_ks, nbandv_extra)
+
+    if dump_chk:
+        mf.dump_chk(locals())
+
+    if callable(callback):
+        callback(locals())
+
+    if mf.outcore:
+        C_ks = chkfile.load_mo_coeff(C_ks)
+        fswap.close()
+
+    cput1 = (logger.process_clock(), logger.perf_counter())
+    mf.scf_summary["t-tot"] = np.asarray(cput1) - np.asarray(cput0)
+    log.debug('    CPU time for %s %9.2f sec, wall time %9.2f sec',
+              "scf_cycle", *mf.scf_summary["t-tot"])
+    # A post-processing hook before return
+    mf.post_kernel(locals())
+    return scf_conv, e_tot, moe_ks, C_ks, mocc_ks
+
+
+def get_nband(mf, nbandv, nbandv_extra):
+    """Return the band counts derived from the cell's electron number.
+
+    Args:
+        mf: Object exposing ``mf.cell.nelectron``; nothing else is read.
+        nbandv (int): Number of virtual bands.
+        nbandv_extra (int): Number of extra virtual bands on top of nbandv.
+
+    Returns:
+        tuple: ``(nbando, nbandv_tot, nband, nband_tot)`` where
+        ``nbando = nelectron // 2``, ``nbandv_tot = nbandv + nbandv_extra``,
+        ``nband = nbando + nbandv`` and ``nband_tot = nbando + nbandv_tot``.
+    """
+    n_occ = mf.cell.nelectron // 2
+    n_vir_all = nbandv + nbandv_extra
+    return n_occ, n_vir_all, n_occ + nbandv, n_occ + n_vir_all
+
+
+# def sort_mo(C_ks, moe_ks, mocc_ks, occ0=None):
+#     if occ0 is None: occ0 = 2
+#     if isinstance(moe_ks[0], np.ndarray):
+#         nkpts = len(moe_ks)
+#         for k in range(nkpts):
+#             idxocc = np.where(mocc_ks[k]>THR_OCC)[0]
+#             idxvir = np.where(mocc_ks[k]<THR_OCC)[0]
+#             order = np.concatenate([idxocc, idxvir])
+#             mocc_ks[k] = np.asarray([occ0 if i < len(idxocc) else 0
+#                                     for i in range(len(order))])
+#             moe_ks[k] = moe_ks[k][order]
+#             set_kcomp(get_kcomp(C_ks, k)[order], C_ks, k)
+#         return C_ks, moe_ks, mocc_ks
+#     else:
+#         ncomp = len(moe_ks)
+#         for comp in range(ncomp):
+#             C_ks_comp = get_kcomp(C_ks, comp, load=False)
+#             C_ks_comp, moe_ks[comp], mocc_ks[comp] = sort_mo(C_ks_comp,
+#                                                              moe_ks[comp],
+#                                                              mocc_ks[comp],
+#                                                              occ0=1)
+#             if isinstance(C_ks, list): C_ks[comp] = C_ks_comp
+#         return C_ks, moe_ks, mocc_ks
+
+
+def get_band_err(moe_ks, last_hf_moe, nband, joint=False):
+    """Return the largest absolute change of the lowest ``nband`` band energies.
+
+    Args:
+        moe_ks: Current band energies: a list with one 1D numpy array per
+            k-point, or a list of such lists (one entry per component).
+        last_hf_moe: Reference band energies in the same layout as moe_ks.
+        nband (int or list of int): Number of leading bands compared at each
+            k-point. For multi-component input an int is used for every
+            component, while a list gives one count per component.
+        joint (bool): If True, the selected energies of all k-points are
+            concatenated and sorted before the comparison. If False, the
+            energies are compared band by band at each k-point.
+
+    Returns:
+        float: Maximum absolute difference, or 0. if no band is compared.
+    """
+    if isinstance(moe_ks[0], np.ndarray):
+        if nband == 0: return 0.
+        nkpts = len(moe_ks)
+        if joint:
+            moe1 = np.sort(np.concatenate([moe_ks[k][:nband]
+                           for k in range(nkpts)]))
+            moe0 = np.sort(np.concatenate([last_hf_moe[k][:nband]
+                           for k in range(nkpts)]))
+            return np.max(abs(moe1 - moe0))
+        return np.max([np.max(abs(moe_ks[k] - last_hf_moe[k])[:nband])
+                      for k in range(nkpts)])
+
+    ncomp = len(moe_ks)
+    # Accept numpy integers as well as plain ints for a shared band count.
+    if isinstance(nband, (int, np.integer)): nband = [nband] * ncomp
+    if sum(nband) == 0: return 0.
+    # `joint` must be forwarded to every component; if it is dropped here the
+    # flag is silently ignored for multi-component input.
+    return np.max([get_band_err(moe_ks[comp], last_hf_moe[comp],
+                                nband[comp], joint=joint)
+                   for comp in range(ncomp)])
+
+
+def get_ace_error(mf, C_ks, moe_ks, mocc_ks, nband=None, comp=None, exxdiv=None,
+                  mesh=None, Gv=None, vj_R=None):
+    """Return the largest residual element of the bands w.r.t. the Fock operator.
+
+    For every k-point the matrix ``C_k^* (F C_k - C_k * e_k)^T`` is formed,
+    with ``F C_k`` obtained from ``mf.apply_Fock_kpt`` (called with exxdiv
+    "none"), and the maximum absolute element over all k-points (and
+    components) is returned.
+
+    Args:
+        mf: Mean-field object providing kpts, cell, exxdiv, madelung,
+            get_vj_R and apply_Fock_kpt.
+        C_ks: Band coefficients, read per k-point through get_kcomp.
+        moe_ks: Band energies. If ``moe_ks[0][0]`` is a float the input is
+            treated as single-component (one array per k-point); otherwise
+            as a list with one such list per component.
+        mocc_ks: Occupations in the same layout as moe_ks.
+        nband (int, list of int or None): Number of leading bands checked.
+            None checks all bands. For multi-component input an int applies
+            to every component and a list gives one count per component.
+        comp (int or None): Component index forwarded to apply_Fock_kpt.
+        exxdiv (str or None): If "ewald", the Ewald shift is removed from
+            the band energies before the residual is formed. Defaults to
+            mf.exxdiv.
+        mesh, Gv, vj_R: Optional mesh, G-vectors and Coulomb potential;
+            computed from the cell / bands when omitted.
+
+    Returns:
+        float: Maximum absolute residual element.
+    """
+    kpts = mf.kpts
+    nkpts = len(kpts)
+    cell = mf.cell
+    if mesh is None: mesh = cell.mesh
+    if Gv is None: Gv = cell.get_Gv(mesh)
+    if vj_R is None: vj_R = mf.get_vj_R(C_ks, mocc_ks, mesh=mesh, Gv=Gv)
+    if exxdiv is None: exxdiv = mf.exxdiv
+
+    if isinstance(moe_ks[0][0], float): # RHF
+        if exxdiv == "ewald":
+            moe_ks_noewald = ewald_correction(moe_ks, mocc_ks, -mf.madelung)
+        else:
+            moe_ks_noewald = moe_ks
+        err_Rs = np.zeros(nkpts)
+        for k in range(nkpts):
+            nband_ = len(mocc_ks[k]) if nband is None else nband
+            if nband_ == 0:
+                err_Rs[k] = 0.
+            else:
+                C_k = get_kcomp(C_ks, k)[:nband_]
+                Cbar_k = mf.apply_Fock_kpt(C_k, kpts[k], mocc_ks, mesh, Gv, vj_R,
+                                           "none", comp=comp, ret_E=False)
+                R_k = lib.dot(C_k.conj(),
+                              (Cbar_k - C_k*moe_ks_noewald[k][:nband_,None]).T)
+                err_Rs[k] = abs(R_k).max()
+                C_k = Cbar_k = None
+    else:
+        ncomp = len(moe_ks)
+        if exxdiv == "ewald":
+            # Undo the Ewald shift on the full multi-component list, so that
+            # ewald_correction applies the same ncomp scaling it uses when the
+            # shift is added. Undoing it per component with the unscaled
+            # madelung would remove only part of the shift.
+            moe_ks = ewald_correction(moe_ks, mocc_ks, -mf.madelung)
+            exxdiv = "none"
+        # A shared band count (or None) applies to every component.
+        if nband is None or isinstance(nband, (int, np.integer)):
+            nband = [nband] * ncomp
+        err_Rs = np.zeros(ncomp)
+        for comp in range(ncomp):
+            C_ks_comp = get_kcomp(C_ks, comp, load=False)
+            err_Rs[comp] = get_ace_error(mf, C_ks_comp,
+                                         moe_ks[comp], mocc_ks[comp],
+                                         nband=nband[comp], comp=comp,
+                                         exxdiv=exxdiv,
+                                         mesh=mesh, Gv=Gv, vj_R=vj_R)
+    err_R = np.max(err_Rs)
+    return err_R
+
+
+def remove_extra_virbands(C_ks, moe_ks, mocc_ks, nbandv_extra):
+    """Drop the last ``nbandv_extra`` bands at every k-point, in place.
+
+    Args:
+        C_ks: Band coefficients, accessed through get_kcomp / set_kcomp.
+        moe_ks: Band energies: one numpy array per k-point, or a list of
+            such lists (one per component). Modified in place.
+        mocc_ks: Occupations in the same layout as moe_ks. Modified in place.
+        nbandv_extra (int or list of int): Number of trailing bands to drop;
+            for multi-component input an int applies to every component.
+
+    Returns:
+        None.
+    """
+    if not isinstance(moe_ks[0], np.ndarray):
+        # multi-component input: handle each component on its own
+        n_comp = len(moe_ks)
+        extras = nbandv_extra
+        if isinstance(extras, int):
+            extras = [extras] * n_comp
+        for icomp in range(n_comp):
+            remove_extra_virbands(get_kcomp(C_ks, icomp, load=False),
+                                  moe_ks[icomp], mocc_ks[icomp],
+                                  extras[icomp])
+        return
+
+    if not nbandv_extra > 0:
+        return
+
+    for k in range(len(moe_ks)):
+        # indices of the bands that are kept
+        keep = list(range(len(moe_ks[k]) - nbandv_extra))
+        moe_ks[k] = moe_ks[k][keep]
+        mocc_ks[k] = mocc_ks[k][keep]
+        set_kcomp(get_kcomp(C_ks, k, occ=keep), C_ks, k)
+
+
+def kernel_charge(mf, C_ks, mocc_ks, nband, mesh=None, Gv=None,
+                  max_cycle=50, conv_tol=1e-6,
+                  max_cycle_davidson=10, conv_tol_davidson=1e-8,
+                  verbose_davidson=0,
+                  damp_type="anderson", damp_factor=0.3,
+                  vj_R=None,
+                  last_hf_e=None,
+                  single_loop=False,
+                  last_hf_moe=None,
+                  conv_tol_band=None):
+    """
+    For a given nonlocal potential and EXX (K) potential, run a
+    charge self-consistency loop. If neither CCECP potentials nor
+    EXX are used (e.g. LDA DFT with GTH potential), this loop
+    achieves full self-consistency.
+
+    Args:
+        mf:
+            Mean-field object providing cell, kpts, get_vj_R, get_mo_occ,
+            converge_band, energy_tot and dump_moe.
+        C_ks (list of numpy arrays):
+            Orbital plane-wave coefficients at each spin/k-point
+        mocc_ks (list of numpy arrays):
+            Orbital occupations of each orbital/band at each spin/k-point.
+        nband (int):
+            Number of band energies to print. Also the number of bands
+            compared when conv_tol_band is set.
+        mesh (3-tuple):
+            FFT grid size. Defaults to cell.mesh.
+        Gv (numpy array):
+            G-vectors of the mesh
+        max_cycle (int):
+            Max number of SCF cycles
+        conv_tol (float):
+            Convergence tolerance (Ha) on the total energy change
+        max_cycle_davidson (int):
+            Max number of cycles to converge each Davidson solver call.
+        conv_tol_davidson (float):
+            Threshold to converge Davidson solver (Ha)
+        verbose_davidson (int):
+            Verbosity passed to the Davidson solver
+        damp_type (str):
+            "simple" or "anderson" (case-insensitive), charge mixing method
+        damp_factor (float):
+            Damping for simple mixing, smaller is faster mixing.
+        vj_R (numpy array):
+            Initial Coulomb potential. Computed from C_ks and mocc_ks if
+            not given.
+        last_hf_e (float):
+            Initial total energy, used for the energy change of the first
+            cycle. If None the first cycle reports an infinite change.
+        single_loop (bool):
+            If True, the Davidson tolerance of each cycle is derived from
+            conv_tol and the last energy change, progress is logged at
+            info level, and one extra cycle using conv_tol_davidson is run
+            after convergence.
+        last_hf_moe (list of arrays):
+            Initial mo energies, used for the band check of the first cycle
+        conv_tol_band (float):
+            Convergence tolerance for band energies. If None, band
+            energies are not checked.
+
+    Returns:
+        tuple: ``(scf_conv, fc_tot, vj_R, C_ks, moe_ks, mocc_ks, e_tot)``.
+        fc_tot is the sum over all cycles of the counters returned by
+        mf.converge_band (logged as "FC"). With ``max_cycle=0`` moe_ks and
+        e_tot are None.
+
+    Raises:
+        ValueError: If damp_type is neither "simple" nor "anderson".
+    """
+    # Reject unknown mixers up front; otherwise `chgmixer` would stay unbound
+    # and only fail with a NameError at the first mixing step.
+    damp_key = damp_type.lower()
+    if damp_key not in ("simple", "anderson"):
+        raise ValueError("Unknown damp_type %r; expected 'simple' or "
+                         "'anderson'." % (damp_type,))
+
+    log = logger.Logger(mf.stdout, mf.verbose)
+
+    cell = mf.cell
+    if mesh is None: mesh = cell.mesh
+    if Gv is None: Gv = cell.get_Gv(mesh)
+    if vj_R is None: vj_R = mf.get_vj_R(C_ks, mocc_ks, mesh=mesh, Gv=Gv)
+
+    scf_conv = False
+
+    fc_tot = 0
+
+    if damp_key == "simple":
+        chgmixer = pw_helper.SimpleMixing(mf, damp_factor)
+    else:   # "anderson"
+        chgmixer = pw_helper.AndersonMixing(mf)
+
+    cput1 = (logger.process_clock(), logger.perf_counter())
+
+    de = float("inf")
+    moe_ks = None
+    e_tot = None
+    for cycle in range(max_cycle):
+
+        if cycle > 0:   # charge mixing
+            # update mo occ
+            mocc_ks = mf.get_mo_occ(moe_ks)
+            # update coulomb potential
+            last_vj_R = vj_R
+            vj_R = mf.get_vj_R(C_ks, mocc_ks)
+            # vj_R = chgmixer.next_step(mf, vj_R, vj_R-last_vj_R)
+            vj_R = chgmixer.next_step(mf, vj_R, last_vj_R)
+
+        if single_loop:
+            # tighten the Davidson tolerance as the energy change shrinks
+            ctd_tmp = min(1e-5, max(0.01 * conv_tol, 0.001 * abs(de)))
+        else:
+            ctd_tmp = conv_tol_davidson
+        conv_ks, moe_ks, C_ks, fc_ks = mf.converge_band(
+                            C_ks, mocc_ks, mf.kpts,
+                            mesh=mesh, Gv=Gv,
+                            vj_R=vj_R,
+                            conv_tol_davidson=ctd_tmp,
+                            max_cycle_davidson=max_cycle_davidson,
+                            verbose_davidson=verbose_davidson)
+        fc_this = sum(fc_ks)
+        fc_tot += fc_this
+
+        # on the first cycle the caller-supplied last_hf_e is the reference
+        if cycle > 0:
+            last_hf_e = e_tot
+        e_tot = mf.energy_tot(C_ks, mocc_ks, vj_R=vj_R)
+        if last_hf_e is not None:
+            de = e_tot-last_hf_e
+        else:
+            de = float("inf")
+        de_band = None
+        if conv_tol_band is not None:
+            if last_hf_moe is None:
+                # no reference band energies yet: cannot be converged
+                band_check = False
+            else:
+                de_band = get_band_err(moe_ks, last_hf_moe, nband, joint=True)
+                band_check = de_band < conv_tol_band
+            last_hf_moe = moe_ks
+        else:
+            band_check = True
+        args = [cycle+1, e_tot, de, fc_this, fc_tot]
+        if single_loop:
+            fmt_str = 'cycle= %d E= %.15g  delta_E= %4.3g  %d FC (%d tot)'
+            fn = log.info
+        else:
+            fmt_str = '  chg cyc= %d E= %.15g  delta_E= %4.3g  %d FC (%d tot)'
+            fn = log.debug
+        if de_band is not None:
+            fmt_str = fmt_str + '   |dEbnd|= %4.3g'
+            args.append(de_band)
+        fn(fmt_str, *args)
+        mf.dump_moe(moe_ks, mocc_ks, nband=nband, trigger_level=logger.DEBUG3)
+
+        if abs(de) < conv_tol and band_check:
+            scf_conv = True
+
+        if not single_loop:
+            cput1 = log.timer_debug1('chg cyc= %d'%(cycle+1), *cput1)
+
+        if scf_conv:
+            break
+
+    if scf_conv and single_loop:
+        # one extra cycle with the requested Davidson tolerance
+        mocc_ks = mf.get_mo_occ(moe_ks)
+        last_vj_R = vj_R
+        vj_R = mf.get_vj_R(C_ks, mocc_ks)
+        vj_R = chgmixer.next_step(mf, vj_R, last_vj_R)
+
+        conv_ks, moe_ks, C_ks, fc_ks = mf.converge_band(
+                            C_ks, mocc_ks, mf.kpts,
+                            mesh=mesh, Gv=Gv,
+                            vj_R=vj_R,
+                            conv_tol_davidson=conv_tol_davidson,
+                            max_cycle_davidson=max_cycle_davidson,
+                            verbose_davidson=verbose_davidson)
+        fc_this = sum(fc_ks)
+        fc_tot += fc_this
+        last_hf_e = e_tot
+        e_tot = mf.energy_tot(C_ks, mocc_ks, vj_R=vj_R)
+        de = e_tot-last_hf_e
+        fmt_str = 'Extra cycle= %d E= %.15g  delta_E= %4.3g  %d FC (%d tot)'
+        log.info(fmt_str, cycle+1, e_tot, de, fc_this, fc_tot)
+        mf.dump_moe(moe_ks, mocc_ks, nband=nband, trigger_level=logger.DEBUG3)
+
+    return scf_conv, fc_tot, vj_R, C_ks, moe_ks, mocc_ks, e_tot
+
+
+def get_mo_occ(cell, moe_ks=None, C_ks=None, nocc=None):
+    """Build occupation arrays (values 2 or 0) for every k-point.
+
+    Args:
+        cell: Object exposing ``nelectron``; only read when nocc is None.
+        moe_ks (list of numpy arrays): Band energies per k-point. If given,
+            every band whose energy lies below the ``nocc * nkpts``-th lowest
+            energy over all k-points (plus 1e-10) is set to 2.
+        C_ks: Band coefficients, used only when moe_ks is None. The first
+            ``nocc`` bands of every k-point are set to 2.
+        nocc (int): Number of occupied bands per k-point. Defaults to
+            ``cell.nelectron // 2``.
+
+    Returns:
+        list of numpy arrays: Occupations for each k-point.
+
+    Raises:
+        RuntimeError: If neither moe_ks nor C_ks is given.
+    """
+    if nocc is None:
+        nocc = cell.nelectron // 2
+
+    if moe_ks is not None:
+        if nocc == 0:
+            return [np.zeros(e_k.size) for e_k in moe_ks]
+        tol = 1e-10
+        # Fermi level: the (nocc * nkpts)-th lowest energy over all k-points
+        all_sorted = np.sort(np.concatenate(moe_ks))
+        e_fermi = all_sorted[nocc * len(moe_ks) - 1]
+        occ_list = []
+        for e_k in moe_ks:
+            occ_k = np.zeros(e_k.size)
+            occ_k[e_k < e_fermi+tol] = 2
+            occ_list.append(occ_k)
+        return occ_list
+
+    if C_ks is not None:
+        nband_ks = [get_kcomp(C_ks, k, load=False).shape[0]
+                    for k in range(len(C_ks))]
+        if nocc == 0:
+            return [np.zeros(n) for n in nband_ks]
+        return [np.asarray([2 if i < nocc else 0 for i in range(n)])
+                for n in nband_ks]
+
+    raise RuntimeError
+
+
+def dump_moe(mf, moe_ks_, mocc_ks_, nband=None, trigger_level=logger.DEBUG):
+    """Log HOMO/LUMO information and the band energies of every k-point.
+
+    Nothing is printed unless ``mf.verbose >= trigger_level``.
+
+    Args:
+        mf: Mean-field object providing stdout, verbose, cell and kpts.
+        moe_ks_ (list of numpy arrays): Band energies per k-point.
+        mocc_ks_ (list of numpy arrays or None): Occupations per k-point.
+            If None, the HOMO/LUMO lines are skipped and all energies of a
+            k-point are printed together.
+        nband (int): If given, only the first nband bands are considered.
+        trigger_level (int): Minimum verbosity required to print.
+    """
+    log = logger.Logger(mf.stdout, mf.verbose)
+
+    if mf.verbose < trigger_level:
+        return
+
+    kpts = mf.cell.get_scaled_kpts(mf.kpts)
+    nkpts = len(kpts)
+    if nband is not None:
+        moe_ks = [moe_ks_[k][:nband] for k in range(nkpts)]
+        mocc_ks = (None if mocc_ks_ is None else
+                   [mocc_ks_[k][:nband] for k in range(nkpts)])
+    else:
+        moe_ks = moe_ks_
+        mocc_ks = mocc_ks_
+
+    if mocc_ks is not None:
+        # k-points that hold at least one occupied band
+        has_occ = np.where([(mocc_ks[k] > THR_OCC).any()
+                           for k in range(nkpts)])[0]
+        if len(has_occ) > 0:
+            ehomo_ks = np.asarray([np.max(moe_ks[k][mocc_ks[k]>THR_OCC])
+                                  for k in has_occ])
+            ehomo = np.max(ehomo_ks)
+            # all k-points whose HOMO is within 1e-4 of the global HOMO
+            khomos = has_occ[np.where(abs(ehomo_ks-ehomo) < 1e-4)[0]]
+
+            log.info('  HOMO = %.15g  kpt'+' %d'*khomos.size, ehomo, *khomos)
+
+        # k-points that hold at least one unoccupied band
+        has_vir = np.where([(mocc_ks[k] < THR_OCC).any()
+                           for k in range(nkpts)])[0]
+        if len(has_vir) > 0:
+            elumo_ks = np.asarray([np.min(moe_ks[k][mocc_ks[k]<THR_OCC])
+                                  for k in has_vir])
+            elumo = np.min(elumo_ks)
+            klumos = has_vir[np.where(abs(elumo_ks-elumo) < 1e-4)[0]]
+
+            log.info('  LUMO = %.15g  kpt'+' %d'*klumos.size, elumo, *klumos)
+
+        if len(has_occ) >0 and len(has_vir) > 0:
+            log.debug('  Egap = %.15g', elumo-ehomo)
+
+    # Raise the print threshold so the energy arrays are not abbreviated,
+    # and restore the caller's setting afterwards (even if logging fails)
+    # instead of overwriting it with a hard-coded value.
+    old_threshold = np.get_printoptions()["threshold"]
+    np.set_printoptions(threshold=len(moe_ks[0]))
+    try:
+        log.debug('     k-point                  mo_energy')
+        for k,kpt in enumerate(kpts):
+            if mocc_ks is None:
+                log.debug('  %2d (%6.3f %6.3f %6.3f)   %s',
+                          k, kpt[0], kpt[1], kpt[2], moe_ks[k].real)
+            else:
+                log.debug('  %2d (%6.3f %6.3f %6.3f)   %s  %s',
+                          k, kpt[0], kpt[1], kpt[2],
+                          moe_ks[k][mocc_ks[k]>0].real,
+                          moe_ks[k][mocc_ks[k]==0].real)
+    finally:
+        np.set_printoptions(threshold=old_threshold)
+
+
+def orth_mo1(cell, C, mocc, thr_nonorth=1e-6, thr_lindep=1e-8, follow=True):
+    """ Orthogonalize occupied and virtual orbitals separately.
+
+    Rows of C with ``mocc > THR_OCC`` are passed through pw_helper.orth.
+    Rows with ``mocc < THR_OCC`` first have their components along the
+    occupied rows projected out and are then passed through pw_helper.orth.
+
+    Args:
+        cell: Cell object forwarded to pw_helper.orth.
+        C (numpy array): Orbital coefficients, one orbital per row.
+        mocc (numpy array): Occupation of every row of C.
+        thr_nonorth, thr_lindep, follow: Forwarded to pw_helper.orth.
+
+    Returns:
+        numpy array: The occupied rows stacked on top of the virtual rows.
+    """
+    orth_args = (thr_nonorth, thr_lindep, follow)
+    occ_rows = C[mocc>THR_OCC]
+    vir_rows = C[mocc<THR_OCC]
+
+    if occ_rows.shape[0] > 0:
+        occ_rows = pw_helper.orth(cell, occ_rows, *orth_args)
+
+    if vir_rows.shape[0] > 0:
+        # remove the occupied components from the virtuals, then orth them
+        ovlp_vo = lib.dot(vir_rows, occ_rows.conj().T)
+        vir_rows -= lib.dot(ovlp_vo, occ_rows)
+        vir_rows = pw_helper.orth(cell, vir_rows, *orth_args)
+
+    return np.vstack([occ_rows, vir_rows])
+
+
+def orth_mo(cell, C_ks, mocc_ks, thr=1e-3):
+    """Apply orth_mo1 to the orbitals of every k-point and store the result.
+
+    Args:
+        cell: Cell object forwarded to orth_mo1.
+        C_ks: Band coefficients, read and written through get_kcomp and
+            set_kcomp.
+        mocc_ks: Occupations per k-point; its length sets the number of
+            k-points processed.
+        thr (float): Passed to orth_mo1 as thr_nonorth.
+
+    Returns:
+        C_ks, updated in place.
+    """
+    for k, mocc_k in enumerate(mocc_ks):
+        set_kcomp(orth_mo1(cell, get_kcomp(C_ks, k), mocc_k, thr), C_ks, k)
+
+    return C_ks
+
+
+def get_init_guess(cell0, kpts, basis=None, pseudo=None, nvir=0,
+                   key="hcore", out=None, kpts_obj=None, mesh=None,
+                   xc=None):
+    """
+    Initial guess for the plane-wave coefficients of the bands,
+    using a GTO calculation based on key="hcore", "h1e", "cycle1", "scf".
+
+    Args:
+        cell0:
+            Cell of the plane-wave calculation. It is copied; the copy is
+            modified and rebuilt for the GTO calculation.
+        kpts:
+            k-points of the calculation.
+        basis:
+            GTO basis for the guess. Defaults to cell0.basis.
+        pseudo:
+            Pseudopotential for the guess. Defaults to cell0.pseudo. If the
+            cell carries ECPs or pseudo is "SG15", GTH-PADE pseudopotentials
+            are used for the guess instead.
+        nvir (int):
+            Number of virtual bands to be evaluated. Default is zero.
+        key (str):
+            "hcore"/"h1e" (diagonalize the core Hamiltonian), "cycle1"
+            (kernel with max_cycle = 0) or "scf" (full kernel).
+            Case-insensitive.
+        out (h5py group):
+            If provided, the orbitals are written to it.
+        kpts_obj:
+            Object passed to the GTO mean-field constructor in place of
+            kpts. Defaults to kpts.
+        mesh:
+            Mesh forwarded to pw_helper.get_C_ks_G.
+        xc (str):
+            If given, a KRKS object with this functional is used for the
+            guess; otherwise KRHF.
+
+    Returns:
+        tuple: ``(C_ks, mocc_ks)``, the plane-wave coefficients and the
+        occupations at each k-point.
+
+    Raises:
+        NotImplementedError: If key is not one of the supported values.
+    """
+
+    log = logger.Logger(cell0.stdout, cell0.verbose)
+
+    if out is not None:
+        assert(isinstance(out, h5py.Group))
+
+    nkpts = len(kpts)
+
+    if basis is None: basis = cell0.basis
+    if pseudo is None: pseudo = cell0.pseudo
+    # work on a copy so that cell0 is left untouched
+    cell = cell0.copy()
+    # a Cell subclass is turned back into a plain gto.Cell and its
+    # pseudopotential data is cleared
+    if cell.__class__ != gto.Cell:
+        cell.__class__ = gto.Cell
+        cell.pseudo = None
+        cell._pseudo = None
+    cell.basis = basis
+    if len(cell._ecp) > 0 or pseudo == "SG15":
+        # use GTH to avoid the slow init time of ECP
+        gth_pseudo = {}
+        for iatm in range(cell0.natm):
+            atm = cell0.atom_symbol(iatm)
+            if atm in gth_pseudo:
+                continue
+            q = cell0.atom_charge(iatm)
+            if q == 0:  # Ghost atom
+                continue
+            else:
+                # GTH-PADE pseudopotential named after the atom's charge
+                gth_pseudo[atm] = "gth-pade-q%d"%q
+        log.debug("Using the GTH-PP for init guess: %s", gth_pseudo)
+        cell.pseudo = gth_pseudo
+        cell.ecp = None
+        cell._ecp = cell._ecpbas = {}
+    else:
+        cell.pseudo = pseudo
+    cell.ke_cutoff = cell0.ke_cutoff
+    # silence the auxiliary GTO calculation
+    cell.verbose = 0
+    cell.build()
+
+    log.info("generating init guess using %s basis", cell.basis)
+
+    if kpts_obj is None:
+        kpts_obj = kpts
+    # density fitting is switched on for 30 or more k-points
+    if xc is None:
+        if len(kpts) < 30:
+            pmf = scf.KRHF(cell, kpts_obj)
+        else:
+            pmf = scf.KRHF(cell, kpts_obj).density_fit()
+    else:
+        if len(kpts) < 30:
+            pmf = scf.KRKS(cell, kpts_obj, xc=xc)
+        else:
+            pmf = scf.KRKS(cell, kpts_obj, xc=xc).density_fit()
+
+    if key.lower() == "cycle1":
+        # kernel with max_cycle = 0
+        pmf.max_cycle = 0
+        pmf.kernel()
+        mo_coeff = pmf.mo_coeff
+        mo_occ = pmf.mo_occ
+    elif key.lower() in ["hcore", "h1e"]:
+        # diagonalize the core Hamiltonian only
+        h1e = pmf.get_hcore()
+        s1e = pmf.get_ovlp()
+        mo_energy, mo_coeff = pmf.eig(h1e, s1e)
+        mo_occ = pmf.get_occ(mo_energy, mo_coeff)
+    elif key.lower() == "scf":
+        pmf.kernel()
+        mo_coeff = pmf.mo_coeff
+        mo_occ = pmf.mo_occ
+    else:
+        raise NotImplementedError("Init guess %s not implemented" % key)
+
+    # TODO: support specifying nvir for each kpt (useful for e.g., metals)
+    assert(isinstance(nvir, int) and nvir >= 0)
+    nocc = cell0.nelectron // 2
+    nmo_ks = [len(mo_occ[k]) for k in range(nkpts)]
+    ntot = nocc + nvir
+    # the GTO calculation may provide fewer than ntot orbitals
+    ntot_ks = [min(ntot,nmo_ks[k]) for k in range(nkpts)]
+
+    log.debug1("converting init MOs from GTO basis to PW basis")
+    C_ks = pw_helper.get_C_ks_G(cell, kpts, mo_coeff, ntot_ks, out=out,
+                                verbose=cell0.verbose, mesh=mesh)
+    mocc_ks = [mo_occ[k][:ntot_ks[k]] for k in range(nkpts)]
+
+    C_ks = orth_mo(cell0, C_ks, mocc_ks)
+
+    # pad with random orbitals wherever fewer than ntot are available
+    C_ks, mocc_ks = add_random_mo(cell0, [ntot]*nkpts, C_ks, mocc_ks)
+
+    return C_ks, mocc_ks
+
+
+def add_random_mo(cell, n_ks, C_ks, mocc_ks):
+    """ Pad every k-point that holds fewer than n_ks[k] orbitals.
+
+    Missing orbitals are generated by add_random_mo1 and receive zero
+    occupation; a warning is logged for every padded k-point.
+
+    Args:
+        cell: Cell object (provides stdout and verbose for logging).
+        n_ks (list of int): Requested number of orbitals per k-point.
+        C_ks: Band coefficients, read and written through get_kcomp and
+            set_kcomp.
+        mocc_ks (list of numpy arrays): Occupations, extended in place.
+
+    Returns:
+        tuple: ``(C_ks, mocc_ks)``.
+    """
+    log = logger.Logger(cell.stdout, cell.verbose)
+
+    for k, n_want in enumerate(n_ks):
+        C_have = get_kcomp(C_ks, k)
+        n_have = C_have.shape[0]
+        if not n_have < n_want:
+            continue
+
+        n_new = n_want - n_have
+        log.warn("Requesting more orbitals than currently have "
+                 "(%d > %d) for kpt %d. Adding %d random orbitals.",
+                 n_want, n_have, k, n_new)
+        set_kcomp(add_random_mo1(cell, n_want, C_have), C_ks, k)
+
+        occ_k = mocc_ks[k]
+        # the new orbitals are unoccupied
+        mocc_ks[k] = np.concatenate([occ_k, np.zeros(n_new,dtype=occ_k.dtype)])
+
+    return C_ks, mocc_ks
+
+
+def add_random_mo1(cell, n, C0):
+    """Extend the orbitals C0 to n rows by appending random orbitals.
+
+    The new rows are drawn with np.random.rand, have their components along
+    the rows of C0 projected out and are then passed through pw_helper.orth.
+
+    Args:
+        cell: Cell object forwarded to pw_helper.orth.
+        n (int): Requested total number of orbitals.
+        C0 (numpy array): Existing orbitals, one per row.
+
+    Returns:
+        numpy array: C0 itself if it already has n rows, otherwise C0 with
+        the new rows stacked below it.
+    """
+    n_have, n_grid = C0.shape
+    if n == n_have:
+        return C0
+
+    rand_rows = np.random.rand(n-n_have, n_grid) + 0j
+    # project out the span of the existing orbitals
+    ovlp = lib.dot(rand_rows, C0.conj().T)
+    rand_rows -= lib.dot(ovlp, C0)
+    rand_rows = pw_helper.orth(cell, rand_rows, 1e-3, follow=False)
+
+    return np.vstack([C0, rand_rows])
+
+
+def init_guess_by_chkfile(cell, chkfile_name, nvir, project=True, out=None,
+                          basis_ks=None):
+    """Build an initial guess from the orbitals stored in a chkfile.
+
+    The occupations are read with pyscf.pbc.scf.chkfile.load_scf and the
+    orbitals from the "mo_coeff" entry of the file; both are then handed to
+    init_guess_from_C0.
+
+    Args:
+        cell: Cell object forwarded to init_guess_from_C0.
+        chkfile_name (str): Path of the chkfile.
+        nvir (int): Number of virtual bands requested.
+        project (bool): Forwarded to init_guess_from_C0.
+        out: Container receiving the orbitals; a new list if None.
+        basis_ks: Forwarded to init_guess_from_C0.
+
+    Returns:
+        tuple: ``(C_ks, mocc_ks)``.
+    """
+    from pyscf.pbc.scf import chkfile
+    mocc_ks = chkfile.load_scf(chkfile_name)[1]["mo_occ"]
+    nkpts = len(mocc_ks)
+
+    # NOTE(review): with max(), the orbital count never drops below what is
+    # stored in the chkfile -- confirm this is intended.
+    ntot_ks = [max(np.sum(mocc_ks[k]>THR_OCC)+nvir, len(mocc_ks[k]))
+               for k in range(nkpts)]
+
+    C_ks = [None] * nkpts if out is None else out
+    with h5py.File(chkfile_name, "r") as fchk:
+        stored = fchk["mo_coeff"]
+        for k in range(nkpts):
+            set_kcomp(get_kcomp(stored, k), C_ks, k)
+
+    C_ks, mocc_ks = init_guess_from_C0(cell, C_ks, ntot_ks, project=project,
+                                       out=C_ks, mocc_ks=mocc_ks,
+                                       basis_ks=basis_ks)
+
+    return C_ks, mocc_ks
+
+
+def init_guess_from_C0(cell, C0_ks, ntot_ks, project=True, out=None,
+                       mocc_ks=None, basis_ks=None):
+    """Turn given orbitals into an initial guess with ntot_ks[k] bands each.
+
+    For every k-point only the first ntot_ks[k] orbitals are kept. If the
+    number of plane waves differs from the expected one, the coefficients
+    are mapped between the two meshes (project=True) or an error is raised.
+    The result is passed through orth_mo and padded by add_random_mo.
+
+    Args:
+        cell: Cell object; cell.mesh sets the expected number of plane
+            waves when basis_ks is None.
+        C0_ks: Input orbitals, read through get_kcomp.
+        ntot_ks (list of int): Number of bands requested at each k-point.
+        project (bool): Whether orbitals on a different mesh are projected.
+        out: Container receiving the orbitals; a new list if None.
+        mocc_ks: Occupations per k-point, truncated in place. If None they
+            are generated with get_mo_occ.
+        basis_ks: If given, basis_ks[k].npw is the expected number of plane
+            waves at k-point k.
+
+    Returns:
+        tuple: ``(C_ks, mocc_ks)``.
+
+    Raises:
+        NotImplementedError: If projection is needed and the number of
+            input plane waves is not a perfect cube.
+        RuntimeError: If the sizes differ and project is False.
+    """
+
+    log = logger.Logger(cell.stdout, cell.verbose)
+
+    nkpts = len(C0_ks)
+    C_ks = [None] * nkpts if out is None else out
+
+    # the mesh map is built once, at the first k-point that needs it
+    mesh_map = None
+    have_mesh_map = False
+
+    for k in range(nkpts):
+        ntot = ntot_ks[k]
+        C = get_kcomp(C0_ks, k)
+        # keep only the first ntot orbitals (and their occupations)
+        if C.shape[0] > ntot:
+            C = C[:ntot]
+            if mocc_ks is not None:
+                mocc_ks[k] = mocc_ks[k][:ntot]
+
+        npw = np.prod(cell.mesh) if basis_ks is None else basis_ks[k].npw
+        npw0 = C.shape[1]
+        if npw != npw0:
+            if not project:
+                raise RuntimeError("Input C0 has wrong shape. Expected %d PWs; "
+                                   "got %d." % (npw, npw0))
+            if not have_mesh_map:
+                mesh = cell.mesh
+                # the input mesh is assumed to have equal size along all axes
+                nmesh0 = int(np.round(npw0**(0.3333333333)))
+                if nmesh0**3 != npw0:
+                    raise NotImplementedError("Project MOs not implemented "
+                                              "for non-cubic crystals.")
+                mesh0 = np.array([nmesh0]*3)
+                log.warn("Input orbitals use mesh %s while cell uses mesh "
+                         "%s. Performing projection.", mesh0, mesh)
+                if npw > npw0:
+                    mesh_map = pw_helper.get_mesh_map(cell, 0, 0, mesh,
+                                                      mesh0)
+                else:
+                    mesh_map = pw_helper.get_mesh_map(cell, 0, 0, mesh0,
+                                                      mesh)
+                have_mesh_map = True
+
+            if npw > npw0:
+                # scatter the input into a zero-filled array of the new size
+                C_small = C
+                C = np.zeros((C_small.shape[0],npw), dtype=C_small.dtype)
+                C[:,mesh_map] = C_small
+                C_small = None
+            else:
+                C = C[:,mesh_map]
+
+        set_kcomp(C, C_ks, k)
+        C = None
+
+    if mocc_ks is None:
+        mocc_ks = get_mo_occ(cell, C_ks=C_ks)
+
+    C_ks = orth_mo(cell, C_ks, mocc_ks)
+
+    C_ks, mocc_ks = add_random_mo(cell, ntot_ks, C_ks, mocc_ks)
+
+    return C_ks, mocc_ks
+
+
+def update_pp(mf, C_ks):
+    """
+    Refresh the support vectors of the nonlocal pseudopotential for the
+    bands C_ks by calling mf.with_pp.update_vppnloc_support_vec. This is
+    only needed for CCECP because computing the nonlocal part is
+    expensive, and so is done only for each outer iteration in
+    the SCF double-loop.
+
+    The elapsed CPU and wall time is accumulated in
+    mf.scf_summary["t-ppnl"].
+    """
+    t_start = np.asarray([logger.process_clock(), logger.perf_counter()])
+    summary = mf.scf_summary
+    if "t-ppnl" not in summary:
+        summary["t-ppnl"] = np.zeros(2)
+
+    mf.with_pp.update_vppnloc_support_vec(C_ks, basis_ks=mf._basis_data)
+
+    t_stop = np.asarray([logger.process_clock(), logger.perf_counter()])
+    mf.scf_summary["t-ppnl"] += t_stop - t_start
+
+
+def update_k(mf, C_ks, mocc_ks):
+    """
+    Refresh the support vectors of the K potential (EXX) for the bands
+    C_ks and their occupations mocc_ks by calling
+    mf.with_jk.update_k_support_vec. If any dimension of mf.wf_mesh is
+    even, C_ks is additionally passed as Ct_ks.
+
+    The elapsed CPU and wall time is accumulated in
+    mf.scf_summary["t-ace"].
+    """
+    t_start = np.asarray([logger.process_clock(), logger.perf_counter()])
+    if "t-ace" not in mf.scf_summary:
+        mf.scf_summary["t-ace"] = np.zeros(2)
+
+    mesh = np.array(mf.wf_mesh)
+    all_odd = np.all(mesh % 2 != 0)
+    # all odd --> s2 symm for occ bands; otherwise Ct_ks is passed explicitly
+    extra_kw = {} if all_odd else {"Ct_ks": C_ks}
+    mf.with_jk.update_k_support_vec(C_ks, mocc_ks, mf.kpts, **extra_kw)
+
+    t_stop = np.asarray([logger.process_clock(), logger.perf_counter()])
+    mf.scf_summary["t-ace"] += t_stop - t_start
+
+
+def eig_subspace(mf, C_ks, mocc_ks, mesh=None, Gv=None, vj_R=None, exxdiv=None,
+                 comp=None):
+    """
+    Diagonalize the effective Hamiltonian within the span of the bands C_ks
+    and rotate the bands to the resulting eigenvectors, k-point by k-point.
+    update_k and update_pp are not called here, so whatever EXX potential /
+    nonlocal CCECP part is already stored in mf is used.
+
+    Args:
+        C_ks: Bands; overwritten with the rotated bands through set_kcomp.
+        mocc_ks: Occupations
+        mesh: FFT grid. Defaults to mf.wf_mesh.
+        Gv: Plane-wave wave-vectors of FFT grid
+        vj_R: Coulomb potential in real-space. Computed from C_ks and
+            mocc_ks if not given.
+        exxdiv: Divergence approach for EXX operator. Defaults to mf.exxdiv.
+        comp (int):
+            If not None, apply the effective Hamiltonian at this spin index.
+            In that case the occupations are returned unchanged and no
+            Ewald correction is applied to the energies.
+
+    Returns:
+        tuple: ``(C_ks, moe_ks, mocc_ks)``.
+    """
+    if vj_R is None: vj_R = mf.get_vj_R(C_ks, mocc_ks)
+    if mesh is None: mesh = mf.wf_mesh
+    if Gv is None: Gv = mf.cell.get_Gv(mesh)
+    if exxdiv is None: exxdiv = mf.exxdiv
+
+    kpts = mf.kpts
+    moe_ks = []
+    for k in range(len(kpts)):
+        C_k = get_kcomp(C_ks, k)
+        FC_k = mf.apply_Fock_kpt(C_k, kpts[k], mocc_ks, mesh, Gv, vj_R, exxdiv,
+                                 comp=comp, ret_E=False)
+        # Fock matrix in the basis of the current bands
+        eigval, eigvec = scipy.linalg.eigh(lib.dot(C_k.conj(), FC_k.T))
+        moe_ks.append(eigval)
+        set_kcomp(lib.dot(eigvec.T, C_k), C_ks, k)
+        C_k = FC_k = None
+
+    if comp is None:
+        mocc_ks = mf.get_mo_occ(moe_ks=moe_ks)
+        if mf.exxdiv == "ewald":
+            moe_ks = ewald_correction(moe_ks, mocc_ks, mf.madelung)
+
+    return C_ks, moe_ks, mocc_ks
+
+
+def apply_hcore_kpt(mf, C_k, kpt, mesh, Gv, with_pp, C_k_R=None, comp=None,
+                    ret_E=False, mocc_ks=None):
+    r""" Apply hcore (kinetic and PP) operator to orbitals at given k-point.
+
+    Three terms are applied and summed: the kinetic term
+    (pw_helper.apply_kin_kpt), the local pseudopotential
+    (with_pp.apply_vppl_kpt) and the nonlocal pseudopotential
+    (with_pp.apply_vppnl_kpt). The time spent in each term is accumulated
+    in mf.scf_summary under "t-<name>", with the names taken from the first
+    three entries of mf.scf_summary["e_comp_name_lst"].
+
+    Args:
+        mf: Mean-field object (stdout, verbose, kpts, get_basis_kpt,
+            scf_summary).
+        C_k: Orbital coefficients at this k-point, one orbital per row.
+        kpt: The k-point.
+        mesh: FFT mesh.
+        Gv: G-vectors of the mesh.
+        with_pp: Pseudopotential object.
+        C_k_R: Orbitals as returned by pw_helper.wf_ifft; computed from C_k
+            if None.
+        comp: Component index forwarded to with_pp.apply_vppnl_kpt.
+        ret_E (bool): If True, also return the three energy contributions.
+        mocc_ks: Occupations used to weight the energies. None uses 2 for
+            every orbital; a 1D numpy array is used as is; otherwise it is
+            indexed by the position of kpt in mf.kpts.
+
+    Returns:
+        Cbar_k if ret_E is False, else ``(Cbar_k, es)`` where es holds the
+        real parts of the three energy contributions.
+    """
+
+    log = logger.Logger(mf.stdout, mf.verbose)
+
+    # energy contributions: kinetic, local PP, nonlocal PP
+    es = np.zeros(3, dtype=np.complex128)
+
+    if mocc_ks is None:
+        mocc_k = 2
+    elif isinstance(mocc_ks, np.ndarray) and mocc_ks.ndim == 1:
+        mocc_k = mocc_ks
+    else:
+        k = member(kpt, mf.kpts)[0]
+        mocc_k = mocc_ks[k][:C_k.shape[0]]
+
+    basis = mf.get_basis_kpt(kpt)
+
+    # one (CPU, wall) time span per term
+    tspans = np.zeros((3,2))
+    tick = np.asarray([logger.process_clock(), logger.perf_counter()])
+
+    tmp = pw_helper.apply_kin_kpt(C_k, kpt, Gv, basis=basis)
+    # Cbar_k aliases the kinetic result and is accumulated in place below
+    Cbar_k = tmp
+    es[0] = (np.einsum("ig,ig->i", C_k.conj(), tmp) * mocc_k).sum()
+    tock = np.asarray([logger.process_clock(), logger.perf_counter()])
+    tspans[0] = tock - tick
+
+    if C_k_R is None:
+        C_k_R = pw_helper.wf_ifft(C_k, mesh, mf.get_basis_kpt(kpt))
+    tmp = with_pp.apply_vppl_kpt(C_k, mesh=mesh, C_k_R=C_k_R, basis=basis)
+    Cbar_k += tmp
+    es[1] = (np.einsum("ig,ig->i", C_k.conj(), tmp) * mocc_k).sum()
+    # tick and tock swap roles so each time stamp is taken only once
+    tick = np.asarray([logger.process_clock(), logger.perf_counter()])
+    tspans[1] = tick - tock
+
+    tmp = with_pp.apply_vppnl_kpt(C_k, kpt, mesh=mesh, Gv=Gv, comp=comp,
+                                  basis=basis)
+    Cbar_k += tmp
+    es[2] = (np.einsum("ig,ig->i", C_k.conj(), tmp) * mocc_k).sum()
+    tock = np.asarray([logger.process_clock(), logger.perf_counter()])
+    tspans[2] = tock - tick
+
+    # accumulate the timings under the names of the first three components
+    for ie_comp, e_comp in enumerate(mf.scf_summary["e_comp_name_lst"][:3]):
+        key = "t-%s" % e_comp
+        if key not in mf.scf_summary:
+            mf.scf_summary[key] = np.zeros(2)
+        mf.scf_summary[key] += tspans[ie_comp]
+
+    if ret_E:
+        # warn about every component whose imaginary part exceeds 1e-6
+        if (np.abs(es.imag) > 1e-6).any():
+            e_comp = mf.scf_summary["e_comp_name_lst"][:3]
+            icomps = np.where(np.abs(es.imag) > 1e-6)[0]
+            log.warn("Energy has large imaginary part:" +
+                     "%s : %s\n" * len(icomps),
+                     *[s for i in icomps for s in [e_comp[i],es[i]]])
+        es = es.real
+        return Cbar_k, es
+    else:
+        return Cbar_k
+
+
+def apply_veff_kpt(mf, C_k, kpt, mocc_ks, kpts, mesh, Gv, vj_R, with_jk,
+                   exxdiv, C_k_R=None, comp=None, ret_E=False):
+    r""" Apply the Coulomb (J) and exchange (K) parts of the Fock operator
+    to orbitals at given k-point.
+
+    The J term comes from with_jk.apply_j_kpt and the K term from
+    with_jk.apply_k_kpt (entering with a minus sign, and halved when comp
+    is None). The time spent in each term is accumulated in
+    mf.scf_summary under "t-<name>", with the names taken from the last two
+    entries of mf.scf_summary["e_comp_name_lst"].
+
+    Args:
+        mf: Mean-field object (stdout, verbose, kpts, get_basis_kpt,
+            scf_summary).
+        C_k: Orbital coefficients at this k-point, one orbital per row.
+        kpt: The k-point.
+        mocc_ks: Occupations used to weight the energies. None uses 2 for
+            every orbital; a 1D numpy array is used as is; otherwise it is
+            indexed by the position of kpt in mf.kpts.
+        kpts: Unused here; kept for interface compatibility.
+        mesh: FFT mesh.
+        Gv: G-vectors of the mesh.
+        vj_R: Coulomb potential passed to with_jk.apply_j_kpt.
+        with_jk: Object providing apply_j_kpt and apply_k_kpt.
+        exxdiv: Forwarded to with_jk.apply_k_kpt.
+        C_k_R: Forwarded to with_jk.apply_j_kpt.
+        comp: Component index forwarded to with_jk.apply_k_kpt.
+        ret_E (bool): If True, also return the two energy contributions.
+
+    Returns:
+        Cbar_k if ret_E is False, else ``(Cbar_k, es)`` where es holds the
+        real parts of the J and K energy contributions.
+    """
+    log = logger.Logger(mf.stdout, mf.verbose)
+
+    tspans = np.zeros((2,2))
+    es = np.zeros(2, dtype=np.complex128)
+
+    nband_k = C_k.shape[0]
+    if mocc_ks is None:
+        # A bare scalar cannot be subscripted below, so expand the default
+        # occupation of 2 into one entry per orbital.
+        mocc_k = np.full(nband_k, 2.)
+    elif isinstance(mocc_ks, np.ndarray) and mocc_ks.ndim == 1:
+        # occupations of this k-point given directly (as in apply_hcore_kpt)
+        mocc_k = mocc_ks
+    else:
+        k = member(kpt, mf.kpts)[0]
+        mocc_k = mocc_ks[k][:nband_k]
+    # occupation-weighted conjugate orbitals, used for the energies
+    Cto_k = C_k.conj() * mocc_k[:, None]
+
+    basis = mf.get_basis_kpt(kpt)
+
+    tick = np.asarray([logger.process_clock(), logger.perf_counter()])
+    tmp = with_jk.apply_j_kpt(C_k, mesh, vj_R, C_k_R=C_k_R, basis=basis)
+    # Cbar_k aliases the J result and is accumulated in place below
+    Cbar_k = tmp
+    es[0] = np.einsum("ig,ig->", Cto_k, tmp) * 0.5
+    tock = np.asarray([logger.process_clock(), logger.perf_counter()])
+    tspans[0] = np.asarray(tock - tick).reshape(1,2)
+
+    tmp = -with_jk.apply_k_kpt(C_k, kpt, mesh=mesh, Gv=Gv, exxdiv=exxdiv,
+                               comp=comp, basis=basis)
+    if comp is None:
+        tmp *= 0.5
+    Cbar_k += tmp
+    es[1] = np.einsum("ig,ig->", Cto_k, tmp) * 0.5
+    # tick and tock swap roles so each time stamp is taken only once
+    tick = np.asarray([logger.process_clock(), logger.perf_counter()])
+    tspans[1] = np.asarray(tick - tock).reshape(1,2)
+
+    for ie_comp,e_comp in enumerate(mf.scf_summary["e_comp_name_lst"][-2:]):
+        key = "t-%s" % e_comp
+        if key not in mf.scf_summary:
+            mf.scf_summary[key] = np.zeros(2)
+        mf.scf_summary[key] += tspans[ie_comp]
+
+    if ret_E:
+        # warn about every component whose imaginary part exceeds 1e-6
+        if (np.abs(es.imag) > 1e-6).any():
+            e_comp = mf.scf_summary["e_comp_name_lst"][-2:]
+            icomps = np.where(np.abs(es.imag) > 1e-6)[0]
+            log.warn("Energy has large imaginary part:" +
+                     "%s : %s\n" * len(icomps),
+                     *[s for i in icomps for s in [e_comp[i],es[i]]])
+        es = es.real
+        return Cbar_k, es
+    else:
+        return Cbar_k
+
+
+def apply_Fock_kpt(mf, C_k, kpt, mocc_ks, mesh, Gv, vj_R, exxdiv,
+                   comp=None, ret_E=False):
+    """ Apply the full Fock operator to orbitals at a given k-point by
+    summing `mf.apply_hcore_kpt` (1e part) and `mf.apply_veff_kpt` (2e part).
+
+    Returns the transformed orbitals. With `ret_E=True`, a tuple of the
+    transformed orbitals and the concatenated energy components (1e
+    components first, then 2e components) is returned instead.
+    """
+    all_kpts = mf.kpts
+    pp_obj = mf.with_pp
+    jk_obj = mf.with_jk
+    # inverse FFT of the orbitals, computed once and shared by both parts
+    orb_R = pw_helper.wf_ifft(C_k, mesh, mf.get_basis_kpt(kpt))
+    out_1e = mf.apply_hcore_kpt(C_k, kpt, mesh, Gv, pp_obj, comp=comp,
+                                C_k_R=orb_R, ret_E=ret_E, mocc_ks=mocc_ks)
+    out_2e = mf.apply_veff_kpt(C_k, kpt, mocc_ks, all_kpts, mesh, Gv, vj_R,
+                               jk_obj, exxdiv, C_k_R=orb_R, comp=comp,
+                               ret_E=ret_E)
+    # drop the reference to the shared array
+    orb_R = None
+
+    if not ret_E:
+        return out_1e + out_2e
+
+    es = np.concatenate([out_1e[1], out_2e[1]])
+    return out_1e[0] + out_2e[0], es
+
+
+def ewald_correction(moe_ks, mocc_ks, madelung):
+    """ Return a shifted copy of the orbital energies:
+    ``moe - 0.5 * mocc * madelung`` for every k-point.
+
+    Two layouts are accepted and told apart by the rank of
+    ``moe_ks[0][0]``:
+
+    * ``moe_ks[k][i]`` (scalar entries): the shift is applied per k-point.
+    * ``moe_ks[comp][k][i]``: the function recurses over the ``ncomp``
+      components, scaling ``madelung`` by ``ncomp``.
+
+    The inputs are not modified.
+    """
+    # Rank test instead of isinstance(..., float), so that energies stored
+    # with a non-float64 dtype (e.g. float32) are not mistaken for the
+    # component-resolved layout.
+    if np.ndim(moe_ks[0][0]) == 0:      # RHF
+        nkpts = len(moe_ks)
+        moe_ks_new = [None] * nkpts
+        for k in range(nkpts):
+            moe_ks_new[k] = moe_ks[k].copy()
+            moe_ks_new[k][:] -= 0.5 * mocc_ks[k] * madelung
+    else:                               # UHF
+        ncomp = len(moe_ks)
+        moe_ks_new = [ewald_correction(moe_ks[comp], mocc_ks[comp],
+                                       ncomp * madelung)
+                      for comp in range(ncomp)]
+    return moe_ks_new
+
+
+def get_mo_energy(mf, C_ks, mocc_ks, mesh=None, Gv=None, exxdiv=None,
+                  vj_R=None, comp=None, ret_mocc=True, full_ham=False):
+    """
+    Evaluate the orbital energies of C_ks as expectation values of the
+    Fock operator, i.e. without diagonalizing in the C_ks subspace.
+
+    With `full_ham=True`, the list of Hermitized Fock matrices in the
+    basis of C_ks (one per k-point) is returned instead; the orbital
+    energies are the diagonals of those matrices.
+
+    Otherwise the per-k-point orbital energies are returned, together
+    with the occupations if `ret_mocc=True`. Occupations are recomputed
+    with `mf.get_mo_occ` when `ret_mocc` is True or `comp` is None. When
+    `mf.exxdiv == "ewald"` and `comp` is None, the energies are shifted
+    by `ewald_correction`.
+    """
+    log = logger.Logger(mf.stdout, mf.verbose)
+
+    if vj_R is None:
+        vj_R = mf.get_vj_R(C_ks, mocc_ks)
+    if mesh is None:
+        mesh = mf.wf_mesh
+    if Gv is None:
+        Gv = mf.cell.get_Gv(mesh)
+    if exxdiv is None:
+        exxdiv = mf.exxdiv
+
+    moe_ks = []
+    for k, kpt in enumerate(mf.kpts):
+        C_k = get_kcomp(C_ks, k)
+        Cbar_k = mf.apply_Fock_kpt(C_k, kpt, mocc_ks, mesh, Gv, vj_R,
+                                   exxdiv, comp=comp, ret_E=False)
+        if full_ham:
+            ham_k = np.dot(C_k.conj(), Cbar_k.T)
+            # symmetrize to obtain a Hermitian matrix
+            moe_ks.append(0.5 * (ham_k + ham_k.conj().T))
+            set_kcomp(C_k, C_ks, k)
+        else:
+            diag_k = np.einsum("ig,ig->i", C_k.conj(), Cbar_k)
+            if np.any(np.abs(diag_k.imag) > 1e-6):
+                log.warn("MO energies have imaginary part %s for kpt %d", diag_k, k)
+            moe_ks.append(diag_k.real)
+        # drop the references to the per-k-point arrays
+        C_k = Cbar_k = None
+
+    if full_ham:
+        return moe_ks
+
+    if ret_mocc or comp is None:
+        mocc_ks = mf.get_mo_occ(moe_ks=moe_ks)
+    if mf.exxdiv == "ewald" and comp is None:
+        moe_ks = ewald_correction(moe_ks, mocc_ks, mf.madelung)
+
+    if not ret_mocc:
+        return moe_ks
+    return moe_ks, mocc_ks
+
+
+def energy_elec(mf, C_ks, mocc_ks, mesh=None, Gv=None, moe_ks=None,
+                vj_R=None, exxdiv=None):
+    ''' Compute the electronic energy as the weighted sum over k-points.
+
+    If `moe_ks` is None, the Fock operator is applied to the occupied
+    orbitals (occupation > THR_OCC) and the weighted energy components are
+    stored in `mf.scf_summary` under the names in
+    `mf.scf_summary["e_comp_name_lst"]`.
+
+    If `moe_ks` is given, only `mf.apply_hcore_kpt` is applied and its
+    energy is averaged with the occupation-weighted orbital energies,
+    which avoids the cost of applying the expensive vj and vk.
+    '''
+    if mesh is None:
+        mesh = mf.wf_mesh
+    if Gv is None:
+        Gv = mf.cell.get_Gv(mesh)
+    if exxdiv is None:
+        exxdiv = mf.exxdiv
+
+    kpts = mf.kpts
+    nkpts = len(kpts)
+    wts = mf.weights
+    e_ks = np.zeros(nkpts)
+    from_fock = moe_ks is None
+
+    if from_fock:
+        if vj_R is None:
+            vj_R = mf.get_vj_R(C_ks, mocc_ks)
+        e_comp = 0  # becomes an array after the first k-point
+        for k in range(nkpts):
+            occ = np.where(mocc_ks[k] > THR_OCC)[0]
+            Co_k = get_kcomp(C_ks, k, occ=occ)
+            e_comp_k = mf.apply_Fock_kpt(Co_k, kpts[k], mocc_ks, mesh, Gv,
+                                         vj_R, exxdiv, ret_E=True)[1]
+            e_ks[k] = np.sum(e_comp_k)
+            e_comp += e_comp_k * wts[k]
+
+        name_lst = mf.scf_summary["e_comp_name_lst"]
+        if exxdiv == "ewald":
+            # book the Ewald shift under the exchange component
+            e_comp[name_lst.index("ex")] += mf.etot_shift_ewald
+        for name, e in zip(name_lst, e_comp):
+            mf.scf_summary[name] = e
+    else:
+        for k in range(nkpts):
+            occ = np.where(mocc_ks[k] > THR_OCC)[0]
+            Co_k = get_kcomp(C_ks, k, occ=occ)
+            mocc_k = mocc_ks[k][occ]
+            e1_comp = mf.apply_hcore_kpt(Co_k, kpts[k], mesh, Gv, mf.with_pp,
+                                         mocc_ks=mocc_k, ret_E=True)[1]
+            half_e1 = 0.5 * np.sum(e1_comp)
+            half_eig = 0.5 * np.sum(moe_ks[k][occ] * mocc_k)
+            e_ks[k] = half_e1
+            e_ks[k] += half_eig
+
+    e_scf = np.dot(e_ks, wts)
+    if from_fock and exxdiv == "ewald":
+        # Note: ewald correction is not needed if e_tot is computed from
+        # moe_ks since the correction is already in the mo energy
+        e_scf += mf.etot_shift_ewald
+
+    return e_scf
+
+
+def energy_tot(mf, C_ks, mocc_ks, moe_ks=None, mesh=None, Gv=None,
+               vj_R=None, exxdiv=None):
+    """ Total energy: the result of `mf.energy_elec` plus the nuclear
+    term stored in `mf.scf_summary["nuc"]`.
+    """
+    nuc_part = mf.scf_summary["nuc"]
+    elec_part = mf.energy_elec(C_ks, mocc_ks, moe_ks=moe_ks, mesh=mesh,
+                               Gv=Gv, vj_R=vj_R, exxdiv=exxdiv)
+    return elec_part + nuc_part
+
+
+def get_precond_davidson(kpt, Gv, basis=None):
+    """ Build the preconditioner callable passed to the Davidson solver.
+
+    The kinetic-energy diagonal 0.5*|k+G|^2 is computed from `basis.Gk`
+    if `basis` is given, and otherwise from `kpt + Gv` (just `Gv` when
+    `kpt` is numerically zero). The returned `precond(dx, e, x0)` scales
+    `dx` elementwise; `e` and `x0` are accepted but not used.
+    """
+    if basis is not None:
+        kG = basis.Gk
+    elif np.sum(np.abs(kpt)) > 1.E-9:
+        kG = kpt + Gv
+    else:
+        kG = Gv
+    kin_diag = np.einsum("gj,gj->g", kG, kG) * 0.5
+
+    def precond(dx, e, x0):
+        """ G Kresse and J. Furthmüller PRB, 54, 1996: 11169 - 11186
+        """
+        # kinetic-energy-weighted norm of the vector being preconditioned
+        Ek = np.einsum("g,g,g->", kin_diag, dx.conj(), dx)
+        ratio = kin_diag / (1.5 * Ek)
+        num = 27.+18.*ratio+12.*ratio**2.+8.*ratio**3.
+        denom = num + 16*ratio**4.
+        return (num/denom) * dx
+
+    return precond
+
+
+def converge_band_kpt(mf, C_k, kpt, mocc_ks, nband=None, mesh=None, Gv=None,
+                      vj_R=None, comp=None,
+                      conv_tol_davidson=1e-6,
+                      max_cycle_davidson=100,
+                      verbose_davidson=0):
+    ''' Converge the bands at a given k-point using the Davidson algorithm,
+    starting from the orbitals in `C_k`.
+
+    Args:
+        C_k: initial guess orbitals; axis 0 indexes the bands.
+        kpt: the k-point.
+        mocc_ks: occupations, forwarded to `mf.apply_Fock_kpt`.
+        nband (int): number of roots to solve for; defaults to
+            `C_k.shape[0]`.
+        mesh: FFT mesh; defaults to `mf.wf_mesh`.
+        Gv: G-vectors; defaults to `mf.cell.get_Gv(mesh)`.
+        vj_R, comp: forwarded to `mf.apply_Fock_kpt`.
+        conv_tol_davidson, max_cycle_davidson, verbose_davidson:
+            forwarded to `lib.davidson1` as `tol`, `max_cycle`, `verbose`.
+
+    Returns:
+        (conv, e, c, nfock): the convergence flags, eigenvalues and
+        eigenvectors from `lib.davidson1` (eigenvectors converted to an
+        array), and the number of times the Fock operator was applied.
+
+    Side effects:
+        The elapsed CPU/wall time is accumulated in
+        `mf.scf_summary["t-dvds"]`.
+    '''
+    cell = mf.cell
+    # Default to the wave-function mesh, as get_mo_energy and energy_elec
+    # do. mf.wf_mesh falls back to cell.mesh when no custom mesh is set.
+    if mesh is None: mesh = mf.wf_mesh
+    if Gv is None: Gv = cell.get_Gv(mesh)
+
+    fc = [0]    # counter for Fock applications, updated inside the closure
+    def FC(C_k_, ret_E=False):
+        fc[0] += 1
+        C_k_ = np.asarray(C_k_)
+        # exxdiv is fixed to "none" for the operator used in the solver
+        Cbar_k_ = mf.apply_Fock_kpt(C_k_, kpt, mocc_ks, mesh, Gv,
+                                    vj_R, "none",
+                                    comp=comp, ret_E=False)
+        return Cbar_k_
+
+    tick = np.asarray([logger.process_clock(), logger.perf_counter()])
+
+    precond = get_precond_davidson(kpt, Gv, basis=mf.get_basis_kpt(kpt))
+
+    nroots = C_k.shape[0] if nband is None else nband
+
+    conv, e, c = lib.davidson1(FC, C_k, precond,
+                               nroots=nroots,
+                               verbose=verbose_davidson,
+                               tol=conv_tol_davidson,
+                               max_cycle=max_cycle_davidson)
+    c = np.asarray(c)
+
+    tock = np.asarray([logger.process_clock(), logger.perf_counter()])
+    key = "t-dvds"
+    if key not in mf.scf_summary:
+        mf.scf_summary[key] = np.zeros(2)
+    mf.scf_summary[key] += tock - tick
+
+    return conv, e, c, fc[0]
+
+
+def converge_band(mf, C_ks, mocc_ks, kpts, Cout_ks=None,
+                  mesh=None, Gv=None,
+                  vj_R=None, comp=None,
+                  conv_tol_davidson=1e-6,
+                  max_cycle_davidson=100,
+                  verbose_davidson=0):
+    """ Run `mf.converge_band_kpt` for every k-point in `kpts`.
+
+    The converged orbitals are written into `Cout_ks` (`C_ks` itself if
+    `Cout_ks` is None).
+
+    Returns:
+        (conv_ks, moeout_ks, Cout_ks, fc_ks): per-k-point convergence
+        flags (product of the flags returned for the individual roots),
+        band energies, the orbital container that was written to, and
+        the per-k-point counts returned by `converge_band_kpt`.
+    """
+    if vj_R is None:
+        vj_R = mf.get_vj_R(C_ks, mocc_ks)
+    # vj_R above uses the full occupations; from here on only the
+    # requested component is needed
+    if comp is not None:
+        mocc_ks = mocc_ks[comp]
+    if Cout_ks is None:
+        Cout_ks = C_ks
+
+    conv_ks = []
+    moeout_ks = []
+    fc_ks = []
+    for k in range(len(kpts)):
+        roots_conv, moe_k, Cnew_k, nfock_k = mf.converge_band_kpt(
+            get_kcomp(C_ks, k), kpts[k], mocc_ks,
+            mesh=mesh, Gv=Gv,
+            vj_R=vj_R, comp=comp,
+            conv_tol_davidson=conv_tol_davidson,
+            max_cycle_davidson=max_cycle_davidson,
+            verbose_davidson=verbose_davidson)
+        moeout_ks.append(moe_k)
+        fc_ks.append(nfock_k)
+        set_kcomp(Cnew_k, Cout_ks, k)
+        conv_ks.append(np.prod(roots_conv))
+
+    return conv_ks, moeout_ks, Cout_ks, fc_ks
+
+
+def get_cpw_virtual(mf, basis, amin=None, amax=None, thr_lindep=1e-14,
+                    erifile=None):
+    """ Turn input GTO basis into a set of contracted PWs, project out the
+    occupied PW bands, and then diagonalize the Fock matrix in the
+    resulting (occupied + CPW) orbital space.
+
+    Args:
+        basis/amin/amax:
+            see docs for gto2cpw in pyscf.pbc.pwscf.pw_helper
+        thr_lindep (float):
+            linear dependency threshold for canonicalization of the CPWs.
+        erifile (hdf5 file):
+            C_ks (PW occ + CPW vir), mo_energy, mo_occ will be written to this
+            file. If not provided, mf.chkfile is used. A RuntimeError is raised
+            if the latter is None.
+
+    Returns:
+        (e_tot, moe_ks, mocc_ks): total energy recomputed with the new
+        orbitals, band energies and occupations for every k-point.
+
+    Raises:
+        NotImplementedError: if occupation smearing is active.
+        RuntimeError: if neither `erifile` nor `mf.chkfile` is set.
+        TypeError: if `basis` is neither a str nor a dict.
+    """
+    from pyscf.pbc.pwscf.smearing import has_smearing
+    log = logger.Logger(mf.stdout, mf.verbose)
+    if has_smearing(mf):
+        raise NotImplementedError("CPW Virtuals with occupation smearing")
+
+    assert(mf.converged)
+    if erifile is None: erifile = mf.chkfile
+    if not erifile:
+        # explicit exception (as documented) instead of an assert, which
+        # would be skipped when running with python -O
+        raise RuntimeError("get_cpw_virtual requires either `erifile` or "
+                           "`mf.chkfile` to be set.")
+    kpts = mf.kpts
+    nkpts = len(kpts)
+    cell = mf.cell
+    # formatting basis
+    atmsymbs = cell._basis.keys()
+    if isinstance(basis, str):
+        basisdict = {atmsymb: basis for atmsymb in atmsymbs}
+    elif isinstance(basis, dict):
+        assert(basis.keys() == atmsymbs)
+        basisdict = basis
+    else:
+        raise TypeError("Input basis must be either a str or dict.")
+    # pruning pGTOs that have unwanted exponents
+    basisdict = pw_helper.remove_pGTO_from_cGTO_(basisdict, amax=amax,
+                                                 amin=amin, verbose=mf.verbose)
+    # make a new cell with the modified GTO basis
+    cell_cpw = cell.copy()
+    cell_cpw.basis = basisdict
+    cell_cpw.verbose = 0
+    cell_cpw.build()
+    # make CPW for all kpts
+    nao = cell_cpw.nao_nr()
+    Cao = np.eye(nao)+0.j
+    Co_ks = mf.mo_coeff
+    mocc_ks0 = mf.mo_occ
+    mesh = mf.wf_mesh
+    # estimate memory usage and decide incore/outcore mode
+    max_memory = (cell.max_memory - lib.current_memory()[0]) * 0.8
+    nocc_max = np.max([sum(mocc_ks0[k]>THR_OCC) for k in range(nkpts)])
+    ngrids = Co_ks[0].shape[1]
+    est_memory = (nao+nocc_max) * ngrids * (nkpts+2) * 16 / 1024**2.
+    incore = est_memory < max_memory
+    fswap = None
+    if incore:
+        C_ks = [None] * nkpts
+    else:
+        swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+        fswap = lib.H5TmpFile(swapfile.name)
+        swapfile = None
+        C_ks = fswap.create_group("C_ks")
+    try:
+        mocc_ks = [None] * nkpts
+        for k in range(nkpts):
+            Cv = pw_helper.get_C_ks_G(cell_cpw, [kpts[k]], [Cao], [nao],
+                                      mesh=mesh)[0]
+            if mf._basis_data is not None:
+                # restrict to the plane waves kept in the basis of this kpt
+                Cv = Cv[:, mf._basis_data[k].indexes]
+            occ = np.where(mocc_ks0[k]>THR_OCC)[0]
+            Co = get_kcomp(Co_ks, k, occ=occ)
+            # project the occupied space out of the CPWs
+            Cv -= lib.dot(lib.dot(Cv, Co.conj().T), Co)
+            Cv = pw_helper.orth(cell, Cv, thr_lindep=thr_lindep, follow=False)
+            C = np.vstack([Co,Cv])
+            set_kcomp(C, C_ks, k)
+            mocc_ks[k] = np.asarray([2.]*len(occ) + [0.]*Cv.shape[0])
+            C = Co = Cv = None
+        # build and diagonalize the Fock matrix
+        mf.update_pp(C_ks)
+        mf.update_k(C_ks, mocc_ks)
+        Gv = cell.get_Gv(mesh)
+        vj_R = mf.get_vj_R(C_ks, mocc_ks, mesh=mesh, Gv=Gv)
+        exxdiv = mf.exxdiv
+        moe_ks = [None] * nkpts
+        for k in range(nkpts):
+            C = get_kcomp(C_ks, k)
+            Cbar = mf.apply_Fock_kpt(C, kpts[k], mocc_ks, mesh, Gv, vj_R,
+                                     exxdiv)
+            F = lib.dot(C.conj(), Cbar.T)
+            # occ-vir coupling, reported as a diagnostic only
+            Fov = F[mocc_ks[k]>THR_OCC][:,mocc_ks[k]<THR_OCC]
+            err_Fov = np.max(np.abs(Fov))
+            log.debug1("kpt %d [% .4f % .4f % .4f]  no = %2d  nv = %3d "
+                       " ||Fov|| = %.3e", k, *kpts[k],
+                       sum(mocc_ks[k]>THR_OCC),
+                       sum(mocc_ks[k]<THR_OCC), err_Fov)
+            e, u = scipy.linalg.eigh(F)
+            C = lib.dot(u.T, C)
+            set_kcomp(C, C_ks, k)
+            Cbar = C = None
+            moe_ks[k] = e
+        if mf.exxdiv == "ewald":
+            moe_ks = ewald_correction(moe_ks, mocc_ks, mf.madelung)
+        e_tot = mf.energy_tot(C_ks, mocc_ks, vj_R=vj_R)
+        de_tot = e_tot - mf.e_tot
+        log.info("SCF energy before %.10f  after CPW %.10f  change %.10f",
+                 mf.e_tot, e_tot, de_tot)
+        if abs(de_tot) > 1e-4:
+            log.warn("CPW causes a significant change in SCF energy. "
+                     "Please check the SCF convergence.")
+        log.debug("CPW band energies")
+        mf.dump_moe(moe_ks, mocc_ks)
+        # dump to chkfile
+        chkfile.dump_scf(cell, erifile, e_tot, moe_ks, mocc_ks, C_ks)
+    finally:
+        # close the swap file even if one of the steps above failed
+        if fswap is not None:
+            fswap.close()
+
+    return e_tot, moe_ks, mocc_ks
+
+
+class PWKSCF(pbc_hf.KSCF):
+    '''PWKSCF base class. non-relativistic RHF using PW basis.
+    '''
+
+    outcore = getattr(__config__, 'pbc_pwscf_khf_PWKRHF_outcore', False)
+    conv_tol = getattr(__config__, 'pbc_pwscf_khf_PWKRHF_conv_tol', 1e-6)
+    conv_tol_davidson = getattr(__config__,
+                                'pbc_pwscf_khf_PWKRHF_conv_tol_davidson', 1e-7)
+    conv_tol_band = getattr(__config__, 'pbc_pwscf_khf_PWKRHF_conv_tol_band',
+                            1e-4)
+    max_cycle = getattr(__config__, 'pbc_pwscf_khf_PWKRHF_max_cycle', 100)
+    max_cycle_davidson = getattr(__config__,
+                                 'pbc_pwscf_khf_PWKRHF_max_cycle_davidson',
+                                 100)
+    verbose_davidson = getattr(__config__,
+                               'pbc_pwscf_khf_PWKRHF_verbose_davidson', 0)
+    ace_exx = getattr(__config__, 'pbc_pwscf_khf_PWKRHF_ace_exx', True)
+    damp_type = getattr(__config__, 'pbc_pwscf_khf_PWKRHF_damp_type',
+                        "anderson")
+    damp_factor = getattr(__config__, 'pbc_pwscf_khf_PWKRHF_damp_factor', 0.3)
+    conv_check = getattr(__config__, 'scf_hf_SCF_conv_check', True)
+    check_convergence = None
+    callback = None
+
+    def __init__(self, cell, kpts=np.zeros((1,3)), ecut_wf=None, ecut_rho=None,
+                 exxdiv=getattr(__config__, 'pbc_scf_PWKRHF_exxdiv', 'ewald')):
+        """
+        Initialize a PWKSCF object. Note that the wf_mesh (FFT grid dimensions
+        for computing wave functions, densities, and EXX) and xc_mesh
+        (FFT grid dimensions for computing XC energy and potential)
+        are initially set based on ecut_wf and ecut_rho, but they can
+        be modified using the set_meshes function.
+
+        Args:
+            cell (Cell object): Chemical system to compute.
+            kpts (numpy.array): List of k-points to sample Brillouin Zone.
+            ecut_wf (float):
+                Kinetic energy cutoff of the plane-wave basis,
+                in Hartree. If provided, all plane-waves with energy < ecut_wf
+                are included in the basis. If not provided, all plane-waves
+                on a uniform grid of size wf_mesh (cell.mesh by default)
+                are used. By default, wf_mesh is constructed with a cutoff
+                of 4 * ecut_wf. Also, if ecut_rho is not provided, then by
+                default xc_mesh is constructed with a cutoff of 16 * ecut_wf.
+            ecut_rho (float):
+                Kinetic energy cutoff for constructing the dense grid xc_mesh,
+                in Hartree. Currently this is only used for constructing the XC
+                integration grid and is not needed for Hartree-Fock.
+                If neither ecut_rho nor ecut_wf is provided, then by
+                default xc_mesh = wf_mesh = cell.mesh.
+            exxdiv (str):
+                Method for curing the divergence of exact exchange.
+                Must be 'ewald' or None.
+        """
+        if not cell._built:
+            sys.stderr.write('Warning: cell.build() is not called in input\n')
+            cell.build()
+
+        self.cell = cell
+        mol_hf.SCF.__init__(self, cell)
+
+        if ecut_wf is None:
+            self._ecut_wf = None
+            self._ecut_rho = None
+        else:
+            self._ecut_wf = ecut_wf
+            if ecut_rho is None:
+                ecut_rho = 16 * ecut_wf
+            self._ecut_rho = ecut_rho
+
+        self.kpts = kpts
+        self.exxdiv = exxdiv
+        if self.exxdiv == "ewald":
+            self._set_madelung()
+        self.scf_summary["nuc"] = self.cell.energy_nuc()
+        self.scf_summary["e_comp_name_lst"] = ["kin", "ppl", "ppnl", "coul", "ex"]
+
+        self.nvir = 0 # number of virtual bands to compute
+        self.nvir_extra = 1 # to facilitate converging the highest virtual
+        self.init_guess = "hcore"
+
+        self.with_pp = None
+        self.with_jk = None
+
+        self._keys = self._keys.union(['cell', 'exxdiv'])
+
+    def _set_madelung(self):
+        self._madelung = tools.pbc.madelung(self.cell, self.kpts)
+        self._etot_shift_ewald = -0.5*self._madelung*self.cell.nelectron
+
+    @property
+    def wf_mesh(self):
+        """
+        Mesh for storing wave functions, pseudo-densities, and
+        wave function products.
+        """
+        if self._wf_mesh is None:
+            return np.asarray(self.cell.mesh)
+        else:
+            return self._wf_mesh
+
+    @property
+    def xc_mesh(self):
+        """
+        Mesh for integrating the XC energy. Only used for DFT.
+        """
+        if self._xc_mesh is None:
+            return np.asarray(self.cell.mesh)
+        else:
+            return self._xc_mesh
+
+    def set_meshes(self, wf_mesh=None, xc_mesh=None):
+        """
+        Set meshes to be different from their defaults.
+        init_pp and init_jk must be called again after setting
+        the meshes. Note that xc_mesh must be larger in all dimensions
+        than wf_mesh, and xc_mesh is only used for DFT calculations.
+        """
+        if self._ecut_wf is None:
+            self._wf_mesh = np.array(wf_mesh)
+            self._xc_mesh = np.array(xc_mesh)
+            self._wf2xc = pw_helper.get_mesh_map(
+                self.cell, None, None, mesh=xc_mesh, mesh2=wf_mesh
+            )
+        else:
+            self._wf_mesh, self._xc_mesh, self._wf2xc, self._basis_data = (
+                pw_helper.get_basis_data(self.cell, self.kpts, self._ecut_wf,
+                                         self._ecut_rho,
+                                         wf_mesh=wf_mesh, xc_mesh=xc_mesh)
+            )
+        self.with_pp = None
+        self.with_jk = None
+
+    @property
+    def ecut_wf(self):
+        """
+        Plane-wave cutoff energy in Hartree
+        """
+        return self._ecut_wf
+
+    def get_basis_kpt(self, kpt):
+        """
+        Get the PWBasis object for a given k-point. K-point
+        must be in self.kpts.
+        """
+        if self._basis_data is None:
+            return None
+        else:
+            k = member(kpt, self.kpts)[0]
+            return self._basis_data[k]
+
+    @property
+    def kpts(self):
+        return self._kpts
+
+    @property
+    def kpts_obj(self):
+        """
+        For calculations with symmetry reduction of k-points,
+        return the Kpoints object for this calculation.
+        Otherwise return None.
+        """
+        return None
+
+    @property
+    def weights(self):
+        """
+        Array with weight for each k-point. Sums to 1.
+        """
+        return [1.0 / len(self._kpts)] * len(self._kpts)
+
+    @kpts.setter
+    def kpts(self, x):
+        """
+        Set the k-points. Also resets the meshes to their defaults
+        because the PW basis must be reset when the k-points are reset.
+        """
+        self._kpts = np.reshape(x, (-1,3))
+        # update madelung constant and energy shift for exxdiv
+        self._set_madelung()
+        if self._ecut_wf is None:
+            self._wf_mesh = None
+            self._xc_mesh = None
+            self._wf2xc = None
+            self._basis_data = None
+        else:
+            self.set_meshes()
+
+    @property
+    def etot_shift_ewald(self):
+        return self._etot_shift_ewald
+
+    @etot_shift_ewald.setter
+    def etot_shift_ewald(self, x):
+        raise RuntimeError("Cannot set etot_shift_ewald directly")
+
+    @property
+    def madelung(self):
+        return self._madelung
+
+    @madelung.setter
+    def madelung(self, x):
+        raise RuntimeError("Cannot set madelung directly")
+
+    def istype(self, type_code):
+        """
+        This is to make sure code elsewhere in PySCF that checks istype
+        treats PWKRHF as KRHF, PWKUKS as KUKS, etc.
+        """
+        if not type_code.startswith("PW"):
+            type_code = "PW" + type_code
+        return super().istype(type_code)
+
+    def dump_flags(self):
+
+        log = logger.Logger(self.stdout, self.verbose)
+
+        log.info('******** PBC PWSCF flags ********')
+        log.info("ke_cutoff = %s", self.cell.ke_cutoff)
+        log.info("mesh = %s (%d PWs)", self.cell.mesh, np.prod(self.cell.mesh))
+        log.info("outcore mode = %s", self.outcore)
+        log.info("SCF init guess = %s", self.init_guess)
+        log.info("SCF conv_tol = %s", self.conv_tol)
+        log.info("SCF max_cycle = %d", self.max_cycle)
+        log.info("Num virtual bands to compute = %s", self.nvir)
+        log.info("Num extra v-bands included to help convergence = %s",
+                 self.nvir_extra)
+        log.info("Band energy conv_tol = %s", self.conv_tol_band)
+        log.info("Davidson conv_tol = %s", self.conv_tol_davidson)
+        log.info("Davidson max_cycle = %d", self.max_cycle_davidson)
+        log.info("Use ACE = %s", self.ace_exx)
+        log.info("Damping method = %s", self.damp_type)
+        if self.damp_type.lower() == "simple":
+            log.info("Damping factor = %s", self.damp_factor)
+        if self.chkfile:
+            log.info('chkfile to save SCF result = %s', self.chkfile)
+        log.info('max_memory %d MB (current use %d MB)', self.max_memory,
+                 lib.current_memory()[0])
+
+        log.info('kpts = %s', self.kpts)
+        log.info('Exchange divergence treatment (exxdiv) = %s', self.exxdiv)
+
+        cell = self.cell
+        if ((cell.dimension >= 2 and cell.low_dim_ft_type != 'inf_vacuum') and
+            isinstance(self.exxdiv, str) and self.exxdiv.lower() == 'ewald'):
+            madelung = self.madelung
+            log.info('    madelung (= occupied orbital energy shift) = %s',
+                     madelung)
+            log.info('    Total energy shift due to Ewald probe charge'
+                     ' = -1/2 * Nelec*madelung = %.12g',
+                     madelung*cell.nelectron * -.5)
+
+    def dump_scf_summary(self, verbose=logger.DEBUG):
+        log = logger.new_logger(self, verbose)
+        summary = self.scf_summary
+        def write(fmt, key):
+            if key in summary:
+                log.info(fmt, summary[key])
+        log.info('**** SCF Summaries ****')
+        log.info('Total Energy =                    %24.15f', self.e_tot)
+        write('Nuclear Repulsion Energy =        %24.15f', 'nuc')
+        write('Kinetic Energy =                  %24.15f', 'kin')
+        write('Local PP Energy =                 %24.15f', 'ppl')
+        write('Non-local PP Energy =             %24.15f', 'ppnl')
+        write('Two-electron Coulomb Energy =     %24.15f', 'coul')
+        write('Two-electron Exchange Energy =    %24.15f', 'ex')
+        write('Semilocal XC Energy =             %24.15f', 'xc')
+        write('Empirical Dispersion Energy =     %24.15f', 'dispersion')
+        write('PCM Polarization Energy =         %24.15f', 'epcm')
+        write('EFP Energy =                      %24.15f', 'efp')
+        if getattr(self, 'entropy', None):
+            log.info('(Electronic) Entropy              %24.15f', self.entropy)
+            log.info('(Electronic) Zero Point Energy    %24.15f', self.e_zero)
+            log.info('Free Energy =                     %24.15f', self.e_free)
+
+        def write_time(comp, t_comp, t_tot):
+            tc, tw = t_comp
+            tct, twt = t_tot
+            rc = tc / tct * 100
+            rw = tw / twt * 100
+            log.info('CPU time for %10s %9.2f  ( %6.2f%% ), wall time %9.2f '
+                     ' ( %6.2f%% )', comp.ljust(10), tc, rc, tw, rw)
+
+        t_tot = summary["t-tot"]
+        write_time("init guess", summary["t-init"], t_tot)
+        write_time("init ACE", summary["t-ace"], t_tot)
+        t_fock = np.zeros(2)
+        for op in summary["e_comp_name_lst"]:
+            write_time("op %s"%op, summary["t-%s"%op], t_tot)
+            t_fock += summary["t-%s"%op]
+        t_dvds = np.clip(summary['t-dvds']-t_fock, 0, None)
+        write_time("dvds other", t_dvds, t_tot)
+        t_other = t_tot - summary["t-init"] - summary["t-ace"] - \
+                    summary["t-dvds"]
+        write_time("all other", t_other, t_tot)
+        write_time("full SCF", t_tot, t_tot)
+
+    def get_mo_occ(mf, moe_ks=None, C_ks=None, nocc=None):
+        return get_mo_occ(mf.cell, moe_ks, C_ks, nocc)
+
+    def get_init_guess(self, init_guess=None, nvir=None, chkfile=None, C0=None,
+                       out=None):
+        if init_guess is None: init_guess = self.init_guess
+        if nvir is None: nvir = self.nvir
+        if chkfile is None: chkfile = self.chkfile
+
+        if C0 is not None:
+            C_ks, mocc_ks = self.get_init_guess_C0(C0, nvir=nvir, out=out)
+        else:
+            if os.path.isfile(chkfile) and init_guess[:3] == "chk":
+                C_ks, mocc_ks = self.init_guess_by_chkfile(
+                    chk=chkfile, nvir=nvir, out=out
+                )
+            else:
+                C_ks, mocc_ks = self.get_init_guess_key(nvir=nvir,
+                                                        key=init_guess,
+                                                        out=out)
+
+        return C_ks, mocc_ks
+
+    def get_init_guess_key(self, cell=None, kpts=None, basis=None, pseudo=None,
+                           nvir=None, key="hcore", out=None):
+        raise NotImplementedError
+
+    def init_guess_by_chkfile(self, chk=None, nvir=None, project=True,
+                              out=None):
+        raise NotImplementedError
+
+    def from_chk(self, chk=None, project=None, kpts=None):
+        return self.init_guess_by_chkfile(chk, project, kpts)
+
+    def dump_chk(self, envs):
+        if self.chkfile:
+            chkfile.dump_scf(self.mol, self.chkfile,
+                             envs['e_tot'], envs['moe_ks'],
+                             envs['mocc_ks'], envs['C_ks'])
+        return self
+
+    def get_init_guess_C0(self, C0, nvir=None, out=None):
+        raise NotImplementedError
+
+    def get_rho_R(self, C_ks, mocc_ks, mesh=None, Gv=None):
+        return self.with_jk.get_rho_R(C_ks, mocc_ks, mesh=mesh, Gv=Gv)
+
+    def get_vj_R_from_rho_R(self, rho_R, mesh=None, Gv=None):
+        return self.with_jk.get_vj_R_from_rho_R(rho_R, mesh=mesh, Gv=Gv)
+
+    def get_vj_R(self, C_ks, mocc_ks, mesh=None, Gv=None):
+        return self.with_jk.get_vj_R(C_ks, mocc_ks, mesh=mesh, Gv=Gv)
+
+    def init_pp(self, with_pp=None, **kwargs):
+        if self.wf_mesh is None:
+            mesh = None
+        else:
+            mesh = self.wf_mesh
+        return pw_pseudo.pseudopotential(self, with_pp=with_pp, mesh=mesh,
+                                         outcore=self.outcore, **kwargs)
+
+    def init_jk(self, with_jk=None, ace_exx=None):
+        if ace_exx is None: ace_exx = self.ace_exx
+        return pw_jk.jk(self, with_jk=with_jk, ace_exx=ace_exx,
+                        outcore=self.outcore, mesh=self.wf_mesh,
+                        basis_ks=self._basis_data)
+
+    def scf(self, C0=None, **kwargs):
+        self.dump_flags()
+
+        if self.with_pp is None:
+            with_pp = getattr(kwargs, "with_pp", None)
+            self.init_pp(with_pp=with_pp)
+
+        if self.with_jk is None:
+            with_jk = getattr(kwargs, "with_jk", None)
+            self.init_jk(with_jk=with_jk)
+
+        self.converged, self.e_tot, self.mo_energy, self.mo_coeff, \
+                self.mo_occ = kernel_doubleloop(
+                            self, C0=C0,
+                            nbandv=self.nvir, nbandv_extra=self.nvir_extra,
+                            conv_tol=self.conv_tol, max_cycle=self.max_cycle,
+                            conv_tol_band=self.conv_tol_band,
+                            conv_tol_davidson=self.conv_tol_davidson,
+                            max_cycle_davidson=self.max_cycle_davidson,
+                            verbose_davidson=self.verbose_davidson,
+                            ace_exx=self.ace_exx,
+                            damp_type=self.damp_type,
+                            damp_factor=self.damp_factor,
+                            conv_check=self.conv_check,
+                            callback=self.callback, **kwargs)
+        self._finalize(**kwargs)
+        return self.e_tot
+    kernel = lib.alias(scf, alias_name='kernel')
+
+    def _finalize(self, **kwargs):
+        pbc_hf.KSCF._finalize(self)
+
+        with_pp = self.with_pp
+        if not with_pp.outcore:
+            if with_pp.pptype == "ccecp":
+                save_ccecp_kb = kwargs.get("save_ccecp_kb", False)
+                if not save_ccecp_kb:
+                    # release memory of support vec
+                    with_pp._ecpnloc_initialized = False
+                    with_pp.vppnlocWks = None
+
+    def get_cpw_virtual(self, basis, amin=None, amax=None, thr_lindep=1e-14):
+        self.e_tot, self.mo_energy, self.mo_occ = get_cpw_virtual(
+                                                        self, basis,
+                                                        amin=amin, amax=amax,
+                                                        thr_lindep=thr_lindep,
+                                                        erifile=None)
+        return self.mo_energy, self.mo_occ
+
+    kernel_charge = kernel_charge
+    apply_hcore_kpt = apply_hcore_kpt
+    apply_veff_kpt = apply_veff_kpt
+    apply_Fock_kpt = apply_Fock_kpt
+    energy_tot = energy_tot
+    converge_band_kpt = converge_band_kpt
+
+    def get_nband(self, nbandv, nbandv_extra):
+        raise NotImplementedError
+
+    def dump_moe(self, moe_ks_, mocc_ks_, nband=None,
+                 trigger_level=logger.DEBUG):
+        raise NotImplementedError
+
+    def update_pp(mf, C_ks):
+        raise NotImplementedError
+
+    def update_k(mf, C_ks, mocc_ks):
+        raise NotImplementedError  # hook left to subclasses; PWKRHF binds update_k
+
+    def eig_subspace(mf, C_ks, mocc_ks, mesh=None, Gv=None, vj_R=None,
+                     exxdiv=None, comp=None):
+        raise NotImplementedError  # hook left to subclasses; PWKRHF binds eig_subspace
+
+    def get_mo_energy(mf, C_ks, mocc_ks, mesh=None, Gv=None, exxdiv=None,
+                      vj_R=None, comp=None, ret_mocc=True, full_ham=False):
+        raise NotImplementedError  # hook left to subclasses; PWKRHF binds get_mo_energy
+
+    def energy_elec(mf, C_ks, mocc_ks, mesh=None, Gv=None, moe_ks=None,
+                    vj_R=None, exxdiv=None):
+        raise NotImplementedError  # hook left to subclasses; PWKRHF binds energy_elec
+
+    def converge_band(mf, C_ks, mocc_ks, kpts, Cout_ks=None,
+                      mesh=None, Gv=None,
+                      vj_R=None, comp=None,
+                      conv_tol_davidson=1e-6,
+                      max_cycle_davidson=100,
+                      verbose_davidson=0):
+        raise NotImplementedError  # hook left to subclasses; PWKRHF binds converge_band
+
+
+class PWKRHF(PWKSCF):
+    """
+    Spin-restricted Plane-wave Hartree-Fock.
+    """
+    get_nband = get_nband
+    dump_moe = dump_moe
+    update_pp = update_pp
+    update_k = update_k
+    eig_subspace = eig_subspace
+    get_mo_energy = get_mo_energy
+    energy_elec = energy_elec
+    converge_band = converge_band
+
+    def get_init_guess_key(self, cell=None, kpts=None, basis=None, pseudo=None,
+                           nvir=None, key="hcore", out=None):
+        """Build initial orbitals and occupations via ``get_init_guess``.
+
+        ``cell``, ``kpts`` and ``nvir`` default to the attributes of ``self``.
+        Keys other than "h1e", "hcore", "cycle1", "scf" raise RuntimeError.
+        """
+        cell = self.cell if cell is None else cell
+        kpts = self.kpts if kpts is None else kpts
+        nvir = self.nvir if nvir is None else nvir
+        if key not in ("h1e", "hcore", "cycle1", "scf"):
+            logger.warn(self, "Unknown init guess %s", key)
+            raise RuntimeError
+        # an "xc" attribute marks a DFT object: use the fast LDA guess
+        guess_xc = "LDA,VWN" if hasattr(self, "xc") else None
+        C_ks, mocc_ks = get_init_guess(
+            cell, kpts, basis=basis, pseudo=pseudo, nvir=nvir, key=key,
+            out=out, mesh=self.wf_mesh, xc=guess_xc)
+
+        if self._basis_data is not None:
+            # keep only the coefficients selected by each k-point's basis
+            for k, kpt in enumerate(self.kpts):
+                keep = self.get_basis_kpt(kpt).indexes
+                set_kcomp(np.ascontiguousarray(C_ks[k][:, keep]), C_ks, k)
+        return C_ks, mocc_ks
+
+    def init_guess_by_chkfile(self, chk=None, nvir=None, project=True,
+                              out=None):
+        """Initial guess from a checkpoint file (default ``self.chkfile``)."""
+        chk = self.chkfile if chk is None else chk
+        nvir = self.nvir if nvir is None else nvir
+        return init_guess_by_chkfile(self.cell, chk, nvir, project=project,
+                                     out=out, basis_ks=self._basis_data)
+
+    def get_init_guess_C0(self, C0, nvir=None, out=None):
+        """Initial guess from ``C0`` with nelectron//2 + nvir bands per k."""
+        nvir = self.nvir if nvir is None else nvir
+        nband = self.cell.nelectron // 2 + nvir
+        return init_guess_from_C0(self.cell, C0, [nband] * len(self.kpts),
+                                  out=out, basis_ks=self._basis_data)
+
+
+if __name__ == "__main__":
+    # Smoke test: restricted HF on a two-carbon cell, 2x1x1 k-point mesh.
+    lat = 1.78339987
+    cell = gto.Cell(
+        atom="C 0 0 0; C 0.89169994 0.89169994 0.89169994",
+        a=np.asarray([[0., lat, lat],
+                      [lat, 0., lat],
+                      [lat, lat, 0.]]),
+        basis="gth-szv",
+        ke_cutoff=50,
+        pseudo="gth-pade",
+    )
+    MESH = [13, 13, 13]
+    cell.mesh = MESH
+    cell.build()
+    cell.verbose = 6
+
+    # exercise the mesh-map helper; the result is not used further
+    res = pw_helper.get_mesh_map(cell, None, None, (3, 3, 3), (2, 2, 2))
+
+    kpts = cell.make_kpts([2, 1, 1])
+    mf = PWKRHF(cell, kpts, ecut_wf=None)
+    mf.damp_type, mf.damp_factor = "simple", 0.7
+    mf.nvir = 4  # converge first 4 virtual bands
+    mf.kernel()
+    mf.dump_scf_summary()
+    e_ref = -10.673452914596
+    assert abs(mf.e_tot - e_ref) < 1.e-5
diff --git a/pyscf/pbc/pwscf/kmp2.py b/pyscf/pbc/pwscf/kmp2.py
new file mode 100644
index 000000000..72cea381f
--- /dev/null
+++ b/pyscf/pbc/pwscf/kmp2.py
@@ -0,0 +1,531 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Hong-Zhou Ye <hzyechem@gmail.com>
+#
+
+""" kpt-sampled periodic MP2 using a plane wave basis
+"""
+
+import h5py
+import tempfile
+import numpy as np
+
+from pyscf.pbc.pwscf.pw_helper import (get_nocc_ks_from_mocc, get_kcomp,
+                                       set_kcomp, wf_ifft)
+from pyscf.pbc import tools
+from pyscf import lib
+from pyscf.lib import logger
+
+
+def read_fchk(chkfile_name):
+    """Load SCF data from a checkpoint file.
+
+    Returns ``(fchk, C_ks, moe_ks, mocc_ks)``: ``fchk`` is the h5py file opened
+    read-only and ``C_ks`` its "mo_coeff" entry; the caller must close fchk.
+    """
+    from pyscf.lib.chkfile import load
+    scf_rec = load(chkfile_name, "scf")
+    mocc_ks, moe_ks = scf_rec["mo_occ"], scf_rec["mo_energy"]
+    fchk = h5py.File(chkfile_name, "r")
+    return fchk, fchk["mo_coeff"], moe_ks, mocc_ks
+
+
+def kconserv(kptija, reduce_latvec, kdota):
+    diff = lib.dot(kptija.reshape(1,-1), reduce_latvec) - kdota  # one row per row of kdota
+    return np.flatnonzero(np.abs(diff - np.rint(diff)).sum(axis=1) < 1e-6)[0]  # first all-integer row
+
+
+def fill_oovv(oovv, v_ia, Co_kj_R, Cv_kb_R, fac=None):
+    r"""Fill ``oovv`` in place, optionally scale it by ``fac``, and return it.
+
+    Math:
+        oovv = \sum_G rho_ia^kika(G)*coulG(ki-ka) * rho_jb^kjkb(kptijab-G)
+             = \sum_G V_ia^kika(G) * rho_jb^kjkb(kptijab-G)
+             = \sum_r V_ia^kika(r)*phase * rho_jb^kjkb(r)
+             = \sum_r v_ia^kika(r) * rho_jb^kjkb(r)
+    """
+    n_i, n_j = oovv.shape[:2]
+    # one scratch array, reused for every j, holds conj(Co_kj_R[j]) * Cv_kb_R
+    pair_R = np.empty(Cv_kb_R.shape, dtype=Cv_kb_R.dtype)
+    for j in range(n_j):
+        np.multiply(Co_kj_R[j].conj(), Cv_kb_R, out=pair_R)
+        pair_RT = pair_R.T
+        for i in range(n_i):
+            # the product v_ia[i] . pair_R.T goes straight into oovv[i,j]
+            lib.dot(v_ia[i], pair_RT, c=oovv[i,j])
+    if fac is not None:
+        oovv *= fac
+
+    return oovv
+
+
+def kernel_dx_(cell, kpts, chkfile_name, summary, nvir=None, nvir_lst=None,
+               frozen=None, basis_ks=None):
+    """ Compute direct (d) and exchange (x) MP2 energies together.
+
+    Orbitals are read from `chkfile_name`; energies and timings are stored in
+    the dict `summary`. Returns the energy for the last entry of `nvir_lst`.
+    Args:
+        nvir_lst (array-like of int): numbers of virtual orbitals to report
+            energies for (default: the largest count over all k-points).
+        frozen (int): number of leading orbitals dropped at each k-point.
+    """
+    log = logger.Logger(cell.stdout, cell.verbose)
+    cput0 = (logger.process_clock(), logger.perf_counter())
+
+    dtype = np.complex128
+    dsize = 16
+
+    fchk, C_ks, moe_ks, mocc_ks = read_fchk(chkfile_name)
+
+    if frozen is not None:
+        if isinstance(frozen, int):
+            log.info("freezing %d orbitals", frozen)
+            moe_ks = [moe_k[frozen:] for moe_k in moe_ks]
+            mocc_ks = [mocc_k[frozen:] for mocc_k in mocc_ks]
+        else:
+            raise NotImplementedError
+
+    nkpts = len(kpts)
+    if basis_ks is None:
+        basis_ks = [None] * nkpts
+        mesh = cell.mesh
+    else:
+        assert len(basis_ks) == nkpts
+        mesh = basis_ks[0].mesh
+    coords = cell.get_uniform_grids(mesh=mesh)
+    ngrids = coords.shape[0]
+
+    reduce_latvec = cell.lattice_vectors() / (2*np.pi)
+    kdota = lib.dot(kpts, reduce_latvec)
+
+    fac = ngrids**2. / cell.vol
+    fac_oovv = fac * ngrids / nkpts
+
+    nocc_ks = get_nocc_ks_from_mocc(mocc_ks)
+    if nvir is None:
+        n_ks = [len(mocc_ks[k]) for k in range(nkpts)]
+        nvir_ks = [n_ks[k] - nocc_ks[k] for k in range(nkpts)]
+    else:
+        nvir_ks = [nvir] * nkpts
+        n_ks = [nocc_ks[k] + nvir_ks[k] for k in range(nkpts)]
+    occ_ks = [list(range(nocc)) for nocc in nocc_ks]
+    vir_ks = [list(range(nocc,n)) for nocc,n in zip(nocc_ks,n_ks)]
+    nocc_max = np.max(nocc_ks)
+    nvir_max = np.max(nvir_ks)
+    if nvir_lst is None:
+        nvir_lst = [nvir_max]
+    nvir_lst = np.asarray(nvir_lst)
+    nnvir = len(nvir_lst)
+    log.info("Compute emp2 for these nvir's: %s", nvir_lst)
+
+    # estimate memory requirement if done outcore
+    est_mem = (nocc_max*nvir_max)**2*4      # for caching oovv_ka/kb, eijab, wijab
+    est_mem += nocc_max*nvir_max*ngrids     # for caching v_ia_R
+    est_mem += (nocc_max+nvir_max)*ngrids*2 # for caching MOs
+    est_mem *= dsize / 1e6
+    est_mem_outcore = est_mem
+    # estimate memory requirement if done incore
+    est_mem_incore = nkpts * (
+                nocc_max*nvir_max*ngrids +  # for caching v_ia_ks_R
+                (nocc_max+nvir_max)*ngrids  # for caching C_ks_R
+            ) * dsize / 1e6
+    est_mem_incore += est_mem
+    # get currently available memory
+    frac = 0.6
+    cur_mem = cell.max_memory - lib.current_memory()[0]
+    safe_mem = cur_mem * frac
+    # check if incore mode is possible
+    incore = est_mem_incore < cur_mem
+    est_mem = est_mem_incore if incore else est_mem_outcore
+
+    log.debug("Currently available memory total   %9.2f MB, "
+              "safe   %9.2f MB", cur_mem, safe_mem)
+    log.debug("Estimated required  memory outcore %9.2f MB, "
+              "incore %9.2f MB", est_mem_outcore, est_mem_incore)
+    log.debug("Incore mode: %r", incore)
+    if est_mem > safe_mem:
+        rec_mem = est_mem / frac + lib.current_memory()[0]
+        log.warn("Estimate memory (%.2f MB) exceeds %.0f%% of currently "
+                 "available memory (%.2f MB). Calculations may fail and "
+                 "`cell.max_memory = %.2f` is recommended.",
+                 est_mem, frac*100, safe_mem, rec_mem)
+
+    buf1 = np.empty(nocc_max*nvir_max*ngrids, dtype=dtype)
+    buf2 = np.empty(nocc_max*nocc_max*nvir_max*nvir_max, dtype=dtype)
+    buf3 = np.empty(nocc_max*nocc_max*nvir_max*nvir_max, dtype=dtype)
+
+    if incore:
+        C_ks_R = [None] * nkpts
+        v_ia_ks_R = [None] * nkpts
+    else:
+        swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+        fswap = lib.H5TmpFile(swapfile.name)
+        swapfile = None
+
+        C_ks_R = fswap.create_group("C_ks_R")
+        v_ia_ks_R = fswap.create_group("v_ia_ks_R")
+
+    for k in range(nkpts):
+        C_k = get_kcomp(C_ks, k)
+        if frozen is not None:
+            C_k = C_k[frozen:]
+        # C_k = tools.ifft(C_k, mesh)
+        C_k = wf_ifft(C_k, mesh, basis_ks[k])
+        set_kcomp(C_k, C_ks_R, k)
+        C_k = None
+
+    C_ks = None
+    fchk.close()
+
+    cput1 = log.timer('initialize pwmp2', *cput0)
+
+    tick = np.zeros(2)
+    tock = np.zeros(2)
+    tspans = np.zeros((7,2))
+    tcomps = summary["tcomps"] = ["init", "v_ks_R", "khelper", "IO", "oovv",
+                                  "energy", "tot"]
+    tspans[0] = np.asarray(cput1) - np.asarray(cput0)
+
+    emp2_d = np.zeros(nnvir)
+    emp2_x = np.zeros(nnvir)
+    emp2_ss = np.zeros(nnvir)
+    emp2_os = np.zeros(nnvir)
+    for ki in range(nkpts):
+        kpti = kpts[ki]
+        nocc_i = nocc_ks[ki]
+        occ_i = occ_ks[ki]
+
+        tick[:] = logger.process_clock(), logger.perf_counter()
+
+        Co_ki_R = get_kcomp(C_ks_R, ki, occ=occ_i)
+
+        for ka in range(nkpts):
+            kpta = kpts[ka]
+            nvir_a = nvir_ks[ka]
+            vir_a = vir_ks[ka]
+            coulG = tools.get_coulG(cell, kpta-kpti, exx=False, mesh=mesh)
+
+            Cv_ka_R = get_kcomp(C_ks_R, ka, occ=vir_a)
+            if incore:
+                # if from buffer, an extra "copy" is needed in "set_kcomp"
+                # below, which can be 1000x slower than allocating new mem.
+                v_ia_R = np.empty((nocc_i,nvir_a,ngrids), dtype=dtype)
+            else:
+                v_ia_R = np.ndarray((nocc_i,nvir_a,ngrids), dtype=dtype,
+                                    buffer=buf1)
+
+            for i in range(nocc_i):
+                v_ia = tools.fft(Co_ki_R[i].conj() * Cv_ka_R, mesh) * coulG
+                v_ia_R[i] = tools.ifft(v_ia, mesh)
+
+            set_kcomp(v_ia_R, v_ia_ks_R, ka)
+            v_ia_R = Cv_ka_R = None
+
+        Co_ki_R = None
+
+        tock[:] = logger.process_clock(), logger.perf_counter()
+        tspans[1] += tock - tick
+
+        for kj in range(nkpts):
+            nocc_j = nocc_ks[kj]
+            occ_j = occ_ks[kj]
+            kptij = kpti + kpts[kj]
+
+            tick[:] = logger.process_clock(), logger.perf_counter()
+
+            Co_kj_R = get_kcomp(C_ks_R, kj, occ=occ_j)
+
+            tock[:] = logger.process_clock(), logger.perf_counter()
+            tspans[3] += tock - tick
+
+            done = [False] * nkpts
+            kab_lst = []
+            kptijab_lst = []
+            for ka in range(nkpts):
+                if done[ka]: continue
+                kptija = kptij - kpts[ka]
+                kb = kconserv(kptija, reduce_latvec, kdota)
+                kab_lst.append((ka,kb))
+                kptijab_lst.append(kptija-kpts[kb])
+                done[ka] = done[kb] = True
+
+            tick[:] = logger.process_clock(), logger.perf_counter()
+            tspans[2] += tick - tock
+
+            nkab = len(kab_lst)
+            for ikab in range(nkab):
+                ka,kb = kab_lst[ikab]
+                kptijab = kptijab_lst[ikab]
+
+                nvir_a = nvir_ks[ka]
+                nvir_b = nvir_ks[kb]
+                vir_a = vir_ks[ka]
+                vir_b = vir_ks[kb]
+
+                tick[:] = logger.process_clock(), logger.perf_counter()
+                Cv_kb_R = get_kcomp(C_ks_R, kb, occ=vir_b)
+                v_ia = get_kcomp(v_ia_ks_R, ka)
+                tock[:] = logger.process_clock(), logger.perf_counter()
+                tspans[3] += tock - tick
+
+                phase = np.exp(-1j*lib.dot(coords,
+                                           kptijab.reshape(-1,1))).reshape(-1)
+                if incore:
+                    # two possible schemes: 1) make a copy in "get_kcomp" above
+                    # and use "a*=b" here. 2) (currently used) no copy in
+                    # "get_kcomp", init v_ia from buf, and use multiply with
+                    # "out".
+                    # numerical tests found that: a) copy is 2x more expensive
+                    # than "a*=b" and 1000x than init from buf. b) multiply
+                    # with "out" is as fast as "a*=b", which is half the cost
+                    # of "a*b".
+                    # conclusion: scheme 2 will be >3x faster.
+                    v_ia_ = v_ia
+                    v_ia = np.ndarray((nocc_i,nvir_a,ngrids), dtype=dtype,
+                                      buffer=buf1)
+                    np.multiply(v_ia_, phase, out=v_ia)
+                    v_ia_ = None
+                else:
+                    v_ia *= phase
+                oovv_ka = np.ndarray((nocc_i,nocc_j,nvir_a,nvir_b), dtype=dtype,
+                                     buffer=buf2)
+                fill_oovv(oovv_ka, v_ia, Co_kj_R, Cv_kb_R, fac_oovv)
+                tick[:] = logger.process_clock(), logger.perf_counter()
+                tspans[4] += tick - tock
+
+                Cv_kb_R = v_ia = None
+
+                if ka != kb:
+                    Cv_ka_R = get_kcomp(C_ks_R, ka, occ=vir_a)
+                    v_ib = get_kcomp(v_ia_ks_R, kb)
+                    tock[:] = logger.process_clock(), logger.perf_counter()
+                    tspans[3] += tock - tick
+
+                    if incore:
+                        v_ib_ = v_ib
+                        v_ib = np.ndarray((nocc_i,nvir_b,ngrids), dtype=dtype,
+                                          buffer=buf1)
+                        np.multiply(v_ib_, phase, out=v_ib)
+                        v_ib_ = None
+                    else:
+                        v_ib *= phase
+                    oovv_kb = np.ndarray((nocc_i,nocc_j,nvir_b,nvir_a), dtype=dtype,
+                                         buffer=buf3)
+                    fill_oovv(oovv_kb, v_ib, Co_kj_R, Cv_ka_R, fac_oovv)
+                    tick[:] = logger.process_clock(), logger.perf_counter()
+                    tspans[4] += tick - tock
+
+                    Cv_ka_R = v_ib = None
+                else:
+                    oovv_kb = oovv_ka
+
+# KMP2 energy evaluation starts here
+                tick[:] = logger.process_clock(), logger.perf_counter()
+                mo_e_o = moe_ks[ki][occ_i]
+                mo_e_v = moe_ks[ka][vir_a]
+                eia = mo_e_o[:,None] - mo_e_v
+
+                # Always build ejb from (kj, kb): ka == kb only implies
+                # ki + kj == 2*ka (mod G), not ki == kj, so reusing eia
+                # would pick up the wrong occupied energies in general.
+                mo_e_o = moe_ks[kj][occ_j]
+                mo_e_v = moe_ks[kb][vir_b]
+                ejb = mo_e_o[:,None] - mo_e_v
+
+                eijab = lib.direct_sum('ia,jb->ijab',eia,ejb)
+                t2_ijab = np.conj(oovv_ka/eijab)
+
+                for invir_,nvir_ in enumerate(nvir_lst):
+                    eijab_d = 2 * np.einsum('ijab,ijab->',
+                                            t2_ijab[:,:,:nvir_,:nvir_],
+                                            oovv_ka[:,:,:nvir_,:nvir_]).real
+                    eijab_x = - np.einsum('ijab,ijba->',
+                                          t2_ijab[:,:,:nvir_,:nvir_],
+                                          oovv_kb[:,:,:nvir_,:nvir_]).real
+                    if ka != kb:
+                        eijab_d *= 2
+                        eijab_x *= 2
+
+                    emp2_d[invir_] += eijab_d
+                    emp2_x[invir_] += eijab_x
+                    emp2_ss[invir_] += eijab_d * 0.5 + eijab_x
+                    emp2_os[invir_] += eijab_d * 0.5
+
+                tock[:] = logger.process_clock(), logger.perf_counter()
+                tspans[5] += tock - tick
+
+                oovv_ka = oovv_kb = eijab = None
+
+        cput1 = log.timer('kpt %d (%6.3f %6.3f %6.3f)'%(ki,*kpti), *cput1)
+
+    buf1 = buf2 = buf3 = None
+
+    emp2_d /= nkpts
+    emp2_x /= nkpts
+    emp2_ss /= nkpts
+    emp2_os /= nkpts
+    emp2 = emp2_d + emp2_x
+    summary["e_corr_d"] = emp2_d[-1]
+    summary["e_corr_x"] = emp2_x[-1]
+    summary["e_corr_ss"] = emp2_ss[-1]
+    summary["e_corr_os"] = emp2_os[-1]
+    summary["e_corr"] = emp2[-1]
+    summary["nvir_lst"] = nvir_lst
+    summary["e_corr_d_lst"] = emp2_d
+    summary["e_corr_x_lst"] = emp2_x
+    summary["e_corr_ss_lst"] = emp2_ss
+    summary["e_corr_os_lst"] = emp2_os
+    summary["e_corr_lst"] = emp2
+
+    cput1 = log.timer('pwmp2', *cput0)
+    tspans[6] = np.asarray(cput1) - np.asarray(cput0)
+    for tspan, tcomp in zip(tspans,tcomps):
+        summary["t-%s"%tcomp] = tspan
+
+    return emp2[-1]
+
+
+def PWKRMP2_from_gtomf(mf, chkfile=None):
+    """Build a PWKRMP2 object from a GTO-RHF object via ``gtomf2pwmf``."""
+    from pyscf.pbc.pwscf.pw_helper import gtomf2pwmf
+
+    pwmf = gtomf2pwmf(mf, chkfile=chkfile)
+    return PWKRMP2(pwmf)
+
+
+class PWKRMP2:
+    """
+    Restricted MP2 perturbation theory in a plane-wave basis.
+    """
+    def __init__(self, mf, nvir=None, frozen=None):
+        """Wrap the plane-wave SCF object ``mf``.
+
+        ``nvir`` and ``frozen`` are the defaults later used by ``kernel``.
+        """
+        self.cell = self.mol = mf.cell
+        self._scf = mf
+
+        self.verbose, self.stdout = self.mol.verbose, self.mol.stdout
+        self.max_memory = mf.max_memory
+
+        self.nvir, self.frozen = nvir, frozen
+
+        # The attributes below are not input options; do not modify them.
+        self.kpts = mf.kpts
+        self.nkpts = len(self.kpts)
+        self.mp2_summary = {}
+        self.e_hf = self._scf.e_tot
+        self.e_corr = None
+        self.t2 = None
+        self._keys = set(self.__dict__)
+
+    @property
+    def e_tot(self):
+        """HF energy plus correlation energy; None while e_corr is None."""
+        return None if self.e_corr is None else self.e_hf + self.e_corr
+
+    def dump_mp2_summary(self, verbose=logger.DEBUG):
+        """Log energies, the per-nvir table (if any) and timing shares."""
+        log = logger.new_logger(self, verbose)
+        summary = self.mp2_summary
+        log.info('**** MP2 Summaries ****')
+        nvir_lst = summary["nvir_lst"]
+        log.info('Number of virtuals =              %d', nvir_lst[-1])
+        log.info('Total Energy (HF+MP2) =           %24.15f', self.e_tot)
+        log.info('Correlation Energy =              %24.15f', self.e_corr)
+        # energy components are printed only when present in the summary
+        for fmt, key in (
+                ('Direct Energy =                   %24.15f', 'e_corr_d'),
+                ('Exchange Energy =                 %24.15f', 'e_corr_x'),
+                ('Same-spin Energy =                %24.15f', 'e_corr_ss'),
+                ('Opposite-spin Energy =            %24.15f', 'e_corr_os')):
+            if key in summary:
+                log.info(fmt, summary[key])
+
+        if len(nvir_lst) > 1:
+            log.info('%sNvirt  Ecorr', "\n")
+            for n_virt, e_virt in zip(nvir_lst, summary["e_corr_lst"]):
+                log.info("%5d  %24.15f", n_virt, e_virt)
+            log.info("%s", "")
+
+        # each component: (cpu, wall) time and its percentage of the total
+        cpu_tot, wall_tot = summary["t-tot"]
+        for comp in summary["tcomps"]:
+            cpu, wall = summary["t-%s"%comp]
+            log.info('CPU time for %10s %9.2f  ( %6.2f%% ), wall time %9.2f  '
+                     '( %6.2f%% )', comp.ljust(10), cpu, cpu / cpu_tot * 100,
+                     wall, wall / wall_tot * 100)
+
+    def kernel(self, nvir=None, nvir_lst=None, frozen=None):
+        """Run ``kernel_dx_`` on the SCF checkpoint file; return ``e_corr``.
+
+        ``nvir``/``frozen`` left as None fall back to the instance attributes.
+        """
+        if nvir is None:
+            nvir = self.nvir
+        if frozen is None:
+            frozen = self.frozen
+
+        self.e_corr = kernel_dx_(self.cell, self.kpts, self._scf.chkfile,
+                                 self.mp2_summary, nvir=nvir,
+                                 nvir_lst=nvir_lst, frozen=frozen,
+                                 basis_ks=self._scf._basis_data)
+
+        self._finalize()
+
+        return self.e_corr
+
+    def _finalize(self):
+        """Print the final correlation energy at note level."""
+        logger.note(self, "KMP2 energy = %.15g", self.e_corr)
+
+
+if __name__ == "__main__":
+    # Self-test: MP2 for an H2 cell on a 2x2x2 k-point mesh, compared with
+    # stored correlation energies for 5, 10 and 20 virtual orbitals.
+    from pyscf.pbc import gto, pwscf
+
+    cell = gto.Cell(
+        atom="H 0 0 0; H 0.9 0 0",
+        a=np.eye(3) * 3,
+        basis="gth-szv",
+        pseudo="gth-pade",
+        ke_cutoff=50,
+    )
+    cell.build()
+    cell.verbose = 6
+
+    kpts = cell.make_kpts([2] * 3)
+    nkpts = len(kpts)
+
+    # mean-field reference with nvir = 20
+    pwmf = pwscf.PWKRHF(cell, kpts)
+    pwmf.nvir = 20
+    pwmf.kernel()
+
+    # reference correlation energies keyed by the number of virtuals
+    es = {"5": -0.01363871, "10": -0.01873622, "20": -0.02461560}
+
+    pwmp = PWKRMP2(pwmf)
+    pwmp.kernel(nvir_lst=[5,10,20])
+    pwmp.dump_mp2_summary()
+
+    summary = pwmp.mp2_summary
+    for nvir, ecorr in zip(summary["nvir_lst"], summary["e_corr_lst"]):
+        err = abs(ecorr - es["%d" % nvir])
+        print(err)
+        assert err < 1e-5
diff --git a/pyscf/pbc/pwscf/kpt_symm.py b/pyscf/pbc/pwscf/kpt_symm.py
new file mode 100644
index 000000000..96e34480d
--- /dev/null
+++ b/pyscf/pbc/pwscf/kpt_symm.py
@@ -0,0 +1,651 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Kyle Bystrom <kylebystrom@gmail.com>
+#
+
+""" k-point symmetry for plane-wave HF and DFT
+"""
+
+import tempfile
+from pyscf import lib
+import numpy as np
+import ctypes
+from pyscf.pbc.lib import kpts as libkpts
+from pyscf.pbc import tools
+from pyscf.pbc.pwscf import jk
+from pyscf.pbc.pwscf import khf, kuhf, krks, kuks
+from pyscf.lib import logger
+from pyscf.pbc.pwscf.pw_helper import wf_fft, wf_ifft
+
+
+libpw = lib.load_library("libpwscf")
+
+
+def add_rotated_realspace_func_(fin, fout, mesh, rot, wt):
+    """
+    Accumulate a rotated copy of fin into fout, i.e.
+
+    fout(rot * x) = fout(rot * x) + wt * fin(x)
+
+    for real-valued fin and fout on mesh, where rot is a rotation given as
+    a 3x3 integer matrix and x is an integer-valued 3D mesh position.
+    Both arrays must be C-contiguous float64 with np.prod(mesh) elements.
+    """
+    for arr in (fin, fout):
+        assert arr.dtype == np.float64
+    for arr in (fin, fout):
+        assert arr.flags.c_contiguous
+    mesh_i32 = np.asarray(mesh, dtype=np.int32, order="C")
+    ngrids = np.prod(mesh_i32)
+    assert fout.size == ngrids
+    assert fin.size == ngrids
+    rot_i32 = np.asarray(rot, dtype=np.int32, order="C")
+    assert rot_i32.shape == (3, 3)
+    libpw.add_rotated_realspace_func(fin.ctypes, fout.ctypes, mesh_i32.ctypes,
+                                     rot_i32.ctypes, ctypes.c_double(wt))
+
+
+def get_rotated_complex_func(fin, mesh, rot, shift=None, fout=None):
+    """
+    Store a rotated and shifted copy of the complex-valued fin in fout:
+
+    fout(rot * x + shift) = fin(x)
+
+    rot and x are defined as in add_rotated_realspace_func_, and shift is
+    an integer-valued 3D vector (zero when omitted). fout, if given, is
+    used as the buffer of the returned array of shape mesh.
+    """
+    shift = [0, 0, 0] if shift is None else shift
+    assert fin.dtype == np.complex128
+
+    fout = np.ndarray(shape=mesh, dtype=np.complex128, order="C", buffer=fout)
+    assert fin.flags.c_contiguous
+    mesh_i32 = np.asarray(mesh, dtype=np.int32, order="C")
+    assert fin.size == np.prod(mesh_i32), f"{fin.shape} {mesh_i32}"
+    rot_i32 = np.asarray(rot, dtype=np.int32, order="C")
+    assert rot_i32.shape == (3, 3)
+    shift_i32 = np.asarray(shift, dtype=np.int32, order="C")
+
+    libpw.get_rotated_complex_func(fin.ctypes, fout.ctypes, mesh_i32.ctypes,
+                                   rot_i32.ctypes, shift_i32.ctypes)
+    return fout
+
+
+def get_rho_R_ksym(C_ks, mocc_ks, mesh, kpts, basis_ks=None):
+    """
+    Build the real-space density from bands given only at the IBZ k-points.
+    kpts is a Kpoints object holding both the IBZ and BZ k-points and the
+    symmetry mappings between them; the density of every IBZ k-point is
+    added once per symmetry operation listed in kpts.stars_ops for it.
+    """
+    rho_R = np.zeros(np.prod(mesh), dtype=np.float64, order="C")
+    dens_k = np.empty_like(rho_R)
+    if basis_ks is None:
+        basis_ks = [None] * len(C_ks)
+    # weighted electron count (currently not used below)
+    nelec = sum(mocc_k.sum() * kpts.weights_ibz[k]
+                for k, mocc_k in enumerate(mocc_ks))
+    for k in range(kpts.nkpts_ibz):
+        mocc_k = mocc_ks[k]
+        occ = np.where(mocc_k > jk.THR_OCC)[0].tolist()
+        Co_k_R = wf_ifft(jk.get_kcomp(C_ks, k, occ=occ), mesh,
+                         basis=basis_ks[k])
+        jk._mul_by_occ_(Co_k_R, mocc_k, occ)
+        dens_k[:] = lib.einsum("ig,ig->g", Co_k_R.conj(), Co_k_R).real
+        for iop in kpts.stars_ops[k]:
+            add_rotated_realspace_func_(dens_k, rho_R, mesh, kpts.ops[iop].rot, 1.0)
+    return rho_R
+
+
+def get_ibz2bz_info(C_ks_ibz, kpts, k_bz, occ_ks=None):
+    """
+    Collect what is needed to map bands from the IBZ to BZ k-point k_bz.
+
+    Returns (scaled IBZ k-point, scaled BZ k-point, rot, time-reversal
+    flag, bands at the IBZ k-point); occ_ks, if given, selects the bands.
+    """
+    k_ibz = kpts.bz2ibz[k_bz]
+    rot = kpts.ops[kpts.stars_ops_bz[k_bz]].rot
+    occ = None if occ_ks is None else occ_ks[k_ibz]
+    C_k_ibz = jk.get_kcomp(C_ks_ibz, k_ibz, occ=occ)
+    kpt_ibz = kpts.kpts_scaled_ibz[k_ibz]
+    kpt_bz = kpts.kpts_scaled[k_bz]
+    tr = kpts.time_reversal_symm_bz[k_bz]
+
+    return kpt_ibz, kpt_bz, rot, tr, C_k_ibz
+
+
+def get_ibz2bz_info_v2(kpts, k_ibz):
+    """
+    For IBZ k-point index k_ibz, return one list per member of its star:
+    [scaled IBZ k-point, scaled BZ k-point, rot, time-reversal flag, k_bz].
+
+    The last entry is the BZ index; the lists are mutable on purpose.
+    """
+    star_ops, star = kpts.stars_ops[k_ibz], kpts.stars[k_ibz]
+    return [
+        [kpts.kpts_scaled_ibz[k_ibz], kpts.kpts_scaled[star[i]],
+         kpts.ops[iop].rot, kpts.time_reversal_symm_bz[star[i]], star[i]]
+        for i, iop in enumerate(star_ops)
+    ]
+
+
+def get_C_from_ibz2bz_info(mesh, kpt_ibz, kpt_bz, rot, tr, C_k_ibz,
+                           out=None, realspace=False):
+    """
+    From a set of bands C_k_ibz at a k-point in the IBZ (kpt_ibz), get the
+    bands at a symmetrically equivalent k-point kpt_bz.
+
+    kpt_ibz and kpt_bz are the scaled k-points (fractional coords in bz).
+    tr is a bool: does the symmetry operation include time-reversal?
+    realspace selects real-space (True) or reciprocal-space (False) bands.
+
+    If tr == True, kpt_bz = -rot * kpt_ibz.
+    If tr == False, kpt_bz = rot * kpt_ibz.
+    In both cases, the k-points are equivalent modulo 1
+    (in fractional coordinates).
+    """
+    out = np.ndarray(C_k_ibz.shape, dtype=np.complex128, order="C", buffer=out)
+    rrot = rot.copy()
+    krot = np.rint(np.linalg.inv(rot).T)  # rotation applied to the scaled k-point
+    if tr:
+        krot[:] *= -1
+    if not realspace:
+        rot = krot
+    else:
+        rot = rrot
+    new_kpt = krot.dot(kpt_ibz)
+    shift = [0, 0, 0]
+    for v in range(3):
+        while np.round(new_kpt[v] - kpt_bz[v]) < 0:
+            shift[v] += 1
+            new_kpt[v] += 1
+        while np.round(new_kpt[v] - kpt_bz[v]) > 0:
+            shift[v] -= 1
+            new_kpt[v] -= 1
+        assert np.abs(new_kpt[v] - kpt_bz[v]) < 1e-8, f"{v}, {new_kpt} {kpt_bz}"
+    kshift = [-1 * v for v in shift]
+    shift = [0, 0, 0] if realspace else kshift
+    for i in range(out.shape[0]):
+        get_rotated_complex_func(C_k_ibz[i], mesh, rot, shift, fout=out[i])
+        if tr:
+            out[i] = out[i].conj()
+    if realspace:
+        outshape = out.shape
+        out.shape = (-1, mesh[0], mesh[1], mesh[2])
+        phases = []
+        for v in range(3):
+            wt = 1.0 / mesh[v]  # per-axis spacing: the mesh need not be cubic
+            phases.append(np.exp(2j * np.pi * kshift[v] * np.arange(mesh[v]) * wt))
+        out[:] *= phases[0][None, :, None, None]
+        out[:] *= phases[1][None, None, :, None]
+        out[:] *= phases[2][None, None, None, :]
+        out.shape = outshape
+    return out
+
+
+def get_C_from_symm(C_ks_ibz, mesh, kpts, k_bz, out=None, occ_ks=None,
+                    realspace=False):
+    """
+    Bands at BZ k-point index k_bz, generated by symmetry from C_ks_ibz.
+    occ_ks optionally selects bands; out and realspace are passed through.
+    """
+    info = get_ibz2bz_info(C_ks_ibz, kpts, k_bz, occ_ks=occ_ks)
+    return get_C_from_ibz2bz_info(mesh, *info, out=out,
+                                  realspace=realspace)
+
+
+def get_C_from_C_ibz(C_ks_ibz, mesh, kpts, realspace=False):
+    """
+    Get C_ks in the full BZ from C_ks_ibz in the IBZ.
+    Assumes that C_ks_ibz is incore.
+
+    Returns a list with one array per BZ k-point (kpts.nkpts entries).
+    """
+    return [
+        get_C_from_symm(C_ks_ibz, mesh, kpts, k_bz, realspace=realspace)
+        for k_bz in range(kpts.nkpts)
+    ]
+
+
+def apply_k_sym_s1(cell, C_ks, mocc_ks, kpts_obj, Ct_ks, ktpts, mesh, Gv,
+                   out=None, outcore=False, basis_ks=None):
+    """
+    Apply the exchange operator built from the occupied IBZ orbitals C_ks
+    to the orbitals Ct_ks located at the k-points ktpts.
+
+    Args:
+        cell: Cell object. cell.vol enters the prefactor and cell is
+            forwarded to tools.get_coulG.
+        C_ks: Orbital coefficients, one entry per IBZ k-point.
+        mocc_ks: Occupations, indexed like C_ks (per IBZ k-point).
+        kpts_obj: k-point object providing kpts and bz2ibz.
+        Ct_ks: Orbitals the operator is applied to, one entry per ktpts.
+        ktpts: k-points belonging to Ct_ks.
+        mesh: FFT mesh.
+        Gv: Plane-wave vectors, forwarded to tools.get_coulG.
+        out: Container receiving one result per entry of ktpts. A list
+            is allocated when None.
+        outcore: If True, the real-space orbitals are kept in a temporary
+            HDF5 file rather than in Python lists.
+        basis_ks: Optional per-k-point basis objects forwarded to
+            wf_ifft / wf_fft.
+
+    Returns:
+        out, filled for every index in range(len(ktpts)).
+    """
+    kpts = kpts_obj.kpts
+    nkpts = len(kpts)
+    nktpts = len(ktpts)
+    nkpts_ibz = len(C_ks)
+    ngrids = np.prod(mesh)
+    fac = ngrids**2./(cell.vol*nkpts)
+
+    # mocc_ks is indexed by IBZ k-point. Keep an IBZ-indexed list of
+    # occupied-band indices for the loops over C_ks, and a separate
+    # BZ-indexed list for the exchange loop over all k-points. Mixing the
+    # two index spaces selects the wrong occupations whenever they differ
+    # between k-points (e.g. with smearing).
+    occ_ks_ibz = [np.where(mocc_ks[k] > jk.THR_OCC)[0]
+                  for k in range(nkpts_ibz)]
+    occ_ks_bz = [occ_ks_ibz[kpts_obj.bz2ibz[k]] for k in range(nkpts)]
+
+    if out is None: out = [None] * nktpts
+    use_basis = basis_ks is not None
+    if not use_basis:
+        # basis_ks is indexed both by IBZ index and by ktpts index below.
+        basis_ks = [None] * max(nkpts_ibz, nktpts)
+
+    # swap file to hold FFTs
+    if outcore:
+        swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+        fswap = lib.H5TmpFile(swapfile.name)
+        swapfile = None
+        Co_ks_R = fswap.create_group("Co_ks_R")
+        Ct_ks_R = fswap.create_group("Ct_ks_R")
+    else:
+        Co_ks_R = [None] * nkpts
+        Ct_ks_R = [None] * nktpts
+
+    # Occupied orbitals, scaled by occupation, in real space at every
+    # k-point of the full BZ.
+    # TODO this is probably a bit memory-intensive when a basis is used
+    # TODO alternatively build each BZ orbital with get_C_from_symm; that
+    # needs a new basis for the symmetrized calculation (or a rotation
+    # in real space).
+    for k_ibz in range(nkpts_ibz):
+        occ_k = occ_ks_ibz[k_ibz]
+        Co_k_ibz = jk.get_kcomp(C_ks, k_ibz, occ=occ_k)
+        jk._mul_by_occ_(Co_k_ibz, mocc_ks[k_ibz], occ_k)
+        if use_basis:
+            # With an explicit basis the IBZ orbital is transformed to
+            # real space first and the symmetry mapping is applied there.
+            Co_k_ibz = wf_ifft(Co_k_ibz, mesh, basis=basis_ks[k_ibz])
+        for kmap in get_ibz2bz_info_v2(kpts_obj, k_ibz):
+            # The last slot of kmap carries the target BZ index; it is
+            # replaced by the orbital before unpacking into the call.
+            k_bz = kmap[-1]
+            kmap[-1] = Co_k_ibz
+            Co_k = get_C_from_ibz2bz_info(mesh, *kmap, realspace=use_basis)
+            if not use_basis:
+                Co_k = wf_ifft(Co_k, mesh)
+            jk.set_kcomp(Co_k, Co_ks_R, k_bz)
+
+    # Target orbitals in real space.
+    for k in range(nktpts):
+        Ct_k = jk.get_kcomp(Ct_ks, k)
+        jk.set_kcomp(wf_ifft(Ct_k, mesh, basis=basis_ks[k]), Ct_ks_R, k)
+        Ct_k = None
+
+    for k1,kpt1 in enumerate(ktpts):
+        Ct_k1_R = jk.get_kcomp(Ct_ks_R, k1)
+        Ctbar_k1 = np.zeros_like(Ct_k1_R)
+        for k2,kpt2 in enumerate(kpts):
+            coulG = tools.get_coulG(cell, kpt1-kpt2, exx=False, mesh=mesh,
+                                    Gv=Gv)
+            Co_k2_R = jk.get_kcomp(Co_ks_R, k2)
+            for j in occ_ks_bz[k2]:
+                Cj_k2_R = Co_k2_R[j]
+                vij_R = tools.ifft(tools.fft(Ct_k1_R * Cj_k2_R.conj(), mesh) *
+                                   coulG, mesh)
+                Ctbar_k1 += vij_R * Cj_k2_R
+
+        Ctbar_k1 = wf_fft(Ctbar_k1, mesh, basis=basis_ks[k1]) * fac
+        jk.set_kcomp(Ctbar_k1, out, k1)
+        Ctbar_k1 = None
+
+    return out
+
+
+def apply_k_sym(cell, C_ks, mocc_ks, kpts, mesh, Gv, Ct_ks=None, ktpts=None,
+                exxdiv=None, out=None, outcore=False, basis_ks=None):
+    """
+    Apply the EXX operator with symmetry-reduced k-points.
+
+    When Ct_ks is None the operator is applied to C_ks itself, at the
+    k-points kpts.kpts_ibz (any ktpts argument is then ignored).
+    exxdiv is accepted but not used here.
+    """
+    # TODO s2 symmetry
+    if Ct_ks is None:
+        Ct_ks, ktpts = C_ks, kpts.kpts_ibz
+    return apply_k_sym_s1(cell, C_ks, mocc_ks, kpts, Ct_ks, ktpts, mesh, Gv,
+                          out, outcore, basis_ks)
+
+
+def get_ace_support_vec(cell, C1_ks, mocc1_ks, k1pts, C2_ks=None, k2pts=None,
+                        out=None, mesh=None, Gv=None, exxdiv=None, method="cd",
+                        outcore=False, basis_ks=None):
+    """ Compute the ACE support vectors for orbitals given by C2_ks and the
+    corresponding k-points given by k2pts, using the Fock matrix obtained from
+    C1_ks, mocc1_ks, k1pts. If C2_ks and/or k2pts are not provided, their
+    values will be set to the C1_ks and/or k1pts. The results are saved to out
+    and returned.
+
+    Args:
+        cell: Cell object; supplies the default mesh and Gv.
+        C1_ks, mocc1_ks, k1pts: Orbitals, occupations and k-point object
+            passed to apply_k_sym to build the exchange operator.
+        C2_ks, k2pts: Orbitals/k-points the support vectors are built for.
+        out: Container receiving one support vector per k-point. If None,
+            a new list of length len(k2pts) is allocated and returned.
+        mesh, Gv: FFT mesh and plane-wave vectors (default: from cell).
+        exxdiv: Forwarded to apply_k_sym.
+        method: Forwarded to get_support_vec.
+        outcore: If True, the intermediate exchange-applied orbitals are
+            held in a temporary HDF5 group that is deleted before return.
+        basis_ks: Forwarded to apply_k_sym.
+
+    Returns:
+        out
+    """
+    from pyscf.pbc.pwscf.pseudo import get_support_vec
+    if mesh is None: mesh = cell.mesh
+    if Gv is None: Gv = cell.get_Gv(mesh)
+
+    if outcore:
+        swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+        fswap = lib.H5TmpFile(swapfile.name)
+        dname0 = "W_ks"
+        W_ks = fswap.create_group(dname0)
+    else:
+        W_ks = None
+
+    # W_ks holds the exchange operator applied to C2_ks (or to C1_ks).
+    W_ks = apply_k_sym(cell, C1_ks, mocc1_ks, k1pts, mesh, Gv,
+                       Ct_ks=C2_ks, ktpts=k2pts, exxdiv=exxdiv, out=W_ks,
+                       outcore=outcore, basis_ks=basis_ks)
+
+    if C2_ks is None: C2_ks = C1_ks
+    if k2pts is None: k2pts = k1pts
+    nk2pts = len(k2pts)
+
+    # Without a caller-supplied container there is nowhere to store the
+    # result; allocate a plain list so that the function really returns
+    # the support vectors.
+    if out is None:
+        out = [None] * nk2pts
+
+    for k in range(nk2pts):
+        C_k = jk.get_kcomp(C2_ks, k)
+        W_k = jk.get_kcomp(W_ks, k)
+        W_k = get_support_vec(C_k, W_k, method=method)
+        jk.set_kcomp(W_k, out, k)
+        W_k = None
+
+    if outcore:
+        # Drop the intermediate group from the swap file.
+        del fswap[dname0]
+
+    return out
+
+
+class KsymAdaptedPWJK(jk.PWJK):
+    """
+    Lattice symmetry-adapted PWJK module.
+
+    Densities are built with get_rho_R_ksym and exact exchange is only
+    available through ACE support vectors (see update_k_support_vec and
+    apply_k_kpt); the non-ACE paths raise NotImplementedError.
+    """
+    # k-points passed to the most recent update_k_support_vec call with
+    # ace_exx enabled; apply_k_kpt falls back to self.kpts.kpts_ibz while
+    # this is still None.
+    _ace_kpts = None
+
+    def __init__(self, cell, kpts, mesh=None, exxdiv=None, **kwargs):
+        """
+        Forward all arguments to the parent constructor, after rejecting
+        cells that request space-group symmetry without being symmorphic.
+
+        Raises:
+            NotImplementedError: if cell.space_group_symmetry is set and
+                cell.symmorphic is not.
+        """
+        if cell.space_group_symmetry and not cell.symmorphic:
+            raise NotImplementedError(
+                "Plane-wave calculation with k-point symmetry only "
+                "supports symmorphic symmetry operations"
+            )
+        super().__init__(cell, kpts, mesh=mesh, exxdiv=exxdiv, **kwargs)
+
+    def __init_exx(self):
+        """
+        Create the container self.exx_W_ks: an HDF5 group in a temporary
+        file when self.outcore is set, otherwise a plain dict.
+        """
+        if self.outcore:
+            self.swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+            self.fswap = lib.H5TmpFile(self.swapfile.name)
+            self.exx_W_ks = self.fswap.create_group("exx_W_ks")
+        else:
+            self.exx_W_ks = {}
+
+    def get_rho_R(self, C_ks, mocc_ks, mesh=None, Gv=None, ncomp=1):
+        """
+        Return the real-space density computed by get_rho_R_ksym.
+
+        For ncomp > 1, C_ks and mocc_ks are indexed by component and the
+        result is the sum over components multiplied by 1/ncomp.
+        Gv is given a default but is not passed on to get_rho_R_ksym.
+        """
+        if mesh is None: mesh = self.mesh
+        if Gv is None: Gv = self.get_Gv(mesh)
+        if ncomp == 1:
+            rho_R = get_rho_R_ksym(
+                C_ks, mocc_ks, mesh, self.kpts, basis_ks=self.basis_ks
+            )
+        else:
+            rho_R = 0.
+            for comp in range(ncomp):
+                C_ks_comp = jk.get_kcomp(C_ks, comp, load=False)
+                rho_R += get_rho_R_ksym(
+                    C_ks_comp, mocc_ks[comp], mesh, self.kpts,
+                    basis_ks=self.basis_ks
+                )
+            rho_R *= 1./ncomp
+        return rho_R
+
+    def get_vj_R_from_rho_R(self, rho_R, mesh=None, Gv=None):
+        """
+        Return the real part of ifft(fft(rho_R) * coulG), scaled by
+        ngrids**2 / (cell.vol * nkpts).
+        """
+        if mesh is None: mesh = self.mesh
+        if Gv is None: Gv = self.get_Gv(mesh)
+        cell = self.cell
+        # NOTE(review): self.kpts is the k-point object here, so nkpts is
+        # presumably the full-BZ count rather than the IBZ count - confirm.
+        nkpts = self.kpts.nkpts
+        ngrids = Gv.shape[0]
+        fac = ngrids**2 / (cell.vol*nkpts)
+        vj_R = tools.ifft(tools.fft(rho_R, mesh) * tools.get_coulG(cell, Gv=Gv),
+                          mesh).real * fac
+        return vj_R
+
+    def update_k_support_vec(self, C_ks, mocc_ks, kpts, Ct_ks=None,
+                             mesh=None, Gv=None, exxdiv=None, comp=None):
+        """
+        Recompute the ACE support vectors and store them in self.exx_W_ks
+        (under the sub-container named "%d" % comp when comp is an int).
+
+        kpts are the kpts in the bz, or those for which you want to calculate
+        the support vectors.
+
+        Nothing is returned; the support vectors are written into the
+        storage container.
+
+        Raises:
+            RuntimeError: if comp is neither None nor an int.
+            NotImplementedError: if self.ace_exx is false.
+        """
+        if self.exx_W_ks is None:
+            self.__init_exx()
+
+        if mesh is None:
+            mesh = self.mesh
+
+        # Select (and create on first use) the container for this component.
+        if comp is None:
+            out = self.exx_W_ks
+        elif isinstance(comp, int):
+            keycomp = "%d" % comp
+            if keycomp not in self.exx_W_ks:
+                if self.outcore:
+                    self.exx_W_ks.create_group(keycomp)
+                else:
+                    self.exx_W_ks[keycomp] = {}
+            out = self.exx_W_ks[keycomp]
+        else:
+            raise RuntimeError("comp must be None or int")
+
+        if self.ace_exx:
+            # Remember the k-points so apply_k_kpt can locate its kpt.
+            self._ace_kpts = kpts
+            out = get_ace_support_vec(self.cell, C_ks, mocc_ks, self.kpts,
+                                      C2_ks=Ct_ks, k2pts=kpts, out=out,
+                                      mesh=mesh, Gv=Gv, exxdiv=exxdiv,
+                                      method="cd", outcore=self.outcore,
+                                      basis_ks=self.basis_ks)
+        else:   # store ifft of Co_ks
+            # TODO kpt_symm without ACE
+            raise NotImplementedError("kpt_symm only supports ACE for EXX")
+            """
+            if mesh is None: mesh = self.mesh
+            for k in range(nkpts):
+                occ = np.where(mocc_ks[k]>jk.THR_OCC)[0]
+                Co_k = jk.get_kcomp(C_ks, k, occ=occ)
+                jk.set_kcomp(tools.ifft(Co_k, mesh), out, k)
+            """
+
+    def apply_k_kpt(self, C_k, kpt, mesh=None, Gv=None, exxdiv=None, comp=None,
+                    basis=None):
+        """
+        Apply the exchange operator to C_k at k-point kpt using the stored
+        ACE support vectors.
+
+        The index of kpt is looked up in the k-points recorded by
+        update_k_support_vec (or self.kpts.kpts_ibz if none were recorded).
+        mesh, Gv, exxdiv and basis are not used on the ACE path.
+
+        Raises:
+            RuntimeError: if comp is neither None nor an int.
+            NotImplementedError: if self.ace_exx is false.
+        """
+        if comp is None:
+            W_ks = self.exx_W_ks
+        elif isinstance(comp, int):
+            W_ks = jk.get_kcomp(self.exx_W_ks, comp, load=False)
+        else:
+            raise RuntimeError("comp must be None or int.")
+
+        if self.ace_exx:
+            if self._ace_kpts is None:
+                kpts_ibz = self.kpts.kpts_ibz
+            else:
+                kpts_ibz = self._ace_kpts
+            # First index at which kpt occurs in the stored k-point list.
+            k = jk.member(kpt, kpts_ibz)[0]
+            W_k = jk.get_kcomp(W_ks, k)
+            return jk.apply_k_kpt_support_vec(C_k, W_k)
+        else:
+            # TODO kpt_symm without ACE
+            raise NotImplementedError("kpt_symm only supports ACE for EXX")
+            """
+            cell = self.cell
+            kpts = self.kpts
+            nkpts = len(kpts)
+            if mesh is None: mesh = self.mesh
+            if Gv is None: Gv = self.get_Gv(mesh)
+            if exxdiv is None: exxdiv = self.exxdiv
+            mocc_ks = [np.ones(jk.get_kcomp(W_ks, k, load=False).shape[0])*2
+                       for k in range(nkpts)]
+            return apply_k_kpt(cell, C_k, kpt, None, mocc_ks, kpts, mesh, Gv,
+                               C_ks_R=W_ks, exxdiv=exxdiv)
+            """
+
+
+def jksym(mf, with_jk=None, ace_exx=True, outcore=False, mesh=None,
+          basis_ks=None):
+    """
+    Attach a J/K builder to mf as mf.with_jk and return mf.
+
+    A caller-supplied with_jk is attached unchanged. Otherwise a
+    KsymAdaptedPWJK is created from mf.cell, mf.kpts_obj and mf.exxdiv
+    (plus mesh and basis_ks) and its ace_exx / outcore attributes are set
+    from the arguments.
+    """
+    if with_jk is not None:
+        mf.with_jk = with_jk
+        return mf
+
+    new_jk = KsymAdaptedPWJK(mf.cell, mf.kpts_obj, exxdiv=mf.exxdiv,
+                             mesh=mesh, basis_ks=basis_ks)
+    new_jk.ace_exx = ace_exx
+    new_jk.outcore = outcore
+    mf.with_jk = new_jk
+    return mf
+
+
+class KsymMixin:
+    """
+    This mixin can be inherited to make a PWKSCF object support
+    symmetry reduction of the k-points to the
+    irreducible Brillouin zone (IBZ).
+
+    The k-point object is stored in self._kpts; the kpts property exposes
+    its IBZ k-points, all_kpts the full k-point list.
+    """
+    def _set_madelung(self):
+        """Recompute the Madelung constant from all_kpts, and the Ewald
+        energy shift derived from it."""
+        self._madelung = tools.pbc.madelung(self.cell, self.all_kpts)
+        self._etot_shift_ewald = -0.5*self._madelung*self.cell.nelectron
+
+    @property
+    def kpts(self):
+        """k-points of the IBZ (kpts_ibz of the k-point object)."""
+        return self._kpts.kpts_ibz
+    @property
+    def all_kpts(self):
+        """Full k-point list (kpts of the k-point object)."""
+        return self._kpts.kpts
+    @property
+    def kpts_obj(self):
+        """The underlying k-point object."""
+        return self._kpts
+    @property
+    def weights(self):
+        """Weights of the IBZ k-points (weights_ibz of the k-point object)."""
+        return self._kpts.weights_ibz
+    @kpts.setter
+    def kpts(self, x):
+        """
+        Set the k-points from an array or from a libkpts.KPoints object.
+
+        An ndarray is reshaped to (-1, 3) and wrapped by libkpts.make_kpts
+        with space-group and time-reversal symmetry disabled. Afterwards
+        the Madelung data is refreshed and the meshes are either reset
+        (no wavefunction cutoff) or rebuilt via set_meshes().
+
+        Raises:
+            TypeError: if x is neither an ndarray nor a KPoints object.
+        """
+        if isinstance(x, np.ndarray):
+            kpts = libkpts.make_kpts(
+                self.cell,
+                kpts=np.reshape(x, (-1,3)),
+                space_group_symmetry=False,
+                time_reversal_symmetry=False,
+            )
+        elif isinstance(x, libkpts.KPoints):
+            kpts = x
+        else:
+            # Report the type of the offending input; the local `kpts`
+            # is not bound on this branch.
+            raise TypeError("Input kpts have wrong type: %s" % type(x))
+        self._kpts = kpts
+        # update madelung constant and energy shift for exxdiv
+        self._set_madelung()
+        if self._ecut_wf is None:
+            self._wf_mesh = None
+            self._xc_mesh = None
+            self._wf2xc = None
+            self._basis_data = None
+        else:
+            self.set_meshes()
+
+    def init_jk(self, with_jk=None, ace_exx=None):
+        """Attach a symmetry-adapted J/K builder via jksym, defaulting
+        ace_exx to self.ace_exx."""
+        if ace_exx is None: ace_exx = self.ace_exx
+        return jksym(self, with_jk=with_jk, ace_exx=ace_exx,
+                     outcore=self.outcore, mesh=self.wf_mesh,
+                     basis_ks=self._basis_data)
+
+    def get_init_guess_key(self, cell=None, kpts=None, basis=None, pseudo=None,
+                           nvir=None, key="hcore", out=None):
+        """
+        Build the initial guess selected by key with khf.get_init_guess.
+
+        Accepted keys are "h1e", "hcore", "cycle1" and "scf". When basis
+        data is present, the coefficients at each IBZ k-point are reduced
+        to the columns listed in that k-point's basis indexes.
+
+        Returns:
+            (C_ks, mocc_ks)
+
+        Raises:
+            RuntimeError: for any other key.
+        """
+        if cell is None: cell = self.cell
+        if kpts is None: kpts = self.kpts
+        if nvir is None: nvir = self.nvir
+
+        if key in ["h1e","hcore","cycle1","scf"]:
+            C_ks, mocc_ks = khf.get_init_guess(cell, kpts,
+                                               basis=basis, pseudo=pseudo,
+                                               nvir=nvir, key=key, out=out,
+                                               kpts_obj=self.kpts_obj,
+                                               mesh=self.wf_mesh)
+        else:
+            logger.warn(self, "Unknown init guess %s", key)
+            raise RuntimeError("Unknown init guess %s" % key)
+
+        if self._basis_data is not None:
+            for k, kpt in enumerate(self.kpts):
+                inds = self.get_basis_kpt(kpt).indexes
+                jk.set_kcomp(np.ascontiguousarray(C_ks[k][:, inds]), C_ks, k)
+
+        return C_ks, mocc_ks
+
+
+class KsymAdaptedPWKRHF(KsymMixin, khf.PWKRHF):
+    """khf.PWKRHF combined with KsymMixin (IBZ-reduced k-points)."""
+    pass
+
+
+class KsymAdaptedPWKUHF(KsymMixin, kuhf.PWKUHF):
+    """kuhf.PWKUHF combined with KsymMixin (IBZ-reduced k-points)."""
+    pass
+
+
+class KsymAdaptedPWKRKS(KsymMixin, krks.PWKRKS):
+    """krks.PWKRKS combined with KsymMixin (IBZ-reduced k-points)."""
+    pass
+
+
+class KsymAdaptedPWKUKS(KsymMixin, kuks.PWKUKS):
+    """kuks.PWKUKS combined with KsymMixin (IBZ-reduced k-points)."""
+    pass
+
+
+# Demo: run the same two-carbon-atom cell once without and once with
+# k-point symmetry, then compare total energies and wall-clock time.
+if __name__ == "__main__":
+    from pyscf.pbc import gto
+    from pyscf.pbc.pwscf.khf import PWKRHF
+    import time
+
+    cell = gto.Cell(
+        atom = "C 0 0 0; C 0.89169994 0.89169994 0.89169994",
+        a = np.asarray([
+                [0.       , 1.78339987, 1.78339987],
+                [1.78339987, 0.        , 1.78339987],
+                [1.78339987, 1.78339987, 0.        ]]),
+        basis="gth-szv",
+        ke_cutoff=50,
+        pseudo="gth-pade",
+        space_group_symmetry=True,
+        symmorphic=True,
+    )
+    cell.build()
+    cell.verbose = 6
+
+    # kpts: plain k-point array; skpts: k-point object with space-group
+    # and time-reversal symmetry enabled.
+    kmesh = [2, 2, 2]
+    center = [0, 0, 0]
+    kpts = cell.make_kpts(kmesh)
+    skpts = cell.make_kpts(
+        kmesh,
+        scaled_center=center,
+        space_group_symmetry=True,
+        time_reversal_symmetry=True,
+    )
+
+    # Reference calculation without symmetry.
+    # NOTE(review): this run uses ecut_wf=40 while the symmetry-adapted
+    # run below uses ecut_wf=20 - confirm that the difference is intended
+    # before comparing energies or timings.
+    mf = PWKRHF(cell, kpts, ecut_wf=40)
+    mf.nvir = 4
+    t0 = time.monotonic()
+    mf.kernel()
+    t1 = time.monotonic()
+
+    # Symmetry-adapted calculation.
+    mf2 = KsymAdaptedPWKRHF(cell, skpts, ecut_wf=20)
+    mf2.damp_type = "simple"
+    mf2.damp_factor = 0.7
+    mf2.nvir = 4
+    t2 = time.monotonic()
+    mf2.kernel()
+    t3 = time.monotonic()
+
+    print(mf.e_tot, mf2.e_tot)
+    mf.dump_scf_summary()
+    mf2.dump_scf_summary()
+    print("nkpts in BZ and IBZ", skpts.nkpts, skpts.nkpts_ibz)
+    print("Runtime without symmmetry", t1 - t0)
+    print("Runtime with symmetry", t3 - t2)
diff --git a/pyscf/pbc/pwscf/krks.py b/pyscf/pbc/pwscf/krks.py
new file mode 100644
index 000000000..6bc17ea43
--- /dev/null
+++ b/pyscf/pbc/pwscf/krks.py
@@ -0,0 +1,496 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Kyle Bystrom <kylebystrom@gmail.com>
+#
+
+""" Spin-restricted Kohn-Sham DFT in plane-wave basis
+"""
+
+from pyscf.pbc.pwscf import khf
+from pyscf.pbc.dft import rks
+from pyscf.pbc import gto, tools
+from pyscf import __config__, lib
+from pyscf.lib import logger
+import numpy as np
+
+from pyscf.pbc.lib.kpts_helper import member
+
+
+def get_rho_for_xc(mf, xctype, C_ks, mocc_ks, mesh=None, Gv=None,
+                   out=None):
+    """
+    Assemble the density quantities needed to evaluate the XC functional,
+    in a layout similar to the pyscf.dft.numint module.
+
+    The returned array holds [rho] for LDA, [rho, drho/dx, drho/dy,
+    drho/dz] for GGA, and additionally the kinetic energy density tau as
+    fifth entry for MGGA; it has no entries when xctype is None.
+    Spin-polarized input (mocc_ks[0][0] one-dimensional) produces an
+    extra leading spin axis. If out is given it is used as the buffer of
+    the returned array.
+
+    Raises:
+        ValueError: for any other xctype.
+    """
+    spin_polarized = mocc_ks[0][0].ndim != 0
+    if spin_polarized:
+        assert mocc_ks[0][0].ndim == 1
+
+    cell = mf.cell
+    if mesh is None:
+        mesh = mf.wf_mesh
+    if Gv is None:
+        Gv = cell.get_Gv(mesh)
+
+    # Number of density components per spin channel.
+    if xctype is None:
+        nrho = 0
+    elif xctype == "LDA":
+        nrho = 1
+    elif xctype == "GGA":
+        nrho = 4
+    elif xctype == "MGGA":
+        nrho = 5
+    else:
+        raise ValueError(f"Unsupported xctype {xctype}")
+
+    # Treat the spin-restricted case as a single spin channel.
+    if spin_polarized:
+        nspin = len(C_ks)
+        assert nspin > 0
+    else:
+        nspin = 1
+        C_ks = [C_ks]
+        mocc_ks = [mocc_ks]
+
+    rhovec_R = np.ndarray((nspin, nrho, np.prod(mesh)), buffer=out)
+
+    # Density.
+    if nrho > 0:
+        for ispin in range(nspin):
+            rhovec_R[ispin, 0] = mf.with_jk.get_rho_R(
+                C_ks[ispin], mocc_ks[ispin], mesh=mesh, Gv=Gv
+            )
+
+    # Density gradient: multiply by i*G in reciprocal space.
+    if nrho > 1:
+        for ispin in range(nspin):
+            rho_G = tools.fft(rhovec_R[ispin, 0], mesh)
+            for axis in range(3):
+                grad_G = 1j * Gv[:, axis] * rho_G
+                rhovec_R[ispin, axis + 1] = tools.ifft(grad_G, mesh).real
+
+    # Kinetic energy density: accumulate the "density" of the scaled
+    # orbital derivatives along each Cartesian direction.
+    if nrho > 4:
+        prefac = 1j * np.sqrt(0.5)
+        for ispin in range(nspin):
+            C_spin = C_ks[ispin]
+            dC_ks = [np.empty_like(C_k) for C_k in C_spin]
+            rhovec_R[ispin, 4] = 0
+            for axis in range(3):
+                for k, C_k in enumerate(C_spin):
+                    if mf.with_jk.basis_ks is None:
+                        ikgv = prefac * (mf.kpts[k][axis] + Gv[:, axis])
+                    else:
+                        ikgv = prefac * mf.with_jk.basis_ks[k].Gk[:, axis]
+                    dC_ks[k][:] = ikgv * C_k
+                rhovec_R[ispin, 4] += mf.with_jk.get_rho_R(
+                    dC_ks, mocc_ks[ispin], mesh=mesh, Gv=Gv
+                )
+
+    return rhovec_R if spin_polarized else rhovec_R[0]
+
+
+def apply_vxc_kpt(mf, C_k, kpt, vxc_R, vtau_R=None, mesh=None, Gv=None,
+                  C_k_R=None, comp=None, basis=None):
+    """
+    Apply the XC potential to the bands C_k at a given kpt.
+
+    vxc_R is applied through mf.with_jk.apply_j_kpt. If vtau_R is given,
+    the kinetic (MGGA) term is added direction by direction: the bands
+    are multiplied by the scaled i(k+G) factor, vtau_R is applied, and
+    the result is multiplied by the conjugate factor. With comp set,
+    vxc_R and vtau_R are first indexed by comp.
+    """
+    cell = mf.cell
+    if mesh is None:
+        mesh = mf.wf_mesh
+    if Gv is None:
+        Gv = cell.get_Gv(mesh)
+    if comp is not None:
+        vxc_R = vxc_R[comp]
+        if vtau_R is not None:
+            vtau_R = vtau_R[comp]
+
+    apply_local = mf.with_jk.apply_j_kpt
+    Cbar_k = apply_local(C_k, mesh, vxc_R, C_k_R=C_k_R, basis=basis)
+    if vtau_R is None:
+        return Cbar_k
+
+    prefac = 1j * np.sqrt(0.5)
+    work = np.empty_like(C_k)
+    for axis in range(3):
+        if mf.with_jk.basis_ks is None:
+            ikgv = prefac * (kpt[axis] + Gv[:, axis])
+        else:
+            ikgv = prefac * basis.Gk[:, axis]
+        work[:] = ikgv * C_k
+        work[:] = apply_local(work, mesh, vtau_R, basis=basis)
+        Cbar_k[:] += ikgv.conj() * work
+    return Cbar_k
+
+
+def eval_xc(mf, xc_code, rhovec_R, xctype):
+    """
+    Evaluate the XC functional on the density array rhovec_R.
+
+    A 2D rhovec_R is treated as non-spin-polarized, a 3D one as
+    spin-polarized (total density = sum over the leading axis of the
+    first component). Returns the integrated XC energy
+    (dv * sum(exc_R * rho)) and the potential array from
+    mf._numint.eval_xc_eff; in the non-spin-polarized case the potential
+    gets an extra leading axis of length one.
+    """
+    ndim = rhovec_R.ndim
+    unpolarized = ndim == 2
+    if not unpolarized:
+        assert ndim == 3
+
+    exc_R, vxcvec_R = mf._numint.eval_xc_eff(xc_code, rhovec_R, deriv=1,
+                                             xctype=xctype)[:2]
+    # Volume element of the real-space grid.
+    dv = mf.cell.vol / exc_R.size
+
+    if unpolarized:
+        vxcvec_R = vxcvec_R[None, ...]
+        rho_R = rhovec_R[0]
+    else:
+        rho_R = rhovec_R[:, 0].sum(0)
+    return dv * exc_R.dot(rho_R), vxcvec_R
+
+
+def vxc_from_vxcvec(rhovec_R, vxcvec_R, xctype, mesh, Gv, dv):
+    """
+    Convert vxcvec_R (the XC energy functional derivative with respect
+    to rho, drho/dx, drho/dy, drho/dz, tau) into the potentials
+    vxc_R (dexc/drho) and vtau_R (dexc/dtau).
+
+    For GGA and MGGA the gradient components are folded into vxc_R by
+    multiplying with -i*G in reciprocal space. vtau_R is None for
+    non-MGGA functionals.
+
+    Returns:
+        (vxcdot * dv, vxc_R, vtau_R), where vxcdot is the sum over spin
+        of vxc_R . rho (plus vtau_R . tau for MGGA).
+    """
+    nspin = vxcvec_R.shape[0]
+    vxc_R = vxcvec_R[:, 0].copy()
+    if rhovec_R.ndim == 2:
+        # Give non-spin-polarized input a leading spin axis.
+        rhovec_R = rhovec_R[None, :, :]
+
+    has_gradient = xctype in ["GGA", "MGGA"]
+    vxcdot = 0
+    for ispin in range(nspin):
+        if has_gradient:
+            div_G = sum(
+                -1j * Gv[:, axis] * tools.fft(vxcvec_R[ispin, axis + 1], mesh)
+                for axis in range(3)
+            )
+            vxc_R[ispin, :] += tools.ifft(div_G, mesh).real
+        vxcdot += vxc_R[ispin].dot(rhovec_R[ispin, 0])
+
+    vtau_R = None
+    if xctype == "MGGA":
+        vtau_R = vxcvec_R[:, 4].copy()
+        for ispin in range(nspin):
+            vxcdot += vtau_R[ispin].dot(rhovec_R[ispin, 4])
+
+    return vxcdot * dv, vxc_R, vtau_R
+
+
+def apply_veff_kpt(mf, C_k, kpt, mocc_ks, kpts, mesh, Gv, vj_R, with_jk,
+                   exxdiv, C_k_R=None, comp=None, ret_E=False):
+    r""" Apply non-local part of the Fock operator to orbitals at given
+    k-point. The non-local part includes the exact exchange.
+    Also apply the semilocal XC part to the orbitals.
+
+    Args:
+        mf: Mean-field object (provides _numint, xc, cell, kpts,
+            scf_summary, get_basis_kpt and apply_vxc_kpt).
+        C_k: Orbital coefficients at kpt, one row per band.
+        kpt: The k-point.
+        mocc_ks: Occupations per k-point, or None to weight every band
+            with an occupation of 2.
+        kpts: Unused here; the k-point index is looked up in mf.kpts.
+        mesh, Gv: FFT mesh and plane-wave vectors.
+        vj_R: Coulomb potential tagged with exc, vxc_R and vtau_R.
+        with_jk: J/K builder providing apply_j_kpt and apply_k_kpt.
+        exxdiv: Forwarded to with_jk.apply_k_kpt.
+        C_k_R: Optional real-space orbitals forwarded to the local
+            potential applications.
+        comp: Spin component index, or None for spin-restricted.
+        ret_E: If True, also return the energy components.
+
+    Returns:
+        Cbar_k, or (Cbar_k, es) when ret_E is True, where es holds the
+        real parts of the Coulomb, exchange and XC energy components.
+
+    Raises:
+        NotImplementedError: for range-separated hybrid functionals.
+    """
+    log = logger.Logger(mf.stdout, mf.verbose)
+
+    if mocc_ks is None:
+        # Use an array so the per-band weighting below works the same way
+        # as for explicit occupations.
+        mocc_k = np.full(C_k.shape[0], 2.0)
+    else:
+        k = member(kpt, mf.kpts)[0]
+        mocc_k = mocc_ks[k][:C_k.shape[0]]
+    # Occupation-weighted conjugate orbitals, used for energy contractions.
+    Cto_k = C_k.conj() * mocc_k[:, None]
+
+    tspans = np.zeros((3,2))
+    es = np.zeros(3, dtype=np.complex128)
+    ni = mf._numint
+    omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, spin=mf.cell.spin)
+    if omega != 0:
+        # TODO range-separated hybrid functionals
+        raise NotImplementedError(
+            "Range-separated hybrids not implemented for PW mode"
+        )
+
+    basis = mf.get_basis_kpt(kpt)
+
+    # Coulomb term.
+    tick = np.asarray([logger.process_clock(), logger.perf_counter()])
+    tmp = with_jk.apply_j_kpt(C_k, mesh, vj_R, C_k_R=C_k_R, basis=basis)
+    Cbar_k = tmp
+    es[0] = np.einsum("ig,ig->", Cto_k, tmp) * 0.5
+    tock = np.asarray([logger.process_clock(), logger.perf_counter()])
+    tspans[0] = np.asarray(tock - tick).reshape(1,2)
+
+    # Exact-exchange term, scaled by the hybrid mixing coefficient.
+    if ni.libxc.is_hybrid_xc(mf.xc):
+        tmp = -hyb * with_jk.apply_k_kpt(C_k, kpt, mesh=mesh, Gv=Gv, exxdiv=exxdiv,
+                                         comp=comp, basis=basis)
+        if comp is None:
+            tmp *= 0.5
+        Cbar_k += tmp
+        es[1] = 0.5 * np.einsum("ig,ig->", Cto_k, tmp)
+    else:
+        es[1] = 0.0
+    tick = np.asarray([logger.process_clock(), logger.perf_counter()])
+    tspans[1] = np.asarray(tick - tock).reshape(1,2)
+
+    # Semilocal XC term; its energy comes precomputed with vj_R.
+    tmp = mf.apply_vxc_kpt(C_k, kpt, vxc_R=vj_R.vxc_R, mesh=mesh, Gv=Gv,
+                           C_k_R=C_k_R, vtau_R=vj_R.vtau_R, comp=comp,
+                           basis=basis)
+    Cbar_k += tmp
+    es[2] = vj_R.exc
+    if comp is not None:
+        es[2] *= 0.5
+    tock = np.asarray([logger.process_clock(), logger.perf_counter()])
+    tspans[2] = np.asarray(tock - tick).reshape(1,2)
+
+    # The last three energy-component names correspond to the three
+    # entries of es / tspans.
+    e_comp_names = mf.scf_summary["e_comp_name_lst"][-3:]
+    for ie_comp,e_comp in enumerate(e_comp_names):
+        key = "t-%s" % e_comp
+        if key not in mf.scf_summary:
+            mf.scf_summary[key] = np.zeros(2)
+        mf.scf_summary[key] += tspans[ie_comp]
+
+    if ret_E:
+        if (np.abs(es.imag) > 1e-6).any():
+            # Index into the same three names as es so that a large
+            # imaginary XC component can be reported as well.
+            icomps = np.where(np.abs(es.imag) > 1e-6)[0]
+            log.warn("Energy has large imaginary part:" +
+                     "%s : %s\n" * len(icomps),
+                     *[s for i in icomps for s in [e_comp_names[i],es[i]]])
+        es = es.real
+        return Cbar_k, es
+    else:
+        return Cbar_k
+
+
+class PWKohnShamDFT(rks.KohnShamDFT):
+    """
+    Kohn-Sham DFT in a plane-wave basis.
+
+    Adds an "xc" entry to the SCF energy-component list and overrides
+    get_vj_R so that the returned Coulomb potential also carries the XC
+    energy and potentials.
+    """
+    def __init__(self, xc='LDA,VWN'):
+        rks.KohnShamDFT.__init__(self, xc)
+        self.scf_summary["e_comp_name_lst"].append("xc")
+
+    # Module-level functions bound as methods.
+    get_rho_for_xc = get_rho_for_xc
+    apply_vxc_kpt = apply_vxc_kpt
+    eval_xc = eval_xc
+    apply_veff_kpt = apply_veff_kpt
+
+    @property
+    def etot_shift_ewald(self):
+        """Ewald energy shift scaled by the hybrid mixing coefficient.
+
+        Raises NotImplementedError for range-separated hybrids.
+        """
+        ni = self._numint
+        omega, alpha, hyb = ni.rsh_and_hybrid_coeff(
+            self.xc, spin=self.cell.spin
+        )
+        if omega != 0:
+            # TODO range-separated hybrid functionals
+            raise NotImplementedError
+        return hyb * self._etot_shift_ewald
+
+    @property
+    def madelung(self):
+        """Madelung constant scaled by the hybrid mixing coefficient.
+
+        Raises NotImplementedError for range-separated hybrids.
+        """
+        ni = self._numint
+        omega, alpha, hyb = ni.rsh_and_hybrid_coeff(
+            self.xc, spin=self.cell.spin
+        )
+        if omega != 0:
+            # TODO range-separated hybrid functionals
+            raise NotImplementedError
+        return hyb * self._madelung
+
+    def nuc_grad_method(self):
+        """Not implemented for this class."""
+        raise NotImplementedError
+
+    def get_vj_R_from_rho_R(self, *args, **kwargs):
+        """Not implemented; get_vj_R calls the with_jk version instead."""
+        # unneeded
+        raise NotImplementedError
+
+    def coarse_to_dense_grid(self, func_xR, out_xr=None):
+        """
+        Use FFT's to transfer func_xR from a coarse grid
+        (specifically, self.wf_mesh) to a dense grid
+        (specifically, self.xc_mesh).
+
+        The coarse-grid Fourier coefficients are copied into a
+        zero-initialized dense array at the positions self._wf2xc and
+        transformed back; the result is scaled by the ratio of grid
+        sizes. If out_xr is given it receives the result, and its shape
+        is set in place to the leading axes of func_xR plus the dense
+        grid size.
+        """
+        # TODO use real FFTs here since the real-space density is real
+        xshape = func_xR.shape[:-1]
+        small_size = np.prod(self.wf_mesh)
+        big_size = np.prod(self.xc_mesh)
+        ratio = big_size / small_size
+        # Reshape a view so the caller's array keeps its shape.
+        func_xR = func_xR.view()
+        func_xR.shape = (-1, small_size)
+        rhovec_G = tools.fft(func_xR, self.wf_mesh)
+        dense_size = np.prod(self.xc_mesh)
+        # NOTE(review): func_xR was just reshaped to 2D above, so the
+        # ndim == 1 branch looks unreachable - confirm.
+        if func_xR.ndim == 1:
+            shape = (dense_size,)
+        else:
+            nrho = func_xR.shape[0]
+            shape = (nrho, dense_size)
+        rhovec_g = np.zeros(shape, dtype=np.complex128)
+        rhovec_g[..., self._wf2xc] = rhovec_G
+        if out_xr is None:
+            rhovec_r = tools.ifft(rhovec_g, self.xc_mesh).real
+        else:
+            rhovec_r = out_xr
+            rhovec_r[:] = tools.ifft(rhovec_g, self.xc_mesh).real
+        rhovec_r[:] *= ratio
+        rhovec_r.shape = xshape + (big_size,)
+        return rhovec_r
+
+    def dense_to_coarse_grid(self, func_xr, out_xR=None):
+        """
+        Use FFT's to transfer func_xr from a dense grid
+        (specifically, self.xc_mesh) to a coarse grid
+        (specifically, self.wf_mesh).
+
+        The dense-grid Fourier coefficients at the positions self._wf2xc
+        are kept, scaled by the inverse ratio of grid sizes, and
+        transformed back. func_xr is indexed with two axes here. The
+        result is written into out_xR when given.
+        """
+        # TODO use real FFTs here since the real-space density is real
+        ratio = np.prod(self.xc_mesh) / np.prod(self.wf_mesh)
+        invr = 1 / ratio
+        vxcvec_g = tools.fft(func_xr, self.xc_mesh) * invr
+        vxcvec_G = np.asarray(vxcvec_g[:, self._wf2xc], order="C")
+        if out_xR is None:
+            out_xR = tools.ifft(vxcvec_G, self.wf_mesh).real
+        else:
+            out_xR[:] = tools.ifft(vxcvec_G, self.wf_mesh).real
+        return out_xR
+
+    def get_vj_R(self, C_ks, mocc_ks, mesh=None, Gv=None, save_rho=False):
+        """
+        As with the Hartree-Fock version, this routine computes the Coulomb
+        potential vj_R and returns it. It also computes the XC potential
+        and tags vj_R with four quantities used  in the DFT SCF cycle:
+            exc: The XC energy
+            vxcdot:
+                The integral of the XC potential multiplied by the density
+                (and the XC kinetic potential multiplied by the kinetic
+                energy density, for MGGAs). This is needed if the total
+                energy is computed from the orbital eigenvalues.
+            vxc_R: The XC potential in realspace, dexc/drho.
+            vtau_R:
+                The XC kinetic potential in realspace, dexc/dtau.
+                This is None if the functional is not a MGGA.
+
+        If save_rho is True, the rescaled density array is kept in
+        self._rhovec_R.
+        """
+        # Override get_vj_R to include XC potential
+        cell = self.cell
+        if mesh is None: mesh = self.wf_mesh
+        if Gv is None: Gv = cell.get_Gv(mesh)
+        ng = np.prod(mesh)
+        dv = self.cell.vol / ng
+        xctype = self._numint._xc_type(self.xc)
+        rhovec_R = self.get_rho_for_xc(xctype, C_ks, mocc_ks, mesh, Gv)
+        if rhovec_R.ndim == 2:
+            # non-spin-polarized
+            spinfac = 1
+            rho_R = rhovec_R[0]
+            nkpts = len(C_ks)
+        else:
+            # spin-polarized
+            spinfac = 1
+            rho_R = rhovec_R[:, 0].sum(0)
+            nkpts = len(C_ks[0])
+        # A k-point object, when present, overrides the count taken from
+        # the length of C_ks.
+        if self.kpts_obj is not None:
+            nkpts = self.kpts_obj.nkpts
+        # The Coulomb potential is built from rho_R before the in-place
+        # rescaling of rhovec_R below.
+        vj_R = self.with_jk.get_vj_R_from_rho_R(rho_R, mesh=mesh, Gv=Gv)
+        rhovec_R[:] *= (spinfac / nkpts) * ng / dv
+        if save_rho:
+            self._rhovec_R = rhovec_R
+        if (self.wf_mesh == self.xc_mesh).all():
+            # xc integration is on the same mesh as density generation
+            exc, vxcvec_R = self.eval_xc(
+                self.xc, rhovec_R, xctype
+            )
+            # Optional extra potential term, added only when the
+            # attribute _deda_r exists and is not None.
+            if hasattr(self, "_deda_r") and self._deda_r is not None:
+                vxcvec_R[:] += self._deda_r * self._damix_r
+        else:
+            # xc integration is on a denser mesh than density generation
+            rhovec_r = self.coarse_to_dense_grid(rhovec_R)
+            exc, vxcvec_r = self.eval_xc(
+                self.xc, rhovec_r, xctype
+            )
+            if hasattr(self, "_deda_r") and self._deda_r is not None:
+                vxcvec_r[:] += self._deda_r * self._damix_r
+            vxcvec_R = np.empty_like(rhovec_R)
+            # eval_xc always returns a leading spin axis; match it here.
+            if vxcvec_R.ndim == 2:
+                vxcvec_R = vxcvec_R[None, ...]
+            for s in range(vxcvec_r.shape[0]):
+                self.dense_to_coarse_grid(vxcvec_r[s], vxcvec_R[s])
+                #vxcvec_g = tools.fft(vxcvec_r[s], self.xc_mesh) * invr
+                #vxcvec_G = np.asarray(vxcvec_g[:, self._wf2xc], order="C")
+                #vxcvec_R[s] = tools.ifft(vxcvec_G, self.wf_mesh).real
+        vxcdot, vxc_R, vtau_R = vxc_from_vxcvec(
+            rhovec_R, vxcvec_R, xctype, mesh, Gv, dv
+        )
+        vj_R = lib.tag_array(
+            vj_R, exc=exc, vxcdot=vxcdot, vxc_R=vxc_R, vtau_R=vtau_R
+        )
+        return vj_R
+
+    def _get_xcdiff(self, vj_R):
+        """Return exc - 0.5 * vxcdot from the tags of vj_R."""
+        return vj_R.exc - 0.5 * vj_R.vxcdot
+
+    to_gpu = lib.to_gpu
+
+
+class PWKRKS(PWKohnShamDFT, khf.PWKRHF):
+    """
+    Restricted Kohn-Sham DFT in a plane-wave basis.
+    """
+    def __init__(self, cell, kpts=np.zeros((1,3)), xc='LDA,VWN',
+                 ecut_wf=None, ecut_rho=None,
+                 exxdiv=getattr(__config__, 'pbc_scf_SCF_exxdiv', 'ewald')):
+        """
+        See PWKSCF for input options.
+        """
+        khf.PWKRHF.__init__(
+            self, cell, kpts,
+            ecut_wf=ecut_wf, ecut_rho=ecut_rho, exxdiv=exxdiv,
+        )
+        PWKohnShamDFT.__init__(self, xc)
+
+    def dump_flags(self, verbose=None):
+        """Dump the flags of both parent classes and return self."""
+        khf.PWKRHF.dump_flags(self)
+        PWKohnShamDFT.dump_flags(self, verbose)
+        return self
+
+    def to_hf(self):
+        """Return a khf.PWKRHF object carrying over this object's
+        attributes."""
+        # TODO might need to setup up ACE here if xc is not hybrid
+        hf_obj = khf.PWKRHF(self.cell, self.kpts)
+        return self._transfer_attrs_(hf_obj)
+
+    def get_mo_energy(self, C_ks, mocc_ks, mesh=None, Gv=None, exxdiv=None,
+                      vj_R=None, comp=None, ret_mocc=True, full_ham=False):
+        """
+        Compute the orbital energies with the PWKRHF routine and tag the
+        first k-point's energies with xcdiff (see _get_xcdiff), which
+        energy_elec needs when it works from orbital energies.
+        """
+        if vj_R is None:
+            vj_R = self.get_vj_R(C_ks, mocc_ks)
+        res = khf.PWKRHF.get_mo_energy(self, C_ks, mocc_ks, mesh=mesh, Gv=Gv,
+                                       exxdiv=exxdiv, vj_R=vj_R, comp=comp,
+                                       ret_mocc=ret_mocc, full_ham=full_ham)
+        # With ret_mocc the energies are the first element of the result.
+        moe_ks = res[0] if ret_mocc else res
+        moe_ks[0] = lib.tag_array(moe_ks[0], xcdiff=self._get_xcdiff(vj_R))
+        return res
+
+    def energy_elec(self, C_ks, mocc_ks, mesh=None, Gv=None, moe_ks=None,
+                    vj_R=None, exxdiv=None):
+        """
+        Electronic energy from the PWKRHF routine, plus the xcdiff
+        correction when the energy is computed from orbital energies.
+        """
+        # Need xcdiff to compute energy from moe_ks; without it (and
+        # without vj_R) fall back to not using moe_ks at all.
+        if (moe_ks is not None and vj_R is None
+                and not hasattr(moe_ks[0], "xcdiff")):
+            moe_ks = None
+        e_scf = khf.PWKRHF.energy_elec(self, C_ks, mocc_ks, moe_ks=moe_ks,
+                                       mesh=mesh, Gv=Gv, vj_R=vj_R,
+                                       exxdiv=exxdiv)
+        # When energy is computed from the orbitals, we need to account for
+        # the difference between \int vxc rho and \int exc rho.
+        if moe_ks is not None:
+            e_scf += moe_ks[0].xcdiff
+        return e_scf
+
+    def update_k(self, C_ks, mocc_ks):
+        """
+        Update the exchange data only for hybrid functionals; otherwise
+        just make sure the "t-ace" entry exists in scf_summary.
+        """
+        if self._numint.libxc.is_hybrid_xc(self.xc):
+            super().update_k(C_ks, mocc_ks)
+            return
+        if "t-ace" not in self.scf_summary:
+            self.scf_summary["t-ace"] = np.zeros(2)
+
+
+# Demo: PBE calculation for a two-carbon-atom cell on a 2x2x2 k-mesh.
+if __name__ == "__main__":
+    cell = gto.Cell(
+        atom = "C 0 0 0; C 0.89169994 0.89169994 0.89169994",
+        a = np.asarray([
+                [0.       , 1.78339987, 1.78339987],
+                [1.78339987, 0.        , 1.78339987],
+                [1.78339987, 1.78339987, 0.        ]]),
+        basis="gth-szv",
+        ke_cutoff=50,
+        pseudo="gth-pade",
+    )
+    cell.build()
+    cell.verbose = 6
+
+    kmesh = [2, 2, 2]
+    kpts = cell.make_kpts(kmesh)
+    mf = PWKRKS(cell, kpts, xc="PBE", ecut_wf=20)
+    mf.nvir = 4  # converge first 4 virtual bands
+    mf.kernel()
+    mf.dump_scf_summary()
diff --git a/pyscf/pbc/pwscf/kuhf.py b/pyscf/pbc/pwscf/kuhf.py
new file mode 100644
index 000000000..43152e00f
--- /dev/null
+++ b/pyscf/pbc/pwscf/kuhf.py
@@ -0,0 +1,532 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Hong-Zhou Ye <hzyechem@gmail.com>
+#
+
+
+""" Spin-unrestricted Hartree-Fock in the Plane Wave Basis
+"""
+
+
+import h5py
+import copy
+import numpy as np
+
+from pyscf.pbc import gto, scf
+from pyscf.pbc.pwscf import khf, pw_helper
+from pyscf.pbc.pwscf.pw_helper import get_kcomp, set_kcomp
+from pyscf.lib import logger
+from pyscf import __config__
+
+
+def get_spin_component(C_ks, s):  # component s of C_ks (used for spin s = 0, 1)
+    return get_kcomp(C_ks, s, load=False)  # same lookup as get_kcomp, load=False
+
+
+def get_nband(mf, nbandv, nbandv_extra):
+    cell = mf.cell
+    if isinstance(nbandv, int): nbandv = [nbandv] * 2
+    if isinstance(nbandv_extra, int): nbandv_extra = [nbandv_extra] * 2
+    nbando = cell.nelec
+    nbandv_tot = [nbandv[s] + nbandv_extra[s] for s in [0,1]]
+    nband = [nbando[s] + nbandv[s] for s in [0,1]]
+    nband_tot = [nbando[s] + nbandv_tot[s] for s in [0,1]]
+
+    return nbando, nbandv_tot, nband, nband_tot
+
+
+def dump_moe(mf, moe_ks, mocc_ks, nband=None, trigger_level=logger.DEBUG):
+    if nband is None: nband = [None,None]
+    if isinstance(nband, int): nband = [nband,nband]
+    for s in [0,1]:
+        khf.dump_moe(mf, moe_ks[s], mocc_ks[s],
+                     nband=nband[s], trigger_level=trigger_level)
+
+
+def get_mo_energy(mf, C_ks, mocc_ks, mesh=None, Gv=None, exxdiv=None,
+                  vj_R=None, ret_mocc=True, full_ham=False):
+    cell = mf.cell
+    if vj_R is None: vj_R = mf.get_vj_R(C_ks, mocc_ks)
+    if mesh is None: mesh = mf.wf_mesh
+    if Gv is None: Gv = cell.get_Gv(mesh)
+    if exxdiv is None: exxdiv = mf.exxdiv
+
+    moe_ks = [None] * 2
+    for s in [0,1]:
+        C_ks_s = get_spin_component(C_ks, s)
+        moe_ks[s] = khf.get_mo_energy(mf, C_ks_s, mocc_ks[s],
+                                      mesh=mesh, Gv=Gv, exxdiv="none",
+                                      vj_R=vj_R, comp=s,
+                                      ret_mocc=False, full_ham=full_ham)
+
+    if full_ham:
+        return moe_ks
+
+    # determine mo occ and apply ewald shift if requested
+    mocc_ks = mf.get_mo_occ(moe_ks)
+    if exxdiv is None: exxdiv = mf.exxdiv
+    if exxdiv == "ewald":
+        nkpts = len(mf.kpts)
+        for s in [0,1]:
+            for k in range(nkpts):
+                moe_ks[s][k][mocc_ks[s][k] > khf.THR_OCC] -= mf.madelung
+
+    if ret_mocc:
+        return moe_ks, mocc_ks
+    else:
+        return moe_ks
+
+
+def get_mo_occ(cell, moe_ks=None, C_ks=None):
+    mocc_ks = [None] * 2
+    for s in [0,1]:
+        nocc = cell.nelec[s]
+        if moe_ks is not None:
+            mocc_ks[s] = khf.get_mo_occ(cell, moe_ks[s], nocc=nocc)
+        elif C_ks is not None:
+            C_ks_s = get_spin_component(C_ks, s)
+            mocc_ks[s] = khf.get_mo_occ(cell, C_ks=C_ks_s, nocc=nocc)
+        else:
+            raise RuntimeError
+        for k in range(len(mocc_ks[s])):
+            mocc_ks[s][k] *= 0.5
+
+    return mocc_ks
+
+
+def get_init_guess(cell0, kpts, basis=None, pseudo=None, nvir=0,
+                   key="hcore", out=None, kpts_obj=None, mesh=None,
+                   xc=None):
+    """
+    Args:
+        nvir (int):
+            Number of virtual bands to be evaluated. Default is zero.
+        out (h5py group):
+            If provided, the orbitals are written to it.
+    """
+    # Builds a GTO-basis KUHF/KUKS guess and converts its MOs to the PW basis.
+    log = logger.Logger(cell0.stdout, cell0.verbose)
+
+    nkpts = len(kpts)
+    if out is None:
+        out = [[None]*nkpts, [None]*nkpts]  # in-memory storage: out[spin][k]
+    else:
+        for s in [0,1]:
+            if "%d"%s in out: del out["%d"%s]  # start from an empty spin group
+            out.create_group("%d"%s)
+
+    if basis is None: basis = cell0.basis
+    if pseudo is None: pseudo = cell0.pseudo
+    cell = cell0.copy()  # work on a copy; cell0 itself is not modified here
+    if cell.__class__ != gto.Cell:  # subclass instance: use a plain Cell, drop its pseudo
+        cell.__class__ = gto.Cell
+        cell.pseudo = None
+        cell._pseudo = None
+    cell.basis = basis
+    if len(cell._ecp) > 0 or pseudo == "SG15":  # use GTH to avoid the slow init time of ECP
+        gth_pseudo = {}
+        for iatm in range(cell0.natm):
+            atm = cell0.atom_symbol(iatm)
+            if atm in gth_pseudo:
+                continue
+            q = cell0.atom_charge(iatm)
+            if q == 0:  # Ghost atom
+                continue
+            else:
+                gth_pseudo[atm] = "gth-pade-q%d"%q  # PP name chosen from the atom charge
+        log.debug("Using the GTH-PP for init guess: %s", gth_pseudo)
+        cell.pseudo = gth_pseudo
+        cell.ecp = {}
+        cell._ecp = {}
+        cell._ecpbas = []
+    else:
+        cell.pseudo = pseudo
+    cell.ke_cutoff = cell0.ke_cutoff
+    cell.verbose = 0  # silence the auxiliary cell
+    cell.build()
+
+    log.info("generating init guess using %s basis", cell.basis)
+
+    if kpts_obj is None:
+        kpts_obj = kpts
+    if xc is None:  # no functional given: Hartree-Fock guess
+        if len(kpts) < 30:  # density fitting is switched on from 30 k-points
+            pmf = scf.KUHF(cell, kpts_obj)
+        else:
+            pmf = scf.KUHF(cell, kpts_obj).density_fit()
+    else:
+        if len(kpts) < 30:
+            pmf = scf.KUKS(cell, kpts_obj, xc=xc)
+        else:
+            pmf = scf.KUKS(cell, kpts_obj, xc=xc).density_fit()
+
+    if key.lower() == "cycle1":
+        pmf.max_cycle = 0  # kernel() is run with max_cycle = 0
+        pmf.kernel()
+        mo_coeff = pmf.mo_coeff
+        mo_occ = pmf.mo_occ
+    elif key.lower() in ["hcore", "h1e"]:
+        h1e = pmf.get_hcore()
+        h1e = [h1e, h1e]  # same core Hamiltonian for both spins
+        s1e = pmf.get_ovlp()
+        mo_energy, mo_coeff = pmf.eig(h1e, s1e)
+        mo_occ = pmf.get_occ(mo_energy, mo_coeff)
+    elif key.lower() == "scf":
+        pmf.kernel()  # full GTO SCF with the default cycle limit
+        mo_coeff = pmf.mo_coeff
+        mo_occ = pmf.mo_occ
+    else:
+        raise NotImplementedError("Init guess %s not implemented" % key)
+
+    log.debug1("converting init MOs from GTO basis to PW basis")
+
+    # TODO: support specifying nvir for each kpt (useful for e.g., metals)
+    if isinstance(nvir, int): nvir = [nvir,nvir]
+
+    mocc_ks_spin = [None] * 2
+    for s in [0,1]:
+        nocc = cell.nelec[s]
+        nmo_ks = [len(mo_occ[s][k]) for k in range(nkpts)]
+        ntot = nocc + nvir[s]
+        ntot_ks = [min(ntot,nmo_ks[k]) for k in range(nkpts)]  # capped by len(mo_occ[s][k])
+
+        C_ks = get_spin_component(out, s)
+        pw_helper.get_C_ks_G(cell, kpts, mo_coeff[s], ntot_ks, out=C_ks,
+                             verbose=cell0.verbose, mesh=mesh)
+        mocc_ks = [mo_occ[s][k][:ntot_ks[k]] for k in range(nkpts)]
+
+        C_ks = khf.orth_mo(cell0, C_ks, mocc_ks)
+        # presumably pads each k-point up to ntot bands -- confirm in khf
+        C_ks, mocc_ks = khf.add_random_mo(cell0, [ntot]*nkpts, C_ks, mocc_ks)
+
+        mocc_ks_spin[s] = mocc_ks
+
+    return out, mocc_ks_spin
+
+
+def init_guess_by_chkfile(cell, chkfile_name, nvir, project=None, out=None,
+                          basis_ks=None):  # NOTE(review): `project` is never used here
+    if isinstance(nvir, int): nvir = [nvir] * 2
+    # Restart guess: reads mo_occ and mo_coeff of both spins from chkfile_name.
+    from pyscf.pbc.scf import chkfile
+    scf_dict = chkfile.load_scf(chkfile_name)[1]
+    mocc_ks = scf_dict["mo_occ"]
+    nkpts = len(mocc_ks[0])
+    if out is None: out = [[None] * nkpts for s in [0,1]]
+    if isinstance(out, h5py.Group):
+        for s in [0,1]:
+            key = "%d"%s
+            if key in out: del out[key]  # start from an empty spin group
+            out.create_group(key)
+    C_ks = out
+    for s in [0,1]:
+        ntot_ks = [None] * nkpts
+        C_ks_s = get_spin_component(C_ks, s)
+        with h5py.File(chkfile_name, "r") as f:
+            C0_ks_s = f["mo_coeff/%d"%s]
+            for k in range(nkpts):
+                set_kcomp(get_kcomp(C0_ks_s, k), C_ks_s, k)  # copy k-point k into out
+        for k in range(nkpts):
+            nocc = np.sum(mocc_ks[s][k]>khf.THR_OCC)
+            ntot_ks[k] = max(nocc+nvir[s], len(mocc_ks[s][k]))  # never fewer than stored
+
+        C_ks_s, mocc_ks[s] = khf.init_guess_from_C0(cell, C_ks_s, ntot_ks,
+                                                    out=C_ks_s,
+                                                    mocc_ks=mocc_ks[s],
+                                                    basis_ks=basis_ks)
+
+    return C_ks, mocc_ks
+
+
+def update_pp(mf, C_ks):
+    tick = np.asarray([logger.process_clock(), logger.perf_counter()])
+    if "t-ppnl" not in mf.scf_summary:
+        mf.scf_summary["t-ppnl"] = np.zeros(2)
+
+    mf.with_pp.update_vppnloc_support_vec(C_ks, ncomp=2, basis_ks=mf._basis_data)
+
+    tock = np.asarray([logger.process_clock(), logger.perf_counter()])
+    mf.scf_summary["t-ppnl"] += tock - tick
+
+
+def update_k(mf, C_ks, mocc_ks):
+    tick = np.asarray([logger.process_clock(), logger.perf_counter()])
+    if "t-ace" not in mf.scf_summary:
+        mf.scf_summary["t-ace"] = np.zeros(2)
+
+    for s in [0,1]:
+        C_ks_s = get_kcomp(C_ks, s, load=False)
+        mf.with_jk.update_k_support_vec(C_ks_s, mocc_ks[s], mf.kpts, comp=s, Ct_ks=C_ks_s)
+
+    tock = np.asarray([logger.process_clock(), logger.perf_counter()])
+    mf.scf_summary["t-ace"] += tock - tick
+
+
+def eig_subspace(mf, C_ks, mocc_ks, mesh=None, Gv=None, vj_R=None, exxdiv=None,
+                 comp=None):  # NOTE(review): `comp` is accepted but not used here
+    if vj_R is None: vj_R = mf.get_vj_R(C_ks, mocc_ks)
+    moe_ks = [None] * 2  # one entry per spin
+    for s in [0,1]:
+        C_ks_s = get_spin_component(C_ks, s)
+        mocc_ks_s = mocc_ks[s]
+        C_ks_s, moe_ks[s], mocc_ks[s] = khf.eig_subspace(mf, C_ks_s, mocc_ks_s,
+                                                         mesh=mesh, Gv=Gv,
+                                                         vj_R=vj_R,
+                                                         exxdiv="none", comp=s)
+        if isinstance(C_ks, list): C_ks[s] = C_ks_s  # list input is updated in place
+
+    # determine mo occ and apply ewald shift if requested
+    mocc_ks = mf.get_mo_occ(moe_ks)  # supersedes the per-spin values set in the loop
+    if exxdiv is None: exxdiv = mf.exxdiv
+    if exxdiv == "ewald":
+        nkpts = len(mf.kpts)
+        for s in [0,1]:
+            for k in range(nkpts):
+                moe_ks[s][k][mocc_ks[s][k] > khf.THR_OCC] -= mf.madelung
+
+    return C_ks, moe_ks, mocc_ks
+
+
+def energy_elec(mf, C_ks, mocc_ks, mesh=None, Gv=None, moe_ks=None,
+                vj_R=None, exxdiv=None):
+    cell = mf.cell
+    if mesh is None: mesh = mf.wf_mesh
+    if Gv is None: Gv = cell.get_Gv(mesh)
+    if exxdiv is None: exxdiv = mf.exxdiv
+    # Two routes: apply the Fock operator (moe_ks is None) or reuse moe_ks.
+    kpts = mf.kpts
+    nkpts = len(kpts)
+
+    wts = mf.weights
+    e_ks = np.zeros(nkpts)  # per-k-point energy, summed over both spins
+    if moe_ks is None:
+        if vj_R is None: vj_R = mf.get_vj_R(C_ks, mocc_ks)
+        e_comp = 0  # turns into an array at the first += below
+        for s in [0,1]:
+            C_ks_s = get_spin_component(C_ks, s)
+            for k in range(nkpts):
+                kpt = kpts[k]
+                occ = np.where(mocc_ks[s][k] > khf.THR_OCC)[0]  # occupied band indices
+                Co_k = get_kcomp(C_ks_s, k, occ=occ)
+                e_comp_k = mf.apply_Fock_kpt(Co_k, kpt, mocc_ks[s], mesh, Gv,
+                                             vj_R, exxdiv, comp=s,
+                                             ret_E=True)[1]
+                e_ks[k] += np.sum(e_comp_k)
+                e_comp += e_comp_k * wts[k]
+        # no division by nkpts: the weights wts[k] are applied above
+
+        if exxdiv == "ewald":
+            e_comp[mf.scf_summary["e_comp_name_lst"].index("ex")] += \
+                                                        mf.etot_shift_ewald
+
+        for comp,e in zip(mf.scf_summary["e_comp_name_lst"],e_comp):
+            mf.scf_summary[comp] = e  # store each named energy component
+    else:
+        for s in [0, 1]:
+            C_ks_s = get_spin_component(C_ks, s)
+            for k in range(nkpts):
+                kpt = kpts[k]
+                occ = np.where(mocc_ks[s][k] > khf.THR_OCC)[0]
+                mocc_k = 0.5 * mocc_ks[s][k][occ]  # half weight for both terms below
+                Co_k = get_kcomp(C_ks_s, k, occ=occ)
+                e1_comp = mf.apply_hcore_kpt(Co_k, kpt, mesh, Gv, mf.with_pp,
+                                             comp=s, ret_E=True,
+                                             mocc_ks=mocc_k)[1]
+                e_ks[k] += np.sum(e1_comp)
+                e_ks[k] += np.sum(moe_ks[s][k][occ] * mocc_k)
+    e_scf = np.dot(e_ks, wts)  # k-point weighted total
+
+    if moe_ks is None and exxdiv == "ewald":
+        # Note: ewald correction is not needed if e_tot is computed from moe_ks
+        # since the correction is already in the mo energy
+        e_scf += mf.etot_shift_ewald
+
+    return e_scf
+
+
+def converge_band(mf, C_ks, mocc_ks, kpts, Cout_ks=None,
+                  mesh=None, Gv=None,
+                  vj_R=None,
+                  conv_tol_davidson=1e-6,
+                  max_cycle_davidson=100,
+                  verbose_davidson=0):
+    # Runs khf.converge_band once per spin and sums the per-k-point fc_ks.
+    nkpts = len(kpts)
+
+    conv_ks = [None] * 2
+    moeout_ks = [None] * 2
+    fc_ks = [None] * 2
+    if isinstance(C_ks, list):
+        if Cout_ks is None: Cout_ks = [None] * 2
+    else:
+        Cout_ks = C_ks  # non-list input: any Cout_ks argument is replaced by C_ks
+    for s in [0,1]:
+        C_ks_s = get_spin_component(C_ks, s)
+        # NOTE(review): the two-spin mocc_ks is forwarded as is (not mocc_ks[s]) -- confirm
+        conv_ks[s], moeout_ks[s], Cout_ks_s, fc_ks[s] = khf.converge_band(
+                            mf, C_ks_s, mocc_ks, kpts, mesh=mesh, Gv=Gv,
+                            vj_R=vj_R, comp=s,
+                            conv_tol_davidson=conv_tol_davidson,
+                            max_cycle_davidson=max_cycle_davidson,
+                            verbose_davidson=verbose_davidson)
+
+        if isinstance(C_ks, list): Cout_ks[s] = Cout_ks_s
+
+    fc_ks = [fc_ks[0][k]+fc_ks[1][k] for k in range(nkpts)]  # add both spins per k-point
+
+    return conv_ks, moeout_ks, Cout_ks, fc_ks
+
+
+class PWKUHF(khf.PWKSCF):
+    """
+    Unrestricted Hartree-Fock in a plane-wave basis.
+    """
+    def __init__(self, cell, kpts=np.zeros((1,3)),
+                 ecut_wf=None, ecut_rho=None,
+                 exxdiv=getattr(__config__, 'pbc_scf_PWKUHF_exxdiv', 'ewald')):
+        """
+        See PWKSCF for input options.
+        """
+        khf.PWKSCF.__init__(self, cell, kpts, ecut_wf=ecut_wf,
+                            ecut_rho=ecut_rho, exxdiv=exxdiv)
+        self.nvir = [0,0]
+        self.nvir_extra = [1,1]
+        self._nelec = None
+
+    def get_init_guess_key(self, cell=None, kpts=None, basis=None, pseudo=None,
+                           nvir=None, key="hcore", out=None):
+        if cell is None: cell = self.cell
+        if kpts is None: kpts = self.kpts
+        if nvir is None: nvir = self.nvir
+
+        if key in ["h1e","hcore","cycle1","scf"]:
+            if hasattr(self, "xc"):
+                # This is DFT, use fast initial guess
+                xc = "LDA,VWN"
+            else:
+                xc = None
+            C_ks, mocc_ks = get_init_guess(cell, kpts,
+                                           basis=basis, pseudo=pseudo,
+                                           nvir=nvir, key=key, out=out,
+                                           mesh=self.wf_mesh, xc=xc)
+        else:
+            logger.warn(self, "Unknown init guess %s", key)
+            raise RuntimeError
+
+        if self._basis_data is not None:
+            Cspin_ks = C_ks
+            for C_ks in Cspin_ks:
+                for k, kpt in enumerate(self.kpts):
+                    inds = self.get_basis_kpt(kpt).indexes
+                    set_kcomp(np.ascontiguousarray(C_ks[k][:, inds]), C_ks, k)
+            C_ks = Cspin_ks
+
+        return C_ks, mocc_ks
+
+    def get_init_guess_C0(self, C0_ks, nvir=None, out=None):
+        if nvir is None: nvir = self.nvir
+        if isinstance(nvir, int): nvir = [nvir,nvir]
+        nocc = self.cell.nelec
+        nkpts = len(self.kpts)
+        if out is None:
+            out = [[None]*nkpts, [None]*nkpts]
+        elif isinstance(out, h5py.Group):
+            for s in [0,1]:
+                if "%d"%s in out: del out["%d"%s]
+                out.create_group("%d"%s)
+        C_ks = out
+        mocc_ks = [None] * 2
+        for s in [0,1]:
+            ntot_ks = [nocc[s]+nvir[s]] * len(self.kpts)
+            C_ks_s = get_spin_component(C_ks, s)
+            C0_ks_s = get_spin_component(C0_ks, s)
+            n0_ks = [get_kcomp(C0_ks_s, k, load=False).shape[0]
+                     for k in range(nkpts)]
+            mocc_ks[s] = [np.asarray([1 if i < nocc[s] else 0
+                          for i in range(n0_ks[k])]) for k in range(nkpts)]
+            C_ks_s, mocc_ks[s] = khf.init_guess_from_C0(self.cell, C0_ks_s,
+                                                        ntot_ks, out=C_ks_s,
+                                                        mocc_ks=mocc_ks[s],
+                                                        basis_ks=self._basis_data)
+
+        return C_ks, mocc_ks
+
+    def init_guess_by_chkfile(self, chk=None, nvir=None, project=True,
+                              out=None):
+        if chk is None: chk = self.chkfile
+        if nvir is None: nvir = self.nvir
+        return init_guess_by_chkfile(self.cell, chk, nvir, project=project,
+                                     out=out, basis_ks=self._basis_data)
+
+    def get_mo_occ(mf, moe_ks=None, C_ks=None):
+        return get_mo_occ(mf.cell, moe_ks, C_ks)
+
+    def get_vj_R(self, C_ks, mocc_ks, mesh=None, Gv=None):
+        return self.with_jk.get_vj_R(C_ks, mocc_ks, mesh=mesh, Gv=Gv, ncomp=2)
+
+    @property
+    def nelec(self):
+        if self._nelec is not None:
+            return self._nelec
+        else:
+            cell = self.cell
+            nkpts = len(self.kpts)
+            ne = cell.tot_electrons(nkpts)
+            nalpha = (ne + cell.spin) // 2
+            nbeta = nalpha - cell.spin
+            if nalpha + nbeta != ne:
+                raise RuntimeError('Electron number %d and spin %d are not consistent\n'
+                                   'Note cell.spin = 2S = Nalpha - Nbeta, not 2S+1' %
+                                   (ne, cell.spin))
+            return nalpha, nbeta
+    @nelec.setter
+    def nelec(self, x):
+        self._nelec = x
+
+    get_nband = get_nband
+    dump_moe = dump_moe
+    update_pp = update_pp
+    update_k = update_k
+    eig_subspace = eig_subspace
+    get_mo_energy = get_mo_energy
+    energy_elec = energy_elec
+    converge_band = converge_band
+
+
+if __name__ == "__main__":  # example run: single C atom, spin-polarised PWKUHF
+    cell = gto.Cell(
+        atom = "C 0 0 0",
+        a = np.eye(3) * 4,
+        basis="gth-szv",
+        ke_cutoff=50,
+        pseudo="gth-pade",
+        spin=2,  # cell.spin = Nalpha - Nbeta (see the nelec error message)
+    )
+    cell.mesh = [25, 25, 25]  # mesh fixed before build()
+    cell.build()
+    cell.verbose = 6
+    # single k-point mesh
+    nk = 1
+    kmesh = (nk,)*3
+    kpts = cell.make_kpts(kmesh)
+
+    umf = PWKUHF(cell, kpts)
+    umf.nvir = [0,2]  # one entry per spin
+    umf.nvir_extra = 4  # an int is applied to both spins by get_nband
+    umf.kernel()
+
+    umf.dump_scf_summary()
+    # compare against a stored reference energy
+    assert(abs(umf.e_tot - -5.39994570429868) < 1e-5)
diff --git a/pyscf/pbc/pwscf/kuks.py b/pyscf/pbc/pwscf/kuks.py
new file mode 100644
index 000000000..c45378a09
--- /dev/null
+++ b/pyscf/pbc/pwscf/kuks.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Kyle Bystrom <kylebystrom@gmail.com>
+#
+
+""" Spin-unrestricted Kohn-Sham DFT in plane-wave basis
+"""
+
+from pyscf import __config__
+from pyscf import lib
+from pyscf.pbc import gto
+from pyscf.pbc.pwscf import khf, kuhf, krks
+import numpy as np
+
+
+class PWKUKS(krks.PWKohnShamDFT, kuhf.PWKUHF):
+    """
+    Unrestricted Kohn-Sham DFT in a plane-wave basis.
+    """
+    def __init__(self, cell, kpts=np.zeros((1,3)), xc='LDA,VWN',
+                 ecut_wf=None, ecut_rho=None,
+                 exxdiv=getattr(__config__, 'pbc_scf_SCF_exxdiv', 'ewald')):
+        """
+        See PWKSCF for input options.
+        """
+        kuhf.PWKUHF.__init__(self, cell, kpts, ecut_wf=ecut_wf, ecut_rho=ecut_rho,
+                             exxdiv=exxdiv)
+        krks.PWKohnShamDFT.__init__(self, xc)
+
+    def dump_flags(self, verbose=None):
+        kuhf.PWKUHF.dump_flags(self)
+        krks.PWKohnShamDFT.dump_flags(self, verbose)
+        return self
+
+    def to_hf(self):
+        out = self._transfer_attrs_(kuhf.PWKUHF(self.cell, self.kpts))
+        # TODO might need to setup up ACE here if xc is not hybrid
+        return out
+
+    def get_mo_energy(self, C_ks, mocc_ks, mesh=None, Gv=None, exxdiv=None,
+                      vj_R=None, ret_mocc=True, full_ham=False):
+        if vj_R is None: vj_R = self.get_vj_R(C_ks, mocc_ks)
+        res = kuhf.PWKUHF.get_mo_energy(self, C_ks, mocc_ks, mesh=mesh, Gv=Gv,
+                                        exxdiv=exxdiv, vj_R=vj_R,
+                                        ret_mocc=ret_mocc, full_ham=full_ham)
+        if ret_mocc:
+            moe_ks = res[0]
+        else:
+            moe_ks = res
+        moe_ks[0][0] = lib.tag_array(moe_ks[0][0], xcdiff=self._get_xcdiff(vj_R))
+        return res
+
+    def energy_elec(self, C_ks, mocc_ks, mesh=None, Gv=None, moe_ks=None,
+                    vj_R=None, exxdiv=None):
+        if moe_ks is not None:
+            # Need xcdiff to compute energy from moe_ks
+            if vj_R is None and not hasattr(moe_ks[0][0], "xcdiff"):
+                moe_ks = None
+        e_scf = kuhf.PWKUHF.energy_elec(self, C_ks, mocc_ks, moe_ks=moe_ks,
+                                        mesh=mesh, Gv=Gv, vj_R=vj_R,
+                                        exxdiv=exxdiv)
+        # When energy is computed from the orbitals, we need to account for
+        # the different between \int vxc rho and \int exc rho.
+        if moe_ks is not None:
+            e_scf += moe_ks[0][0].xcdiff
+        return e_scf
+
+    def update_k(self, C_ks, mocc_ks):
+        ni = self._numint
+        if ni.libxc.is_hybrid_xc(self.xc):
+            super().update_k(C_ks, mocc_ks)
+        elif "t-ace" not in self.scf_summary:
+            self.scf_summary["t-ace"] = np.zeros(2)
+
+
+if __name__ == "__main__":  # example run: single C atom, spin-polarised PBE0
+    cell = gto.Cell(
+        atom = "C 0 0 0",
+        a = np.eye(3) * 4,
+        basis="gth-szv",
+        ke_cutoff=50,
+        pseudo="gth-pade",
+        spin=2,
+    )
+    cell.mesh = [25, 25, 25]  # mesh fixed before build()
+    cell.build()
+    cell.verbose = 6
+    # single k-point mesh
+    nk = 1
+    kmesh = (nk,)*3
+    kpts = cell.make_kpts(kmesh)
+
+    umf = PWKUKS(cell, kpts, xc="PBE0")
+    umf.damp_type = "simple"
+    umf.damp_factor = 0.7
+    umf.nvir = [0,2]  # one entry per spin
+    umf.nvir_extra = 4
+    umf.kernel()
+
+    umf.dump_scf_summary()
diff --git a/pyscf/pbc/pwscf/kump2.py b/pyscf/pbc/pwscf/kump2.py
new file mode 100644
index 000000000..31d0d206e
--- /dev/null
+++ b/pyscf/pbc/pwscf/kump2.py
@@ -0,0 +1,425 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Hong-Zhou Ye <hzyechem@gmail.com>
+#
+
+""" kpt-sampled periodic MP2 using a plane wave basis and spin-unrestricted HF
+"""
+
+import tempfile
+import numpy as np
+
+from pyscf.pbc.pwscf import kmp2
+from pyscf.pbc.pwscf.pw_helper import (get_nocc_ks_from_mocc, wf_ifft)
+from pyscf.pbc.pwscf.kuhf import get_spin_component
+from pyscf.pbc import tools
+from pyscf import lib
+from pyscf.lib import logger
+
+
+def kconserv(kptija, reduce_latvec, kdota):
+    tmp = lib.dot(kptija.reshape(1,-1), reduce_latvec) - kdota
+    return np.where(abs(tmp - np.rint(tmp)).sum(axis=1)<1e-6)[0][0]
+
+
+def fill_oovv(oovv, v_ia, Co_kj_R, Cv_kb_R, fac=None):
+    r"""
+    Math:
+        oovv = \sum_G rho_ia^kika(G)*coulG(ki-ka) * rho_jb^kjkb(kptijab-G)
+             = \sum_G V_ia^kika(G) * rho_jb^kjkb(kptijab-G)
+             = \sum_r V_ia^kika(r)*phase * rho_jb^kjkb(r)
+             = \sum_r v_ia^kika(r) * rho_jb^kjkb(r)
+    """
+    nocc_i, nocc_j = oovv.shape[:2]
+    for j in range(nocc_j):
+        rho_jb_R = Co_kj_R[j].conj() * Cv_kb_R
+        for i in range(nocc_i):
+            oovv[i,j] = lib.dot(v_ia[i], rho_jb_R.T)
+    if fac is not None: oovv *= fac
+
+    return oovv
+
+
+def kernel_dx_(cell, kpts, chkfile_name, summary, nvir=None, nvir_lst=None,
+               basis_ks=None):
+    """ Compute both direct (d) and exchange (x) contributions together.
+    """
+    log = logger.Logger(cell.stdout, cell.verbose)
+    cput0 = (logger.process_clock(), logger.perf_counter())
+
+    dtype = np.complex128
+    dsize = 16
+
+    fchk, C_ks, moe_ks, mocc_ks = kmp2.read_fchk(chkfile_name)
+
+    nkpts = len(kpts)
+    if basis_ks is None:
+        basis_ks = [None] * nkpts
+        mesh = cell.mesh
+    else:
+        assert len(basis_ks) == nkpts
+        mesh = basis_ks[0].mesh
+    coords = cell.get_uniform_grids(mesh=mesh)
+    ngrids = coords.shape[0]
+
+    reduce_latvec = cell.lattice_vectors() / (2*np.pi)
+    kdota = lib.dot(kpts, reduce_latvec)
+
+    fac = ngrids**2. / cell.vol
+    fac_oovv = fac * ngrids / nkpts
+
+    nocc_ks = np.asarray([get_nocc_ks_from_mocc(mocc_ks[s]) for s in [0,1]])
+    if nvir is None:
+        n_ks = np.asarray([[len(mocc_ks[s][k]) for k in range(nkpts)]
+                          for s in [0,1]])
+        nvir_ks = n_ks - nocc_ks
+    else:
+        if isinstance(nvir,int): nvir = [nvir] * 2
+        nvir_ks = np.asarray([[nvir[s]] * nkpts for s in [0,1]])
+        n_ks = nocc_ks + nvir_ks
+    nocc_max = np.max(nocc_ks)
+    nvir_max = np.max(nvir_ks)
+    nocc_sps = np.asarray([[nocc_ks[0][k],nocc_ks[1][k]] for k in range(nkpts)])
+    nvir_sps = np.asarray([[nvir_ks[0][k],nvir_ks[1][k]] for k in range(nkpts)])
+    if nvir_lst is None:
+        nvir_lst = [nvir_max]
+    nvir_lst = np.asarray(nvir_lst)
+    nnvir = len(nvir_lst)
+    logger.info(cell, "Compute emp2 for these nvir's: %s", nvir_lst)
+
+    # estimate memory requirement
+    est_mem = nocc_max*nvir_max*ngrids      # for caching v_ia_R
+    est_mem += (nocc_max*nvir_max)**2*4     # for caching oovv_ka/kb, eijab, wijab
+    est_mem += (nocc_max+nvir_max)*ngrids*2 # for caching MOs
+    est_mem *= dsize / 1e6
+    frac = 0.6
+    cur_mem = cell.max_memory - lib.current_memory()[0]
+    safe_mem = cur_mem * frac
+    log.debug("Currently available memory %9.2f MB, safe %9.2f MB",
+              cur_mem, safe_mem)
+    log.debug("Estimated required memory  %9.2f MB", est_mem)
+    if est_mem > safe_mem:
+        rec_mem = est_mem / frac + lib.current_memory()[0]
+        log.warn("Estimate memory requirement (%.2f MB) exceeds %.0f%%"
+                 " of currently available memory (%.2f MB). Calculations may"
+                 " fail and `cell.max_memory = %.2f` is recommended.",
+                 est_mem, frac*100, safe_mem, rec_mem)
+
+    buf1 = np.empty(nocc_max*nvir_max*ngrids, dtype=dtype)
+    buf2 = np.empty(nocc_max*nocc_max*nvir_max*nvir_max, dtype=dtype)
+    buf3 = np.empty(nocc_max*nocc_max*nvir_max*nvir_max, dtype=dtype)
+
+    swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+    fswap = lib.H5TmpFile(swapfile.name)
+    swapfile = None
+
+# ifft to make C(G) --> C(r)
+# note the ordering of spin and k-pt indices is swapped
+    C_ks_R = fswap.create_group("C_ks_R")
+    for s in [0,1]:
+        C_ks_s = get_spin_component(C_ks, s)
+        for k in range(nkpts):
+            key = "%d"%k
+            C_k = C_ks_s[key][()]
+            # C_ks_R["%s/%d"%(key,s)] = tools.ifft(C_k, mesh)
+            C_ks_R["%s/%d"%(key,s)] = wf_ifft(C_k, mesh, basis_ks[k])
+            C_k = None
+
+    v_ia_ks_R = fswap.create_group("v_ia_ks_R")
+
+    cput1 = log.timer('initialize pwmp2', *cput0)
+
+    tick = np.zeros(2)
+    tock = np.zeros(2)
+    tspans = np.zeros((7,2))
+    tcomps = summary["tcomps"] = ["init", "v_ks_R", "khelper", "IO", "oovv",
+                                  "energy", "tot"]
+    tspans[0] = np.asarray(cput1) - np.asarray(cput0)
+
+    emp2_d = np.zeros(nnvir)
+    emp2_x = np.zeros(nnvir)
+    emp2_ss = np.zeros(nnvir)
+    emp2_os = np.zeros(nnvir)
+    for ki in range(nkpts):
+        kpti = kpts[ki]
+        nocc_i = nocc_sps[ki]
+
+        tick[:] = logger.process_clock(), logger.perf_counter()
+
+        Co_ki_R = [C_ks_R["%d/%d"%(ki,s)][:nocc_i[s]] for s in [0,1]]
+
+        for ka in range(nkpts):
+            kpta = kpts[ka]
+            nocc_a = nocc_sps[ka]
+            nvir_a = nvir_sps[ka]
+            coulG = tools.get_coulG(cell, kpta-kpti, exx=False, mesh=mesh)
+
+            key_ka = "%d"%ka
+            if key_ka in v_ia_ks_R: del v_ia_ks_R[key_ka]
+
+            for s in [0,1]:
+                Cv_ka_R = C_ks_R["%s/%d"%(key_ka,s)][nocc_a[s]:nocc_a[s]+nvir_a[s]]
+                v_ia_R = np.ndarray((nocc_i[s],nvir_a[s],ngrids), dtype=dtype,
+                                    buffer=buf1)
+
+                for i in range(nocc_i[s]):
+                    v_ia = tools.fft(Co_ki_R[s][i].conj() *
+                                     Cv_ka_R, mesh) * coulG
+                    v_ia_R[i] = tools.ifft(v_ia, mesh)
+
+                v_ia_ks_R["%s/%d"%(key_ka,s)] = v_ia_R
+                v_ia_R = Cv_ka_R = None
+
+        Co_ki_R = None
+
+        tock[:] = logger.process_clock(), logger.perf_counter()
+        tspans[1] += tock - tick
+
+        for kj in range(nkpts):
+            nocc_j = nocc_sps[kj]
+            kptij = kpti + kpts[kj]
+
+            tick[:] = logger.process_clock(), logger.perf_counter()
+
+            Co_kj_R = [C_ks_R["%d/%d"%(kj,s)][:nocc_j[s]] for s in [0,1]]
+
+            tock[:] = logger.process_clock(), logger.perf_counter()
+            tspans[3] += tock - tick
+
+            done = [False] * nkpts
+            kab_lst = []
+            kptijab_lst = []
+            for ka in range(nkpts):
+                if done[ka]: continue
+                kptija = kptij - kpts[ka]
+                kb = kconserv(kptija, reduce_latvec, kdota)
+                kab_lst.append((ka,kb))
+                kptijab_lst.append(kptija-kpts[kb])
+                done[ka] = done[kb] = True
+
+            tick[:] = logger.process_clock(), logger.perf_counter()
+            tspans[2] += tick - tock
+
+            nkab = len(kab_lst)
+            for ikab in range(nkab):
+                ka,kb = kab_lst[ikab]
+                kptijab = kptijab_lst[ikab]
+
+                nocc_a = nocc_sps[ka]
+                nvir_a = nvir_sps[ka]
+                nocc_b = nocc_sps[kb]
+                nvir_b = nvir_sps[kb]
+
+                tick[:] = logger.process_clock(), logger.perf_counter()
+                phase = np.exp(-1j*lib.dot(coords,
+                                           kptijab.reshape(-1,1))).reshape(-1)
+                tock[:] = logger.process_clock(), logger.perf_counter()
+                tspans[4] += tock - tick
+
+                for s in [0,1]:
+
+                    tick[:] = logger.process_clock(), logger.perf_counter()
+                    Cv_kb_R = C_ks_R["%d/%d"%(kb,s)][nocc_b[s]:nocc_b[s]+nvir_b[s]]
+                    v_ia = v_ia_ks_R["%d/%d"%(ka,s)][:]
+                    tock[:] = logger.process_clock(), logger.perf_counter()
+                    tspans[3] += tock - tick
+
+                    v_ia *= phase
+                    oovv_ka = np.ndarray((nocc_i[s],nocc_j[s],nvir_a[s],nvir_b[s]),
+                                         dtype=dtype, buffer=buf2)
+                    fill_oovv(oovv_ka, v_ia, Co_kj_R[s], Cv_kb_R, fac_oovv)
+                    tick[:] = logger.process_clock(), logger.perf_counter()
+                    tspans[4] += tick - tock
+
+                    Cv_kb_R = None
+
+                    if ka != kb:
+                        Cv_ka_R = C_ks_R["%d/%d"%(ka,s)][nocc_a[s]:
+                                                         nocc_a[s]+nvir_a[s]]
+                        v_ib = v_ia_ks_R["%d/%s"%(kb,s)][:]
+                        tock[:] = logger.process_clock(), logger.perf_counter()
+                        tspans[3] += tock - tick
+
+                        v_ib *= phase
+                        oovv_kb = np.ndarray((nocc_i[s],nocc_j[s],nvir_b[s],nvir_a[s]),
+                                             dtype=dtype, buffer=buf3)
+                        fill_oovv(oovv_kb, v_ib, Co_kj_R[s], Cv_ka_R, fac_oovv)
+                        tick[:] = logger.process_clock(), logger.perf_counter()
+                        tspans[4] += tick - tock
+
+                        Cv_ka_R = v_ib = None
+                    else:
+                        oovv_kb = oovv_ka
+
+# Same-spin contribution to KUMP2 energy
+                    tick[:] = logger.process_clock(), logger.perf_counter()
+                    mo_e_o = moe_ks[s][ki][:nocc_i[s]]
+                    mo_e_v = moe_ks[s][ka][nocc_a[s]:nocc_a[s]+nvir_a[s]]
+                    eia = mo_e_o[:,None] - mo_e_v
+
+                    if ka != kb:
+                        mo_e_o = moe_ks[s][kj][:nocc_j[s]]
+                        mo_e_v = moe_ks[s][kb][nocc_b[s]:nocc_b[s]+nvir_b[s]]
+                        ejb = mo_e_o[:,None] - mo_e_v
+                    else:
+                        ejb = eia
+
+                    eijab = lib.direct_sum('ia,jb->ijab',eia,ejb)
+                    t2_ijab = np.conj(oovv_ka/eijab)
+                    for invir_,nvir_ in enumerate(nvir_lst):
+                        eijab_d = np.einsum('ijab,ijab->',
+                                            t2_ijab[:,:,:nvir_,:nvir_],
+                                            oovv_ka[:,:,:nvir_,:nvir_]).real
+                        eijab_x = - np.einsum('ijab,ijba->',
+                                              t2_ijab[:,:,:nvir_,:nvir_],
+                                              oovv_kb[:,:,:nvir_,:nvir_]).real
+                        if ka != kb:
+                            eijab_d *= 2
+                            eijab_x *= 2
+                        emp2_d[invir_] += eijab_d
+                        emp2_x[invir_] += eijab_x
+                        emp2_ss[invir_] += eijab_d + eijab_x
+                    tock[:] = logger.process_clock(), logger.perf_counter()
+                    tspans[5] += tock - tick
+
+                    oovv_ka = oovv_kb = eijab = None
+
+# Opposite-spin contribution to KUMP2 energy
+                    if s == 0:
+                        t = 1 - s
+                        tick[:] = logger.process_clock(), logger.perf_counter()
+                        Cv_kb_R = C_ks_R["%d/%d"%(kb,t)][nocc_b[t]:
+                                                         nocc_b[t]+nvir_b[t]]
+                        tock[:] = logger.process_clock(), logger.perf_counter()
+                        tspans[3] += tock - tick
+
+                        oovv_ka = np.ndarray((nocc_i[s],nocc_j[t],nvir_a[s],nvir_b[t]),
+                                             dtype=dtype, buffer=buf2)
+                        fill_oovv(oovv_ka, v_ia, Co_kj_R[t], Cv_kb_R, fac_oovv)
+                        tick[:] = logger.process_clock(), logger.perf_counter()
+                        tspans[4] += tick - tock
+
+                        Cv_kb_R = v_ia = None
+
+                        mo_e_o = moe_ks[t][kj][:nocc_j[t]]
+                        mo_e_v = moe_ks[t][kb][nocc_b[t]:nocc_b[t]+nvir_b[t]]
+                        ejb = mo_e_o[:,None] - mo_e_v
+
+                        eijab = lib.direct_sum('ia,jb->ijab',eia,ejb)
+                        t2_ijab = np.conj(oovv_ka/eijab)
+                        for invir_,nvir_ in enumerate(nvir_lst):
+                            eijab_d = np.einsum('ijab,ijab->',
+                                                t2_ijab[:,:,:nvir_,:nvir_],
+                                                oovv_ka[:,:,:nvir_,:nvir_]).real
+                            if ka != kb:
+                                eijab_d *= 2
+                            eijab_d *= 2    # alpha,beta <-> beta,alpha
+                            emp2_d[invir_] += eijab_d
+                            emp2_os[invir_] += eijab_d
+                        tock[:] = logger.process_clock(), logger.perf_counter()
+                        tspans[5] += tock - tick
+
+                        oovv_ka = eijab = None
+                    else:
+                        v_ia = None
+
+        cput1 = log.timer('kpt %d (%6.3f %6.3f %6.3f)'%(ki,*kpti), *cput1)
+
+    buf1 = buf2 = buf3 = None
+
+    emp2_d *= 0.5 / nkpts
+    emp2_x *= 0.5 / nkpts
+    emp2_ss *= 0.5 / nkpts
+    emp2_os *= 0.5 / nkpts
+    emp2 = emp2_d + emp2_x
+    summary["e_corr_d"] = emp2_d[-1]
+    summary["e_corr_x"] = emp2_x[-1]
+    summary["e_corr_ss"] = emp2_ss[-1]
+    summary["e_corr_os"] = emp2_os[-1]
+    summary["e_corr"] = emp2[-1]
+    summary["nvir_lst"] = nvir_lst
+    summary["e_corr_d_lst"] = emp2_d
+    summary["e_corr_x_lst"] = emp2_x
+    summary["e_corr_ss_lst"] = emp2_ss
+    summary["e_corr_os_lst"] = emp2_os
+    summary["e_corr_lst"] = emp2
+
+    cput1 = log.timer('pwmp2', *cput0)
+    tspans[6] = np.asarray(cput1) - np.asarray(cput0)
+    for tspan, tcomp in zip(tspans,tcomps):
+        summary["t-%s"%tcomp] = tspan
+
+    return emp2[-1]
+
+
+class PWKUMP2(kmp2.PWKRMP2):
+    """
+    Spin-unrestricted MP2 in a plane-wave basis.
+    """
+    def __init__(self, mf, nvir=None):
+        kmp2.PWKRMP2.__init__(self, mf, nvir=nvir)
+
+    def kernel(self, nvir=None, nvir_lst=None):
+        cell = self.cell
+        kpts = self.kpts
+        chkfile = self._scf.chkfile
+        summary = self.mp2_summary
+        if nvir is None: nvir = self.nvir  # fall back to the instance setting
+        # The value returned by kernel_dx_ is stored as e_corr and returned.
+        self.e_corr = kernel_dx_(cell, kpts, chkfile, summary, nvir=nvir,
+                                 nvir_lst=nvir_lst,
+                                 basis_ks=self._scf._basis_data)
+
+        self._finalize()
+
+        return self.e_corr
+
+
+if __name__ == "__main__":
+    from pyscf.pbc import gto, scf, mp, pwscf
+
+    atom = "H 0 0 0; H 0.9 0 0"
+    a = np.eye(3) * 3  # diagonal lattice-vector matrix
+    basis = "gth-szv"
+    pseudo = "gth-pade"
+
+    ke_cutoff = 50
+
+    cell = gto.Cell(atom=atom, a=a, basis=basis, pseudo=pseudo,
+                    ke_cutoff=ke_cutoff)
+    cell.build()
+    cell.verbose = 5
+
+    nk = 2
+    kmesh = [nk] * 3  # nk x nk x nk k-point mesh
+    kpts = cell.make_kpts(kmesh)
+    nkpts = len(kpts)  # NOTE(review): not used below
+
+    pwmf = pwscf.PWKUHF(cell, kpts)
+    pwmf.nvir = 5
+    pwmf.kernel()
+
+    es = {"5": -0.01363871}  # reference correlation energy keyed by "%d" % nvir
+
+    pwmp = PWKUMP2(pwmf)
+    pwmp.kernel(nvir_lst=[5])
+    pwmp.dump_mp2_summary()
+    nvir_lst = pwmp.mp2_summary["nvir_lst"]
+    ecorr_lst = pwmp.mp2_summary["e_corr_lst"]
+    for nvir,ecorr in zip(nvir_lst,ecorr_lst):
+        err = abs(ecorr - es["%d"%nvir])
+        print(err)
+        assert(err < 1e-5)  # regression check against the reference value
diff --git a/pyscf/pbc/pwscf/ncpp_cell.py b/pyscf/pbc/pwscf/ncpp_cell.py
new file mode 100644
index 000000000..d5dd04e6f
--- /dev/null
+++ b/pyscf/pbc/pwscf/ncpp_cell.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Kyle Bystrom <kylebystrom@gmail.com>
+#
+
+""" Sub-class of the Cell object that supports SG15 pseudopotentials.
+"""
+
+from pyscf import __config__
+from pyscf.pbc.gto.cell import Cell
+from pyscf.data.elements import _symbol, is_ghost_atom, \
+        _std_symbol_without_ghost
+from pyscf.pbc.pwscf.upf import get_nc_data_from_upf
+import numpy as np
+import os
+import io
+
+
+_ARRAY_PREFIX = "__NCPP_NPARRAY__"
+DEFAULT_SG15_PATH = getattr(__config__, 'pbc_pwscf_ncpp_cell_sg15_path', None)
+
+
+class NCPPCell(Cell):
+    """
+    Sub-class of Cell supporting SG15 pseudopotentials. The sg15_path
+    must be set either upon initialization or upon calling build.
+    Do not set any other pseudopotentials when initializing,
+    as only SG15 pseudos are supported when using this subclass.
+    """
+
+    _keys = {"sg15_path"}
+
+    def __init__(self, **kwargs):
+        sg15_path = kwargs.pop("sg15_path", DEFAULT_SG15_PATH)
+        Cell.__init__(self, **kwargs)
+        self.sg15_path = sg15_path
+
+    def build(self, **kwargs):
+        """Build the cell and load SG15 data; returns self for chaining."""
+        if "pseudo" in kwargs or "ecp" in kwargs:
+            raise ValueError("pseudo and ecp not supported")
+        self.sg15_path = kwargs.pop("sg15_path", self.sg15_path)
+        if self.sg15_path is None:
+            raise ValueError("sg15_path is not set")
+        super().build(**kwargs)
+
+        uniq_atoms = {a[0] for a in self._atom}
+        # Unless explicitly input, PP should not be assigned to ghost atoms
+        atoms_wo_ghost = [a for a in uniq_atoms if not is_ghost_atom(a)]
+        fmt_pseudo = {}
+        for atom in atoms_wo_ghost:
+            symb = _symbol(atom)
+            assert isinstance(symb, str)
+            stdsymb = _std_symbol_without_ghost(symb)
+            # One UPF file per element, named by its standard symbol.
+            fname = os.path.join(self.sg15_path, f"{stdsymb}_ONCV_PBE-1.2.upf")
+            fmt_pseudo[symb] = get_nc_data_from_upf(fname)
+        self._pseudo = _pseudo = fmt_pseudo
+        self.pseudo = "SG15"
+
+        for ia, atom in enumerate(self._atom):
+            symb = atom[0]
+            if (symb in _pseudo and
+                # skip ghost atoms
+                self._atm[ia, 0] != 0):
+                self._atm[ia, 0] = _pseudo[symb]["z"]
+        self._built = True
+        return self
+
+    def dumps(self):
+        """Serialize the cell; numpy data in ``_pseudo`` is hex-encoded."""
+        def encode(node):
+            if isinstance(node, dict):
+                out, entries = {}, node.items()
+            else:
+                out, entries = [None] * len(node), enumerate(node)
+            for k, v in entries:
+                if v is None or isinstance(v, (str, bool, int, float)):
+                    out[k] = v
+                elif isinstance(v, (list, dict)):
+                    out[k] = encode(v)
+                elif isinstance(v, (np.ndarray, np.generic)):
+                    # Store the np.save byte stream as a tagged hex string.
+                    stream = io.BytesIO()
+                    np.save(stream, v)
+                    out[k] = _ARRAY_PREFIX + stream.getvalue().hex()
+                else:
+                    raise ValueError("Cannot dump type {}".format(type(v)))
+            return out
+        backup_pseudo = self._pseudo
+        self._pseudo = encode(backup_pseudo)
+        try:
+            return super().dumps()
+        finally:
+            # Restore the live data even if the parent serializer raises.
+            self._pseudo = backup_pseudo
+
+    @classmethod
+    def loads(cls, molstr):
+        """Load a cell via the parent ``loads`` and decode tagged arrays."""
+        def decode(node):
+            # Mirror the container type: a dict stays a dict, anything else
+            # is rebuilt as a list of the same length.
+            if isinstance(node, dict):
+                out, entries = {}, node.items()
+            else:
+                out, entries = [None] * len(node), enumerate(node)
+            for k, v in entries:
+                if v is None or isinstance(v, (bool, int, float)):
+                    out[k] = v
+                elif isinstance(v, (list, dict)):
+                    out[k] = decode(v)
+                elif isinstance(v, str):
+                    # Only strings carrying the array tag are decoded.
+                    if v.startswith(_ARRAY_PREFIX):
+                        payload = bytes.fromhex(v[len(_ARRAY_PREFIX):])
+                        v = np.load(io.BytesIO(payload))
+                    out[k] = v
+                else:
+                    raise ValueError("Cannot load type {}".format(type(v)))
+            return out
+        cell = super().loads(molstr)
+        cell._pseudo = decode(cell._pseudo)
+        return cell
+
+
+if __name__ == "__main__":
+    from pyscf.pbc import gto
+    from pyscf.pbc.pwscf.krks import PWKRKS
+
+    kwargs = dict(
+        atom = "C 0 0 0; C 0.89169994 0.89169994 0.89169994",
+        a = np.asarray([
+                [0.       , 1.78339987, 1.78339987],
+                [1.78339987, 0.        , 1.78339987],
+                [1.78339987, 1.78339987, 0.        ]]),
+        basis="gth-szv",
+        ke_cutoff=50,
+        pseudo="gth-pade",
+        verbose=0,
+    )
+
+    cell = gto.Cell(**kwargs)
+    cell.build()
+
+    kwargs.pop("pseudo")  # NCPPCell accepts SG15 only (see its docstring)
+    nccell = NCPPCell(**kwargs)  # NOTE(review): never built or used below
+
+    kmesh = [2, 2, 2]
+    kpts = cell.make_kpts(kmesh)
+
+    mf = PWKRKS(cell, kpts, xc="PBE", ecut_wf=20)  # uses ``cell``, not nccell
+    mf.nvir = 4  # converge first 4 virtual bands
+    mf.kernel()
+    mf.dump_scf_summary()
diff --git a/pyscf/pbc/pwscf/pseudo.py b/pyscf/pbc/pwscf/pseudo.py
new file mode 100644
index 000000000..bb27478b0
--- /dev/null
+++ b/pyscf/pbc/pwscf/pseudo.py
@@ -0,0 +1,951 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Hong-Zhou Ye <hzyechem@gmail.com>
+# Author: Kyle Bystrom <kylebystrom@gmail.com>
+#
+
+""" All actual implementations of PW-related PPs go here.
+    The wrappers for calling the functions here go in pw_helper.py
+"""
+
+import tempfile
+import numpy as np
+import scipy.linalg
+import scipy.special
+from scipy.interpolate import make_interp_spline
+
+from pyscf.pbc.pwscf.pw_helper import (get_kcomp, set_kcomp, get_C_ks_G, orth,
+                                       get_mesh_map, wf_fft, wf_ifft)
+from pyscf.pbc.gto import pseudo as gth_pseudo
+from pyscf.pbc.gto.pseudo.pp import cart2polar
+from pyscf.pbc import tools
+from pyscf.pbc.lib.kpts_helper import member
+from pyscf import lib
+from pyscf.lib import logger
+from pyscf import __config__
+
+
+IOBLK = getattr(__config__, "pbc_pwscf_pseudo_IOBLK", 4000) # unit MB
+dawsn = scipy.special.dawsn
+if hasattr(scipy.special, "sph_harm_y"):
+    sph_harm = scipy.special.sph_harm_y  # called below as (l, m, theta, phi)
+    new_sph_harm = True
+else:
+    sph_harm = scipy.special.sph_harm  # called below as (m, l, phi, theta)
+    new_sph_harm = False
+
+
+""" Wrapper functions
+"""
+def get_vpplocR(cell, mesh=None, Gv=None):
+    """Return the local PP on the real-space grid (real part of the iFFT)."""
+    mesh = cell.mesh if mesh is None else mesh
+    Gv = cell.get_Gv(mesh=mesh) if Gv is None else Gv
+    SI = cell.get_SI(Gv=Gv)
+    vloc_atoms_G = get_vpplocG(cell, mesh, Gv)
+    # Contract the atom index, then scale the inverse FFT by ngrids / volume.
+    vloc_G = np.einsum("ag,ag->g", SI, vloc_atoms_G)
+    scale = Gv.shape[0] / cell.vol
+    return tools.ifft(vloc_G, mesh).real * scale
+
+
+def get_vpplocG(cell, mesh=None, Gv=None):
+    """Dispatch to the per-atom local PP in reciprocal space.
+
+    ccECP is used when ``cell._ecp`` is non-empty; otherwise ``cell.pseudo``
+    selects GTH (a dict, or a string containing "GTH" in any case) or SG15;
+    with ``cell.pseudo`` set to None the all-electron form is returned.
+
+    Raises NotImplementedError for any other ``cell.pseudo`` string.
+    """
+    if mesh is None: mesh = cell.mesh
+    if Gv is None: Gv = cell.get_Gv(mesh=mesh)
+
+    if len(cell._ecp) > 0:
+        return get_vpplocG_ccecp(cell, Gv)
+    if cell.pseudo is None:
+        return get_vpplocG_alle(cell, Gv)
+    # A dict is assumed to hold GTH data; errors from the GTH code propagate.
+    if isinstance(cell.pseudo, dict) or "GTH" in cell.pseudo.upper():
+        return get_vpplocG_gth(cell, Gv)
+    if cell.pseudo == "SG15":
+        return get_vpplocG_sg15(cell, Gv)
+    raise NotImplementedError("Pseudopotential %s is currently not supported." % (str(cell.pseudo)))
+
+
+def apply_vppl_kpt(cell, C_k, mesh=None, vpplocR=None, C_k_R=None, basis=None):
+    mesh = cell.mesh if mesh is None else mesh
+    vloc = get_vpplocR(cell, mesh) if vpplocR is None else vpplocR
+    psi_R = wf_ifft(C_k, mesh, basis=basis) if C_k_R is None else C_k_R
+    return wf_fft(psi_R * vloc, mesh, basis=basis)  # product formed in real space
+
+
+""" PW-PP class implementation goes here
+"""
+def get_pp_type(cell):
+    """Classify the cell's pseudopotential: "alle", "SG15", "gth" or "ccecp"."""
+    has_ecp, has_pp = len(cell._ecp) > 0, len(cell._pseudo) > 0
+    if has_pp:  # ``_pseudo`` takes precedence over ``_ecp``
+        pseudo = cell.pseudo
+        if isinstance(pseudo, str):
+            if pseudo == "SG15":
+                return "SG15"
+            assert("GTH" in pseudo.upper())
+        elif isinstance(pseudo, dict):
+            for entry in pseudo.values():
+                assert(isinstance(entry, list) or "GTH" in entry.upper())
+        else:
+            raise RuntimeError("Unknown pseudo type %s" % (str(pseudo)))
+        return "gth"
+    if not has_ecp: return "alle"
+    ecp = cell.ecp
+    if isinstance(ecp, str):
+        assert("CCECP" in ecp.upper())
+    elif isinstance(ecp, dict):
+        for entry in ecp.values():
+            assert("CCECP" in entry.upper())
+    else:
+        raise RuntimeError("Unknown ecp type %s" % (str(ecp)))
+    return "ccecp"
+
+
+def pseudopotential(mf, with_pp=None, mesh=None, outcore=False, **kwargs):
+    """Attach a PP object to ``mf`` as ``mf.with_pp`` and return ``mf``.
+
+    When ``with_pp`` is None a new PWPP is built from ``mf.cell``/``mf.kpts``
+    and any non-None ecpnloc_* keyword argument is copied onto it.
+    """
+    if with_pp is None:
+        with_pp = PWPP(mf.cell, mf.kpts, mesh=mesh, outcore=outcore)
+        for option in ("ecpnloc_method", "ecpnloc_kbbas",
+                       "ecpnloc_ke_cutoff", "ecpnloc_use_numexpr"):
+            value = kwargs.get(option, None)
+            if value is not None:
+                setattr(with_pp, option, value)
+    mf.with_pp = with_pp
+    return mf
+
+
+class PWPP:
+    """Local PP on the real-space grid plus, for ccECP, non-local support
+    vectors (``vppnlocWks``) kept in a dict or in an H5TmpFile group."""
+
+    ecpnloc_method = getattr(__config__, "pbc_pwscf_pseudo_PWPP_ecpnloc_method",
+                             "kb")  # other options: "direct"
+    # Each option reads its own config key (kbbas must not reuse the method key).
+    ecpnloc_kbbas = getattr(__config__, "pbc_pwscf_pseudo_PWPP_ecpnloc_kbbas",
+                            "ccecp-cc-pvqz")
+    ecpnloc_ke_cutoff = getattr(__config__,
+                                "pbc_pwscf_pseudo_PWPP_ecpnloc_ke_cutoff", None)
+    threshold_svec = getattr(__config__, "pbc_pwscf_pseudo_PWPP_threshold_svec",
+                             1e-12)
+
+    def __init__(self, cell, kpts, mesh=None, **kwargs):
+        """Cache the mesh, G-vectors and real-space local PP for ``cell``."""
+        self.cell = cell
+        self.stdout = cell.stdout
+        self.verbose = cell.verbose
+        self.kpts = kpts
+        if mesh is None: mesh = cell.mesh
+        self.mesh = mesh
+        self.Gv = cell.get_Gv(mesh)
+        logger.debug(self, "Initializing PP local part")
+        self.vpplocR = get_vpplocR(cell, self.mesh, self.Gv)
+
+        self.pptype = get_pp_type(cell)
+        self._ecp = None
+        self.vppnlocWks = None
+        self._ecpnloc_initialized = False
+
+        # kwargs
+        self.outcore = kwargs.get("outcore", False)
+
+        # debug options
+        self.ecpnloc_use_numexpr = False
+
+    def initialize_ecpnloc(self):
+        """Prepare ccECP non-local storage (no-op for other PP types).
+
+        Raises RuntimeError if ``ecpnloc_method`` is not a known method.
+        """
+        if self.pptype == "ccecp":
+            logger.debug(self, "Initializing ccECP non-local part")
+            if self.ecpnloc_method not in ["direct", "kb", "kb2"]:
+                raise RuntimeError("Unknown ecpnloc_method %s" %
+                                   (self.ecpnloc_method))
+            self._ecp = format_ccecp_param(self.cell)
+            if self.outcore:
+                self.swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+                self.fswap = lib.H5TmpFile(self.swapfile.name)
+                self.vppnlocWks = self.fswap.create_group("vppnlocWks")
+            else:
+                self.vppnlocWks = {}
+        self._ecpnloc_initialized = True
+
+    def update_vppnloc_support_vec(self, C_ks, ncomp=1, out=None, basis_ks=None):
+        """Compute the ccECP non-local support vectors (no-op otherwise).
+
+        For "kb" the work is skipped when ``vppnlocWks`` is already populated;
+        "kb2" is not implemented; any other method recomputes from ``C_ks``.
+        """
+        if self.pptype == "ccecp":
+            if not self._ecpnloc_initialized:
+                self.initialize_ecpnloc()
+            cell = self.cell
+
+            if self.ecpnloc_method == "kb":
+                if len(self.vppnlocWks) > 0:
+                    return
+                if out is None:
+                    out = self.vppnlocWks
+                get_ccecp_kb_support_vec(cell, self.ecpnloc_kbbas, self.kpts,
+                                         out,
+                                         ke_cutoff_nloc=self.ecpnloc_ke_cutoff,
+                                         ncomp=ncomp, _ecp=self._ecp,
+                                         thr_eig=self.threshold_svec,
+                                         use_numexpr=self.ecpnloc_use_numexpr,
+                                         basis_ks=basis_ks)
+            elif self.ecpnloc_method == "kb2":
+                raise NotImplementedError("ecpnloc_method 'kb2' is not implemented")
+            else:
+                if out is None: out = self.vppnlocWks
+                get_ccecp_support_vec(cell, C_ks, self.kpts, out,
+                                      _ecp=self._ecp,
+                                      ke_cutoff_nloc=self.ecpnloc_ke_cutoff,
+                                      ncomp=ncomp, thr_eig=self.threshold_svec,
+                                      use_numexpr=self.ecpnloc_use_numexpr,
+                                      basis_ks=basis_ks)
+
+    def apply_vppl_kpt(self, C_k, mesh=None, vpplocR=None, C_k_R=None,
+                       basis=None):
+        """Apply the local PP to ``C_k`` using the cached mesh/potential defaults."""
+        if mesh is None: mesh = self.mesh
+        if vpplocR is None: vpplocR = self.vpplocR
+        return apply_vppl_kpt(self.cell, C_k, mesh=mesh, vpplocR=vpplocR,
+                              C_k_R=C_k_R, basis=basis)
+
+    def apply_vppnl_kpt(self, C_k, kpt, mesh=None, Gv=None, comp=None,
+                        basis=None):
+        """Apply the non-local PP to ``C_k`` at k-point ``kpt``.
+
+        ccECP uses the stored support vectors (``comp`` picks a sub-group).
+        """
+        cell = self.cell
+        if self.pptype == "ccecp":
+            k = member(kpt, self.kpts)[0]
+            assert self.vppnlocWks is not None
+            if comp is None:
+                W_k = get_kcomp(self.vppnlocWks, k)
+            elif isinstance(comp, int):
+                W_k = get_kcomp(self.vppnlocWks["%d"%comp], k)
+            else:
+                raise RuntimeError("comp must be None or int")
+            return lib.dot(lib.dot(C_k, W_k.T.conj()), W_k)
+        elif self.pptype == "gth":
+            return apply_vppnl_kpt_gth(cell, C_k, kpt, Gv, basis=basis)
+        elif self.pptype == "SG15":
+            return apply_vppnl_kpt_sg15(cell, C_k, kpt, Gv, basis=basis)
+        elif self.pptype == "alle":
+            return apply_vppnl_kpt_alle(cell, C_k, kpt, Gv)
+        else:
+            raise NotImplementedError("Pseudopotential %s is currently not supported." % (str(cell.pseudo)))
+
+
+""" All-electron implementation starts here
+"""
+def get_vpplocG_alle(cell, Gv):
+    """All-electron local term: minus (atomic charge x coulG), indexed [atom, G]."""
+    charges = cell.atom_charges()
+    coul_kernel = tools.get_coulG(cell, Gv=Gv)
+    return -np.einsum("a,g->ag", charges, coul_kernel)
+
+
+def apply_vppnl_kpt_alle(cell, C_k, kpt, Gv):
+    return np.zeros_like(C_k)  # no non-local term: zeros shaped like C_k
+
+
+""" GTH implementation starts here
+"""
+def get_vpplocG_gth(cell, Gv):
+    return -gth_pseudo.get_vlocG(cell, Gv)  # sign-flipped gth_pseudo.get_vlocG
+
+
+def get_vpplocG_sg15(cell, Gv):
+    """Per-atom SG15 local PP in reciprocal space (sign flipped at the end)."""
+    coul_kernel = tools.get_coulG(cell, Gv=Gv)
+    Gsq = np.einsum('ix,ix->i', Gv, Gv)
+    Gnorm, zero_G = np.sqrt(Gsq), np.where(Gsq==0)[0]
+    vlocG = np.zeros((cell.natm, len(Gsq)))
+    for ia, row in enumerate(vlocG):
+        charge = cell.atom_charge(ia)
+        symb = cell.atom_symbol(ia)
+        row[:] = charge * coul_kernel
+        if symb not in cell._pseudo: continue
+        pp = cell._pseudo[symb]
+        formfac = make_interp_spline(pp["grids"]["k"], pp["local_part"]["recip"])
+        row *= formfac(Gnorm) / charge  # spline is normalized to the charge
+        # G=0 takes the stored finite value (non-divergent Hartree+Vloc term).
+        row[zero_G] = pp["local_part"]["finite_g0"]
+    vlocG *= -1
+    return vlocG
+
+
+def apply_vppnl_kpt_gth(cell, C_k, kpt, Gv, basis=None):
+    # Apply the GTH non-local projectors to C_k; returns an array shaped like C_k.
+    from pyscf import gto
+    fakemol = gto.Mole()  # one-shell dummy molecule, re-parameterized per projector
+    fakemol._atm = np.zeros((1,gto.ATM_SLOTS), dtype=np.int32)
+    fakemol._bas = np.zeros((1,gto.BAS_SLOTS), dtype=np.int32)
+    ptr = gto.PTR_ENV_START
+    fakemol._env = np.zeros(ptr+10)
+    fakemol._bas[0,gto.NPRIM_OF ] = 1
+    fakemol._bas[0,gto.NCTR_OF  ] = 1
+    fakemol._bas[0,gto.PTR_EXP  ] = ptr+3
+    fakemol._bas[0,gto.PTR_COEFF] = ptr+4
+
+    if basis is None:
+        Gk = Gv + kpt
+        SI = cell.get_SI(Gv=Gv)
+    else:
+        Gk = basis.Gk
+        SI = cell.get_SI(Gv=Gk-kpt)
+    ngrids = Gk.shape[0]
+    # Scratch rows shared by all projectors of one atom.
+    # NOTE(review): 48 rows is assumed to be enough for every atom -- confirm.
+    buf = np.empty((48,ngrids), dtype=np.complex128)
+    Cbar_k = np.zeros_like(C_k)
+
+    G_rad = lib.norm(Gk, axis=1)
+    #:vppnl = 0
+    for ia in range(cell.natm):
+        symb = cell.atom_symbol(ia)
+        if symb not in cell._pseudo:
+            continue
+        pp = cell._pseudo[symb]
+        p1 = 0  # running row offset into buf
+        for l, proj in enumerate(pp[5:]):
+            rl, nl, hl = proj
+            if nl > 0:
+                fakemol._bas[0,gto.ANG_OF] = l
+                fakemol._env[ptr+3] = .5*rl**2
+                fakemol._env[ptr+4] = rl**(l+1.5)*np.pi**1.25
+                pYlm_part = fakemol.eval_gto('GTOval', Gk)
+
+                p0, p1 = p1, p1+nl*(l*2+1)
+                # pYlm is real, SI[ia] is complex
+                pYlm = np.ndarray((nl,l*2+1,ngrids), dtype=np.complex128, buffer=buf[p0:p1])
+                for k in range(nl):
+                    qkl = gth_pseudo.pp._qli(G_rad*rl, l, k)
+                    pYlm[k] = pYlm_part.T * qkl
+                #:SPG_lmi = np.einsum('g,nmg->nmg', SI[ia].conj(), pYlm)
+                #:SPG_lm_aoG = np.einsum('nmg,gp->nmp', SPG_lmi, aokG)
+                #:tmp = np.einsum('ij,jmp->imp', hl, SPG_lm_aoG)
+                #:vppnl += np.einsum('imp,imq->pq', SPG_lm_aoG.conj(), tmp)
+        if p1 > 0:  # at least one projector was written into buf for this atom
+            SPG_lmi = buf[:p1]
+            SPG_lmi *= SI[ia].conj()
+            p1 = 0  # rewind and walk the same row layout a second time
+            for l, proj in enumerate(pp[5:]):
+                rl, nl, hl = proj
+                if nl > 0:
+                    p0, p1 = p1, p1+nl*(l*2+1)
+                    hl = np.asarray(hl)
+                    SPG_lmi_ = SPG_lmi[p0:p1].reshape(nl,l*2+1,-1)
+                    tmp = np.einsum("imG,IG->Iim", SPG_lmi_, C_k)
+                    tmp = np.einsum("ij,Iim->Ijm", hl, tmp)
+                    Cbar_k += np.einsum("Iim,imG->IG", tmp, SPG_lmi_.conj())
+    Cbar_k /= cell.vol
+
+
+def apply_vppnl_kpt_sg15(cell, C_k, kpt, Gv, basis=None):
+    """Apply the SG15 non-local projectors to ``C_k`` (same shape returned).
+
+    The plane waves are ``Gv + kpt`` or, when ``basis`` is given, ``basis.Gk``.
+    """
+    if basis is None:
+        Gk = Gv + kpt
+        SI = cell.get_SI(Gv=Gv)
+    else:
+        Gk = basis.Gk
+        SI = cell.get_SI(Gv=Gk-kpt)
+    ngrids = Gk.shape[0]
+    Cbar_k = np.zeros_like(C_k)
+
+    G_rad, G_theta, G_phi = cart2polar(Gk)
+    G_phi[:] = G_phi % (2 * np.pi)
+    # Flatten over species: projector counts may differ between elements,
+    # so a nested list can be ragged and must not be handed to np.max.
+    lmax = max(proj["l"] for pp in cell._pseudo.values()
+               for proj in pp["projectors"])
+    G_ylm = np.empty(((lmax + 1) * (lmax + 1), ngrids), dtype=np.complex128)
+    for l in range(lmax + 1):
+        for mp in range(-l, l + 1):
+            lm = l * l + l + mp  # rows l*l .. (l+1)**2 - 1 hold m = -l .. l
+            if new_sph_harm:
+                G_ylm[lm] = sph_harm(l, mp, G_theta, G_phi)
+            else:
+                G_ylm[lm] = sph_harm(mp, l, G_phi, G_theta)
+
+    for ia in range(cell.natm):
+        symb = cell.atom_symbol(ia)
+        if symb not in cell._pseudo:
+            continue
+        pp = cell._pseudo[symb]
+        kmesh = pp["grids"]["k"]
+        for iproj, proj in enumerate(pp["projectors"]):
+            l = proj["l"]
+            radpart = make_interp_spline(kmesh, proj["kproj"])(G_rad)
+            sphpart = G_ylm[l*l:(l+1)*(l+1)]
+            SPG_mi = radpart * sphpart * SI[ia].conj()
+            # NOTE(review): only the diagonal of dij is used -- confirm.
+            tmp = np.einsum("mG,IG->Im", SPG_mi, C_k) * pp["dij"][iproj, iproj]
+            Cbar_k += np.einsum("Im,mG->IG", tmp, SPG_mi.conj())
+    Cbar_k /= cell.vol
+    return Cbar_k
+
+
+""" ccECP implementation starts here
+"""
+def fast_SphBslin(n, xs, thr_switch=20, thr_overflow=700, out=None):
+    """Closed-form i_n(xs) for n = 0..3; non-finite results are set to 0."""
+    if out is None: out = np.zeros_like(xs)
+    with np.errstate(over="ignore", invalid="ignore"):
+        if n not in (0, 1, 2, 3):
+            raise NotImplementedError("fast_SphBslin with n=%d is not implemented." % n)
+        sinh_x, cosh_x = np.sinh(xs), (np.cosh(xs) if n > 0 else None)
+        if n == 0:
+            numer, denom = sinh_x, xs
+        elif n == 1:
+            numer, denom = xs * cosh_x - sinh_x, xs**2.
+        elif n == 2:
+            numer, denom = (xs**2.+3.)*sinh_x - 3.*xs*cosh_x, xs**3.
+        else:
+            numer, denom = (xs**3.+15.*xs)*cosh_x - (6.*xs**2.+15.)*sinh_x, xs**4.
+        out[:] = numer / denom
+    np.nan_to_num(out, copy=False, nan=0., posinf=0., neginf=0.)
+    return out
+
+
+def fast_SphBslin_numexpr(n, xs, thr_switch=20, thr_overflow=700, out=None):
+    """numexpr version of fast_SphBslin (n = 0..3); non-finite values -> 0."""
+    import numexpr
+    # The expressions refer to ``xs``, which must stay a local of this function.
+    formulas = {
+        0: "sinh(xs)/xs",
+        1: "(xs * cosh(xs) - sinh(xs)) / xs**2.",
+        2: "((xs**2.+3.)*sinh(xs) - 3.*xs*cosh(xs)) / xs**3.",
+        3: "((xs**3.+15.*xs)*cosh(xs) -(6.*xs**2.+15.)*sinh(xs)) / xs**4.",
+    }
+    if out is None: out = np.zeros_like(xs)
+    with np.errstate(over="ignore", invalid="ignore"):
+        if n not in formulas:
+            raise NotImplementedError("fast_SphBslin with n=%d is not implemented." % n)
+        numexpr.evaluate(formulas[n], out=out)
+
+    np.nan_to_num(out, copy=False, nan=0., posinf=0., neginf=0.)
+
+    return out
+
+
+def fast_SphBslin_c(n, xs, out=None):
+    """C-library (libpwscf) counterpart of fast_SphBslin.
+
+    Raises NotImplementedError if n > 3; non-finite results are set to 0.
+    """
+    import ctypes
+    if n > 3:
+        raise NotImplementedError("fast_SphBslin with n=%d is not implemented." % n)
+    if out is None: out = np.zeros_like(xs)
+
+    kernel = lib.load_library("libpwscf").fast_SphBslin
+    in_ptr = xs.ctypes.data_as(ctypes.c_void_p)
+    out_ptr = out.ctypes.data_as(ctypes.c_void_p)
+    # Argument order: input pointer, element count, order n, output pointer.
+    kernel(in_ptr, ctypes.c_int(xs.size), ctypes.c_int(n), out_ptr)
+    np.nan_to_num(out, copy=False, nan=0., posinf=0., neginf=0.)
+    return out
+
+
+def format_ccecp_param(cell):
+    r""" Format the ecp data into the following dictionary:
+        _ecp = {
+                    atm1: [_ecpl_atm1, _ecpnl_atm1],
+                    atm2: [_ecpl_atm2, _ecpnl_atm2],
+                    ...
+                }
+        _ecpl  = [
+                    [alp1_1, c1_1, alp2_1, c2_1, ...],
+                    [alp1_2, c1_2, alp2_2, c2_2, ...],
+                    [alp1_3, c1_3, alp2_3, c2_3, ...],
+                ]
+        _ecpnl = [
+                    [l1, alp1_l1, c1_l1, alp2_l1, c2_l1, ...],
+                    [l2, alp1_l2, c1_l2, alp2_l2, c2_l2, ...],
+                    ...
+                ]
+        where
+            Zeff = \sum_k ck_1
+            Vl(r)  = -Zeff/r + c_1/r*exp(-alp_1*r^2) + c_2*r*exp(-alp_2*r^2) +
+                        \sum_{k} ck_3*exp(-alpk_3*r^2)
+            Vnl(r) = \sum_l \sum_k ck_l * exp(-alpk_l*r^2) \sum_m |lm><lm|
+
+        Each element is formatted once; atoms absent from cell._ecp are skipped.
+    """
+    formatted = {}
+    for symbol in map(cell.atom_symbol, range(cell.natm)):
+        # A repeated element reuses the entry built for its first atom.
+        if symbol not in cell._ecp or symbol in formatted:
+            continue
+        _ncore, shells = cell._ecp[symbol]
+        # Local part: entries 1, 3 and 2 (in that order) of shells[0][1], each
+        # flattened into one array [alp, c, alp, c, ...].
+        by_index = shells[0][1]
+        local = [np.concatenate([*by_index[idx]]) for idx in (1, 3, 2)]
+
+        # Non-local part: one flat list [l, alp, c, alp, c, ...] per channel.
+        nonlocal_part = []
+        for channel in shells[1:]:
+            flat = [channel[0]]
+            for terms in channel[1]:
+                for pair in terms:  # an empty ``terms`` contributes nothing
+                    flat += pair
+            nonlocal_part.append(flat)
+        formatted[symbol] = [local, nonlocal_part]
+
+    return formatted
+
+
+def get_vpplocG_ccecp(cell, Gv, _ecp=None):
+    """ Reciprocal-space local part of the ccECP, one row per atom.
+
+    Args:
+        cell: provides `natm`, `atom_symbol` and is passed to
+            `tools.get_coulG`.
+        Gv: G-vectors, one per row.
+        _ecp: output of `format_ccecp_param`; computed from `cell` if None.
+
+    Returns:
+        Array of shape (cell.natm, ngrids). Rows of atoms that have no
+        entry in `_ecp` stay zero.
+    """
+    if _ecp is None:
+        _ecp = format_ccecp_param(cell)
+    G_rad = np.linalg.norm(Gv, axis=1)
+    coulG = tools.get_coulG(cell, Gv=Gv)
+    G0_idx = np.where(G_rad==0)[0]
+    with np.errstate(divide="ignore"):
+        invG = 4*np.pi / G_rad
+        invG[G0_idx] = 0
+    vlocG = np.zeros((cell.natm,coulG.size))
+    for iatm in range(cell.natm):
+        atm = cell.atom_symbol(iatm)
+        if atm not in _ecp:
+            continue
+        par_invr, par_r, par_gauss = _ecp[atm][0][:3]
+        vrow = vlocG[iatm]     # view; updated in place
+# Zeff / r
+        Zeff = sum(par_invr[1::2])
+        vrow += -coulG * Zeff
+        v0 = -coulG[G0_idx] * Zeff
+# c1 / r * exp(-a1 * r^2)
+        for a1, c1 in zip(par_invr[0::2], par_invr[1::2]):
+            daw = dawsn(G_rad*(0.5/a1**0.5))
+            vrow += c1 * invG * a1**-0.5 * daw
+            v0 += 2*np.pi / a1 * c1
+# c2 * r * exp(-a2 * r^2)
+        for a2, c2 in zip(par_r[0::2], par_r[1::2]):
+            daw = dawsn(G_rad*(0.5/a2**0.5))
+            vrow += c2 * (np.pi/a2**2. + ((0.5/a2**1.5) * invG -
+                                          (np.pi/a2**2.5)*G_rad) * daw)
+            v0 += 2*np.pi / a2**2 * c2
+# \sum_k c3_k * exp(-a3_k * r^2)
+        for a3, c3 in zip(par_gauss[0::2], par_gauss[1::2]):
+            vrow += c3 * (np.pi/a3)**1.5 * np.exp(-G_rad**2.*(0.25/a3))
+            v0 += (np.pi/a3)**1.5 * c3
+# G = 0: overwrite with the separately accumulated value
+        vrow[G0_idx] = v0
+
+    return vlocG
+
+
+def apply_vppnlocGG_kpt_ccecp(cell, C_k, kpt, _ecp=None, Gv=None,
+                              use_numexpr=False):
+    """ Apply the non-local ccECP operator to the coefficients `C_k` at
+    k-point `kpt`.
+
+    The (G, G') matrix is never stored in full: it is assembled in row
+    blocks of `Gblksize` rows, sized from the remaining `cell.max_memory`,
+    and each block is contracted with `C_k` immediately.
+
+    Args:
+        cell: provides `get_Gv`, `get_SI`, `natm`, `atom_symbol`, `vol`,
+            `max_memory`, `stdout` and `verbose`.
+        C_k: coefficient array; rows are orbitals (nmo = C_k.shape[0]),
+            columns correspond to the rows of `Gv`.
+        kpt: k-point, added to `Gv`.
+        _ecp: output of `format_ccecp_param`; computed from `cell` if None.
+        Gv: G-vectors; defaults to `cell.get_Gv()`.
+        use_numexpr: accepted for interface compatibility; the body always
+            uses `fast_SphBslin_c`.
+
+    Returns:
+        Array shaped like `C_k`. All zeros if `_ecp` contains no
+        non-local channel.
+    """
+    log = logger.Logger(cell.stdout, cell.verbose)
+
+    if _ecp is None: _ecp = format_ccecp_param(cell)
+    if Gv is None:
+        Gv = cell.get_Gv()
+    SI = cell.get_SI(Gv)
+    ngrids = Gv.shape[0]
+
+    # one-shell, one-primitive helper Mole; its angular momentum, exponent
+    # and coefficient are overwritten for every ECP term in the loop below
+    from pyscf import gto
+    fakemol = gto.Mole()
+    fakemol._atm = np.zeros((1,gto.ATM_SLOTS), dtype=np.int32)
+    fakemol._bas = np.zeros((1,gto.BAS_SLOTS), dtype=np.int32)
+    ptr = gto.PTR_ENV_START
+    fakemol._env = np.zeros(ptr+10)
+    fakemol._bas[0,gto.NPRIM_OF ] = 1
+    fakemol._bas[0,gto.NCTR_OF  ] = 1
+    fakemol._bas[0,gto.PTR_EXP  ] = ptr+3
+    fakemol._bas[0,gto.PTR_COEFF] = ptr+4
+
+    # atom symbol -> indices of all atoms carrying that symbol
+    uniq_atm_map = {}
+    for iatm in range(cell.natm):
+        atm = cell.atom_symbol(iatm)
+        if atm not in uniq_atm_map:
+            uniq_atm_map[atm] = []
+        uniq_atm_map[atm].append(iatm)
+
+    nmo = C_k.shape[0]
+
+    angls_nl = [_ecpnlitem[0] for _ecpitem in _ecp.values()
+                for _ecpnlitem in _ecpitem[1]]
+    if len(angls_nl) == 0:
+        return np.zeros_like(C_k)
+
+    lmax = np.max(angls_nl)
+    natmmax = np.max([len(iatm_lst) for iatm_lst in uniq_atm_map.values()])
+
+    dtype0 = np.float64
+    dtype = np.complex128
+    dsize = 16
+    max_memory = (cell.max_memory - lib.current_memory()[0]) * 0.8
+    Gblksize = min(int(np.floor((max_memory*1e6/dsize/ngrids -
+                                 ((2*lmax+1)*natmmax+10+nmo))*0.2)), ngrids)
+    if Gblksize < 1:
+        # a non-positive block size cannot size the buffers or step the
+        # block loop below; fall back to the smallest usable block
+        log.warn("Available memory %s MB is not enough for a single G-block "
+                 "of the non-local ccECP. Using block size 1.", max_memory)
+        Gblksize = 1
+    buf = np.empty(Gblksize*ngrids, dtype=dtype)
+    buf2 = np.empty(Gblksize*ngrids, dtype=dtype0)
+    buf3 = np.empty(Gblksize*ngrids, dtype=dtype0)
+    log.debug1("Computing v^nl*C_k in %d segs with blksize %d",
+               (ngrids-1)//Gblksize+1, Gblksize)
+
+    Gk = Gv + kpt
+    G_rad = lib.norm(Gk, axis=1)
+    if abs(kpt).sum() < 1e-8: G_rad += 1e-40    # avoid inverting zero
+    if lmax > 0: invG_rad = 1./G_rad
+
+    # rows: SBin, dot, other, total; columns: CPU time, wall time
+    tspans = np.zeros((4,2))
+    TICK = np.array([logger.process_clock(), logger.perf_counter()])
+
+    fSBin = fast_SphBslin_c
+
+    Cbar_k = np.zeros_like(C_k)
+    for atm,iatm_lst in uniq_atm_map.items():
+        if atm not in _ecp:
+            continue
+        _ecpnl_lst = _ecp[atm][1]
+        for _ecpnl in _ecpnl_lst:
+            l = _ecpnl[0]
+            nl = (len(_ecpnl) - 1) // 2
+            for il in range(nl):
+                al, cl = _ecpnl[(1+il*2):(3+il*2)]
+                fakemol._bas[0,gto.ANG_OF] = l
+                fakemol._env[ptr+3] = 0.25 / al
+                fakemol._env[ptr+4] = 2.*np.pi**1.25 * abs(cl)**0.5 / al**0.75
+                # the coefficient enters as sqrt(|cl|) on both sides; the
+                # sign of cl is restored when forming the product below
+                flip_sign = cl < 0
+                # pYlm_part.shape = (ngrids, (2*l+1)*len(iatm_lst))
+                pYlm_part = np.einsum("gl,ag->gla",
+                                      fakemol.eval_gto('GTOval', Gk),
+                                      SI[iatm_lst]).reshape(ngrids,-1)
+                if l > 0:
+                    pYlm_part[:] *= (invG_rad**l)[:,None]
+                G_red = G_rad * (0.5 / al)
+                iblk = 0
+                for p0,p1 in lib.prange(0,ngrids,Gblksize):
+                    log.debug2("Gblk [%d/%d], %d ~ %d", iblk,
+                               (ngrids-1)//Gblksize+1, p0, p1)
+                    iblk += 1
+                    # views into the preallocated buffers for this block
+                    vnlGG = np.ndarray((p1-p0,ngrids), dtype=dtype, buffer=buf)
+                    G_rad2 = np.ndarray((p1-p0,ngrids), dtype=dtype0,
+                                        buffer=buf2)
+                    SBin = np.ndarray((p1-p0,ngrids), dtype=dtype0, buffer=buf3)
+                    np.multiply(G_rad[p0:p1,None], G_red, out=G_rad2)
+                    # use np.dot since a slice is neither F nor C-contiguous
+                    if flip_sign:
+                        vnlGG = np.dot(pYlm_part[p0:p1], -pYlm_part.conj().T,
+                                       out=vnlGG)
+                    else:
+                        vnlGG = np.dot(pYlm_part[p0:p1], pYlm_part.conj().T,
+                                       out=vnlGG)
+                    tick = np.array([logger.process_clock(), logger.perf_counter()])
+                    SBin = fSBin(l, G_rad2, out=SBin)
+                    tock = np.array([logger.process_clock(), logger.perf_counter()])
+                    tspans[0] += tock - tick
+                    np.multiply(vnlGG, SBin, out=vnlGG)
+                    tick = np.array([logger.process_clock(), logger.perf_counter()])
+                    Cbar_k[:,p0:p1] += lib.dot(vnlGG, C_k.T).T
+                    tock = np.array([logger.process_clock(), logger.perf_counter()])
+                    tspans[1] += tock - tick
+                    G_rad2 = vnlGG = SBin = None
+                G_red = pYlm_part = None
+    Cbar_k /= cell.vol
+
+    TOCK = np.array([logger.process_clock(), logger.perf_counter()])
+    tspans[3] += TOCK - TICK
+    tspans[2] = tspans[3] - np.sum(tspans[:2], axis=0)
+
+    tnames = ["SBin", "dot", "other", "total"]
+    for tname, tspan in zip(tnames, tspans):
+        tc, tw = tspan
+        rc, rw = tspan / tspans[-1] * 100
+        log.debug1('CPU time for %10s %9.2f  ( %6.2f%% ), wall time '
+                   '%9.2f  ( %6.2f%% )', tname.ljust(10), tc, rc, tw, rw)
+
+    return Cbar_k
+
+
+def get_ccecp_support_vec(cell, C_ks, kpts, out, _ecp=None, ke_cutoff_nloc=None,
+                          ncomp=1, thr_eig=1e-12, use_numexpr=False,
+                          basis_ks=None):
+    """ For every k-point, apply the non-local ccECP to the vectors in
+    `C_ks` and store the resulting support vectors in `out`.
+
+    Args:
+        cell: cell the ECP is defined on.
+        C_ks: per-k-point coefficients; for `ncomp > 1` a mapping from
+            "0", "1", ... to per-k-point coefficients.
+        kpts: k-points.
+        out: dict, hdf5-like group, or None (a new dict is created).
+        _ecp: output of `format_ccecp_param`; computed from `cell` if None.
+        ke_cutoff_nloc: optional smaller cutoff for the non-local part;
+            only used when `basis_ks` is None and it is below
+            `cell.ke_cutoff`, otherwise a warning is logged.
+        ncomp: number of components stored under keys "0", "1", ...
+        thr_eig: eigenvalue threshold forwarded to `get_support_vec`.
+        use_numexpr: forwarded to `apply_vppnlocGG_kpt_ccecp`.
+        basis_ks: optional per-k-point bases; `basis_ks[k].Gk - kpt` is
+            used as the G-vectors.
+
+    Returns:
+        `out`.
+    """
+    log = logger.Logger(cell.stdout, cell.verbose)
+
+    if out is None:
+        out = {}
+    # anything that is not a plain dict is handled as an hdf5-like group
+    outcore = not isinstance(out, dict)
+
+    comp_keys = None if ncomp == 1 else ["%d"%comp for comp in range(ncomp)]
+    if ncomp > 1:
+        for key in comp_keys:
+            if not outcore:
+                out[key] = {}
+                continue
+            if key in out: del out[key]
+            out.create_group(key)
+
+    if _ecp is None:
+        _ecp = format_ccecp_param(cell)
+
+    mesh_map = cell_nloc = None
+    if basis_ks is None and ke_cutoff_nloc is not None:
+        if not ke_cutoff_nloc < cell.ke_cutoff:
+            log.warn("Input ke_cutoff_nloc %s is greater than cell.ke_cutoff "
+                     "%s and will be ignored.", ke_cutoff_nloc, cell.ke_cutoff)
+        else:
+            log.debug1("Using ke_cutoff_nloc %s for KB support vector", ke_cutoff_nloc)
+            mesh_map = get_mesh_map(cell, cell.ke_cutoff, ke_cutoff_nloc)
+            cell_nloc = cell.copy()
+            cell_nloc.ke_cutoff = ke_cutoff_nloc
+            cell_nloc.build()
+
+    for k, kpt in enumerate(kpts):
+        if ncomp == 1:
+            C_k = get_kcomp(C_ks, k)
+        else:
+            # stack all components of this k-point; comp_loc holds the
+            # row offsets of each component in the stacked array
+            parts = [get_kcomp(C_ks[key], k) for key in comp_keys]
+            comp_loc = [0]
+            for part in parts:
+                comp_loc.append(comp_loc[-1] + part.shape[0])
+            C_k = np.vstack(parts)
+
+        if cell_nloc is not None:
+            # apply on the reduced mesh only; other grid points stay zero
+            W_k = np.zeros_like(C_k)
+            W_k[:,mesh_map] = apply_vppnlocGG_kpt_ccecp(cell_nloc,
+                                                        C_k[:,mesh_map],
+                                                        kpt, _ecp=_ecp,
+                                                        use_numexpr=use_numexpr)
+        else:
+            Gv = None if basis_ks is None else basis_ks[k].Gk - kpt
+            W_k = apply_vppnlocGG_kpt_ccecp(cell, C_k, kpt, _ecp=_ecp,
+                                            use_numexpr=use_numexpr, Gv=Gv)
+
+        if ncomp == 1:
+            W_k = get_support_vec(C_k, W_k, method="eig", thr_eig=thr_eig)
+            set_kcomp(W_k, out, k)
+        else:
+            # split the stacked result back into its components
+            for comp in range(ncomp):
+                p0, p1 = comp_loc[comp:comp+2]
+                w_k = get_support_vec(C_k[p0:p1], W_k[p0:p1],
+                                      method="eig", thr_eig=thr_eig)
+                set_kcomp(w_k, out["%d"%comp], k)
+                w_k = None
+
+        C_k = W_k = None
+
+    return out
+
+
+def get_ccecp_kb_support_vec(cell, kb_basis, kpts, out, ke_cutoff_nloc=None,
+                             ncomp=1, _ecp=None, thr_eig=1e-12,
+                             use_numexpr=False, ioblk=IOBLK, basis_ks=None):
+    """ Compute ccECP support vectors in the space spanned by the GTO basis
+    `kb_basis`, processing the k-points in batches.
+
+    Args:
+        cell: cell the ECP is defined on.
+        kb_basis: basis assigned to a copy of `cell` to generate the
+            vectors.
+        kpts: k-points.
+        out: dict, hdf5-like group, or None (a new dict is created).
+        ke_cutoff_nloc, _ecp, thr_eig, use_numexpr: forwarded to
+            `get_ccecp_support_vec`.
+        ncomp: number of components; the vectors are computed once (stored
+            under "0") and the same object is assigned to the other
+            component keys.
+        ioblk: size budget used to choose how many k-points are handled
+            per batch (compared against ngrids*nao*16/1024**2 per k-point).
+        basis_ks: optional per-k-point bases; `basis_ks[0].mesh` sets the
+            mesh and `basis_ks[k].indexes` selects the plane-waves.
+
+    Returns:
+        `out`, so that the result is also reachable when `out=None` was
+        passed in.
+    """
+
+    log = logger.Logger(cell.stdout, cell.verbose)
+
+    if out is None:
+        out = {}
+    outcore = not isinstance(out, dict)
+
+    if ncomp == 1:
+        W_ks = out
+    else:
+        if outcore:
+            # same clean-up as get_ccecp_support_vec: drop a stale group
+            if "0" in out: del out["0"]
+            W_ks = out.create_group("0")
+        else:
+            out["0"] = {}
+            W_ks = out["0"]
+
+    nkpts = len(kpts)
+    cell_kb = cell.copy()
+    cell_kb.basis = kb_basis
+    if basis_ks is not None:
+        cell_kb.mesh = basis_ks[0].mesh
+    cell_kb.build()
+    log.debug("Using basis %s for KB-ccECP (%d AOs)", kb_basis,
+              cell_kb.nao_nr())
+
+    nao = cell_kb.nao_nr()
+
+# batching kpts to avoid high peak disk usage
+    ngrids = np.prod(cell_kb.mesh)
+    kblk = min(int(np.floor(ioblk/(ngrids*nao*16/1024**2.))), nkpts)
+    # at least one k-point per batch; zero would break the division and
+    # the batching loop below
+    kblk = max(kblk, 1)
+    nblk = int(np.ceil(nkpts / kblk))
+    log.debug("Calculating KB support vec for all kpts in %d segments with "
+              "kptblk size %d", nblk, kblk)
+    log.debug("KB outcore: %s", outcore)
+
+    tmpgroupname = "tmp"
+    iblk = 0
+    for k0,k1 in lib.prange(0,nkpts,kblk):
+        log.debug1("BLK %d  kpt range %d ~ %d  kpts %s", iblk, k0, k1,
+                   kpts[k0:k1])
+        iblk += 1
+        nkpts01 = k1 - k0
+        kpts01 = kpts[k0:k1]
+        # identity coefficients: every AO of cell_kb is transformed
+        Cg_ks = [np.eye(nao) + 0.j for k in range(nkpts01)]
+        ng_ks = [nao] * nkpts01
+        if basis_ks is None:
+            sub_bks = None
+        else:
+            sub_bks = basis_ks[k0:k1]
+        if outcore:
+            # scratch group inside W_ks; removed at the end of the batch
+            W_ks_blk = W_ks.create_group(tmpgroupname)
+            Cg_ks = get_C_ks_G(cell_kb, kpts01, Cg_ks, ng_ks, out=W_ks_blk)
+        else:
+            W_ks_blk = {}
+            Cg_ks = get_C_ks_G(cell_kb, kpts01, Cg_ks, ng_ks)
+        for k in range(nkpts01):
+            Cg_k = get_kcomp(Cg_ks, k)
+            Cg_k = orth(cell_kb, Cg_k)
+            if sub_bks is not None:
+                Cg_k = Cg_k[:, sub_bks[k].indexes]
+            set_kcomp(Cg_k, Cg_ks, k)
+        Cg_k = None
+        log.debug("keeping %s SOAOs", ng_ks)
+
+        get_ccecp_support_vec(cell, Cg_ks, kpts01, W_ks_blk, _ecp=_ecp,
+                              ke_cutoff_nloc=ke_cutoff_nloc, ncomp=1,
+                              thr_eig=thr_eig, use_numexpr=use_numexpr,
+                              basis_ks=sub_bks)
+
+        # copy the batch results to their global k-point index
+        for k in range(k0,k1):
+            set_kcomp(get_kcomp(W_ks_blk, k-k0), W_ks, k)
+        if outcore:
+            del W_ks[tmpgroupname]
+        else:
+            Cg_ks = W_ks_blk = None
+
+    nsv_ks = np.array([get_kcomp(W_ks, k, load=False).shape[0]
+                       for k in range(nkpts)])
+    mem_W_ks = nsv_ks.sum() * ngrids * 16 / 1024**2.
+
+    log.debug("keeping %s KB support vectors", nsv_ks)
+    log.debug("estimated %s usage: %.2f MB", "disk" if outcore else "memory",
+              mem_W_ks)
+
+    if ncomp > 1:
+        for comp in range(1,ncomp):
+            key = "%d"%comp
+            if key in out: del out[key]
+            out[key] = W_ks
+
+    return out
+
+
+def get_ccecp_kb2_support_vec(cell0, kb_basis, kpts, out=None, thr=1e-12):
+    """ Compute ccECP support vectors from the GTO-basis ECP integrals of
+    a copy of `cell0` that uses `kb_basis`.
+
+    Args:
+        cell0: input cell; it is copied, never modified.
+        kb_basis: GTO basis assigned to the copy.
+        kpts: k-points.
+        out: list or hdf5-like group receiving one entry per k-point; a
+            new list is created if None.
+        thr: not referenced in the body.
+            NOTE(review): possibly meant as the `thr_eig` of
+            `get_support_vec` -- confirm.
+
+    Returns:
+        `out`. Without the return, the list created for `out=None` would
+        be unreachable by the caller.
+    """
+    from pyscf.pbc.gto import ecp
+    cell = cell0.copy()
+    cell.basis = kb_basis
+    cell.pseudo = "ccecp"   # make sure
+    cell.verbose = 0
+    cell.build()
+
+# remove local part of the ecp
+    cell = cell.copy()
+    for bas in cell._ecpbas:
+        if bas[1] == -1:
+            idx = list(range(bas[5],bas[6]+1))
+            cell._env[idx] = 0.
+
+    nkpts = len(kpts)
+    if out is None: out = [None] * nkpts
+
+    ovlp = cell.pbc_intor("int1e_ovlp", kpts=kpts)
+    vecp = ecp.ecp_int(cell, kpts)
+
+# get Sinv and gto bas vecs (SOAO)
+    c = [None] * nkpts
+    Sinv = [None] * nkpts
+    for k in range(nkpts):
+        e, u = scipy.linalg.eigh(ovlp[k])
+        c[k] = lib.dot(u*e**-0.5, u.T.conj())
+        Sinv[k] = lib.dot(u*e**-1, u.T.conj())
+
+# gto -> pw (intermediates are kept in a temporary hdf5 file)
+    swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+    fswap = lib.H5TmpFile(swapfile.name)
+    swapfile = None
+    C = fswap.create_group("C")
+    D = fswap.create_group("D")
+
+    n_ks = [c[k].shape[1] for k in range(nkpts)]
+    get_C_ks_G(cell, kpts, c, n_ks, out=C)
+    n_ks = [Sinv[k].shape[1] for k in range(nkpts)]
+    get_C_ks_G(cell, kpts, Sinv, n_ks, out=D)
+
+# get W
+    for k in range(nkpts):
+        C_k = get_kcomp(C, k)
+        D_k = get_kcomp(D, k)
+        DC_k = lib.dot(D_k.conj(), C_k.T)
+        w_k = lib.dot(Sinv[k], lib.dot(vecp[k], DC_k))
+        W_k = get_C_ks_G(cell, [kpts[k]], [w_k], [w_k.shape[1]])[0]
+        C_k = get_kcomp(C, k)
+        W_k = get_support_vec(C_k, W_k, method="eig")
+        set_kcomp(W_k, out, k)
+        C_k = D_k = W_k = None
+
+    return out
+
+
+def get_support_vec(C, W, method="cd", thr_eig=1e-12):
+    """ Build support vectors from `W` using the matrix M = C.conj() @ W.T.
+
+    Args:
+        C, W: arrays with matching second dimension.
+        method: "cd" (Cholesky factor of M, then a triangular solve against
+            W) or "eig" (eigen-decomposition of M keeping eigenvalues
+            above `thr_eig`).
+        thr_eig: eigenvalue threshold for method "eig".
+
+    Returns:
+        The support vectors; an all-zero array shaped like `C` when the
+        sum of |M| is below 1e-10.
+
+    Raises:
+        RuntimeError: unknown `method` (only reached when M is not
+            negligible).
+    """
+    M = lib.dot(C.conj(), W.T)
+    if np.sum(np.abs(M)) < 1e-10:
+        return np.zeros_like(C)
+
+    if method == "cd":
+        chol = scipy.linalg.cholesky(M, lower=True)
+        return scipy.linalg.solve_triangular(chol.conj(), W, lower=True)
+
+    if method == "eig":
+        e, u = scipy.linalg.eigh(M)
+        keep = np.flatnonzero(e > thr_eig)
+        return lib.dot((u[:,keep]*e[keep]**-0.5).T, W)
+
+    raise RuntimeError("Unknown method %s" % str(method))
diff --git a/pyscf/pbc/pwscf/pw_helper.py b/pyscf/pbc/pwscf/pw_helper.py
new file mode 100644
index 000000000..d89e59448
--- /dev/null
+++ b/pyscf/pbc/pwscf/pw_helper.py
@@ -0,0 +1,645 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Hong-Zhou Ye <hzyechem@gmail.com>
+# Author: Kyle Bystrom <kylebystrom@gmail.com>
+#
+
+""" Helper functions for PW SCF
+"""
+
+import tempfile
+import numpy as np
+import scipy.linalg
+
+from pyscf.pbc import tools, df
+from pyscf.pbc.dft import rks
+from pyscf.pbc.lib.kpts_helper import gamma_point
+from pyscf import lib
+from pyscf.lib.diis import DIIS
+from pyscf.lib import logger
+
+
+class PWBasis:
+    """
+    Plain record describing the plane-wave basis of one k-point: the
+    plane-waves of `mesh` selected by `indexes`, together with their
+    kinetic energies and G-vectors.
+
+    Attributes:
+        mesh: mesh from which the basis is constructed.
+        cutoff: PW cutoff in Hartree
+        kpt: the k-point this basis belongs to
+        indexes: The (raveled) indexes on mesh that are
+            part of the basis, i.e. which have kinetic
+            energy smaller than cutoff
+        ke: The kinetic energy of each plane-wave given
+            by the indexes
+        Gk: The plane-wave G-vectors for each index
+    """
+    def __init__(self, mesh, cutoff, kpt, indexes, ke, Gk):
+        # arguments are stored as given, without copies or checks
+        self.mesh, self.cutoff, self.kpt = mesh, cutoff, kpt
+        self.indexes, self.ke, self.Gk = indexes, ke, Gk
+
+    @property
+    def npw(self):
+        """Number of plane-waves, taken from the size of `ke`."""
+        return self.ke.size
+
+
+def get_basis_data(cell, kpts, ecut_wf, ecut_rho=None, wf_mesh=None,
+                   xc_mesh=None, sphere=True):
+    """ Set up the wave-function mesh, the XC mesh and one `PWBasis` per
+    k-point.
+
+    Args:
+        cell: provides `lattice_vectors()`, `mesh` and `get_Gv`.
+        kpts: k-points.
+        ecut_wf: wave-function cutoff; if None (and `wf_mesh` is None)
+            `cell.mesh` is used as wave-function mesh.
+        ecut_rho: cutoff for the XC mesh; defaults to 16 * ecut_wf.
+        wf_mesh, xc_mesh: explicit meshes overriding the cutoffs.
+        sphere: if True, each k-point keeps the plane-waves with kinetic
+            energy below `ecut_wf`; otherwise all k-points share the
+            plane-waves of an inner mesh.
+
+    Returns:
+        (wf_mesh, xc_mesh, wf2xc, basis_data): both meshes as arrays, the
+        map from the wave-function mesh into the XC mesh, and the list of
+        `PWBasis` objects.
+    """
+    latvec = cell.lattice_vectors()
+
+    # the reduced inner mesh is only used when wf_mesh was not given
+    use_small_inner_mesh = wf_mesh is None
+    if use_small_inner_mesh:
+        if ecut_wf is not None:
+            wf_mesh = tools.cutoff_to_mesh(latvec, 4 * ecut_wf)
+        else:
+            wf_mesh = cell.mesh
+
+    if xc_mesh is None:
+        if ecut_wf is None and ecut_rho is None:
+            xc_mesh = wf_mesh
+        else:
+            if ecut_rho is None:
+                ecut_rho = 16 * ecut_wf
+            xc_mesh = tools.cutoff_to_mesh(latvec, ecut_rho)
+
+    if not sphere:
+        inner_mesh = wf_mesh
+        if use_small_inner_mesh:
+            inner_mesh = [((((m + 1) // 2) - 1) // 2) * 2 + 1 for m in wf_mesh]
+        indexes = get_mesh_map(cell, None, None, mesh=wf_mesh, mesh2=inner_mesh)
+
+    wf2xc = get_mesh_map(cell, None, None, mesh=xc_mesh, mesh2=wf_mesh)
+    Gv = cell.get_Gv(np.array(wf_mesh))
+
+    basis_data = []
+    for kpt in kpts:
+        kinetic = get_kinetic(kpt, Gv)
+        if sphere:
+            indexes = np.where(kinetic < ecut_wf)[0]
+        pw_index = np.asarray(indexes, dtype=np.uintp, order="C")
+        pw_ke = np.asarray(kinetic[indexes], dtype=np.float64, order="C")
+        pw_Gk = np.asarray((kpt + Gv)[indexes, :], dtype=np.float64, order="C")
+        basis_data.append(PWBasis(wf_mesh, ecut_wf, kpt, pw_index, pw_ke, pw_Gk))
+
+    return np.array(wf_mesh), np.array(xc_mesh), wf2xc, basis_data
+
+
+def wf_fft(C_k_R, mesh, basis=None):
+    """
+    Transform a wave function from real space to reciprocal space.
+    `C_k_R` must be complex128. When `basis` is given, only the columns
+    listed in `basis.indexes` are returned.
+    """
+    assert C_k_R.dtype == np.complex128
+    C_k_G = tools.fft(C_k_R, mesh)
+    if basis is None:
+        return C_k_G
+    return C_k_G[:, basis.indexes]
+
+
+def wf_ifft(C_k, mesh, basis=None):
+    """
+    Transform a wave function from reciprocal space to real space.
+    When `basis` is given, `C_k` holds only the coefficients of the
+    plane-waves in `basis`; they are scattered into a zero-filled array
+    covering the whole mesh before the transform.
+    """
+    if basis is None:
+        return tools.ifft(C_k, mesh)
+
+    C_k_full = np.zeros((C_k.shape[0], np.prod(mesh)), dtype=C_k.dtype)
+    C_k_full[:, basis.indexes] = C_k
+    return tools.ifft(C_k_full, mesh)
+
+
+""" Helper functions
+"""
+def get_kcomp(C_ks, k, load=True, occ=None, copy=False):
+    """ Fetch the entry of k-point `k` from `C_ks`.
+
+    `C_ks` is either a list/ndarray indexed by `k` or a mapping (e.g. an
+    hdf5 group) keyed by the string "%d" % k. `occ` optionally selects
+    rows. `copy` only applies to the list/ndarray case; `load=False` only
+    applies to the mapping case and returns the stored object itself.
+    Returns None if `C_ks` is None.
+    """
+    if C_ks is None:
+        return None
+
+    if isinstance(C_ks, (list,np.ndarray)):
+        comp = C_ks[k] if occ is None else C_ks[k][occ]
+        return comp.copy() if copy else comp
+
+    dset = C_ks["%d"%k]
+    if not load:
+        return dset
+    if occ is None:
+        return dset[()]
+    if isinstance(occ, np.ndarray):
+        # index with a plain list rather than an ndarray
+        occ = occ.tolist()
+    return dset[occ]
+
+
+def safe_write(h5grp, key, val, occ=None):
+    """ Store `val` under `key` in `h5grp`.
+
+    A missing key is created. For an existing key, `occ` selects the
+    entries to overwrite; without `occ` the content is overwritten in
+    place when the shapes agree and the entry is replaced otherwise.
+    """
+    if key not in h5grp:
+        h5grp[key] = val
+        return
+
+    if occ is not None:
+        h5grp[key][occ] = val
+        return
+
+    if h5grp[key].shape == val.shape:
+        h5grp[key][()] = val
+    else:
+        # shape changed: drop the old entry and create a new one
+        del h5grp[key]
+        h5grp[key] = val
+
+
+def set_kcomp(C_k, C_ks, k, occ=None, copy=False):
+    """ Store `C_k` as the entry of k-point `k` in `C_ks`.
+
+    For a list/ndarray container the entry (or its rows `occ`) is
+    assigned directly, from a copy of `C_k` if `copy` is set. Any other
+    container is written through `safe_write` under the key "%d" % k;
+    `copy` is not used there.
+    """
+    if not isinstance(C_ks, (list,np.ndarray)):
+        safe_write(C_ks, "%d"%k, C_k, occ)
+        return
+
+    val = C_k.copy() if copy else C_k
+    if occ is None:
+        C_ks[k] = val
+    else:
+        C_ks[k][occ] = val
+
+
+def acc_kcomp(C_k, C_ks, k, occ=None):
+    """ Add `C_k` to the entry of k-point `k` in `C_ks` (optionally only
+    to the rows `occ`). `C_ks` is a list/ndarray indexed by `k` or a
+    mapping keyed by "%d" % k.
+    """
+    if isinstance(C_ks, (list,np.ndarray)):
+        if occ is None:
+            C_ks[k] += C_k
+        else:
+            C_ks[k][occ] += C_k
+        return
+
+    dset = C_ks["%d"%k]
+    if occ is None:
+        dset[()] += C_k
+        return
+    if isinstance(occ, np.ndarray):
+        # index with a plain list rather than an ndarray
+        occ = occ.tolist()
+    dset[occ] += C_k
+
+
+def scale_kcomp(C_ks, k, scale):
+    """ Multiply the entry of k-point `k` in `C_ks` by `scale`. `C_ks` is
+    a list/ndarray indexed by `k` or a mapping keyed by "%d" % k.
+    """
+    if not isinstance(C_ks, (list,np.ndarray)):
+        C_ks["%d"%k][()] *= scale
+        return
+    C_ks[k] *= scale
+
+
+def timing_call(func, args, tdict, tname):
+    """ Call `func(*args)` and add the elapsed (CPU, wall) time to
+    `tdict[tname]`, creating that entry as zeros if it is missing.
+    Returns the result of the call.
+    """
+    def _clocks():
+        # (process clock, wall clock) as a length-2 array
+        return np.asarray([logger.process_clock(), logger.perf_counter()])
+
+    start = _clocks()
+    res = func(*args)
+    elapsed = _clocks() - start
+
+    if tname not in tdict:
+        tdict[tname] = np.zeros(2)
+    tdict[tname] += elapsed
+
+    return res
+
+
+def orth(cell, C, thr_nonorth=1e-6, thr_lindep=1e-12, follow=True):
+    """ Orthonormalize the rows of `C`.
+
+    Args:
+        cell: only passed to the logger.
+        C: array whose rows are the vectors. NOTE: the rows are normalized
+            IN PLACE, so the caller's array is modified.
+        thr_nonorth: if the largest deviation of the overlap matrix from
+            the identity is below this value, the normalized `C` is
+            returned as is.
+        thr_lindep: overlap eigenvalues at or below this value are treated
+            as linear dependencies.
+        follow: in the branch where nothing is dropped, reorder the
+            eigenvectors before building the transformation.
+
+    Returns:
+        The input `C` itself (normalized) if it is already orthonormal
+        within `thr_nonorth`; otherwise a new array, with fewer rows if
+        linear dependencies were removed.
+    """
+    n = C.shape[0]
+    # normalize every row (in place)
+    norm = lib.einsum("ig,ig->i", C.conj(), C).real**0.5
+    C *= 1./norm.reshape(-1,1)
+    # overlap matrix between the rows
+    S = lib.dot(C.conj(), C.T)
+    nonorth_err = np.max(np.abs(S - np.eye(S.shape[0])))
+    if nonorth_err < thr_nonorth:
+        return C
+
+    e, u = scipy.linalg.eigh(S)
+    idx_keep = np.where(e > thr_lindep)[0]
+    nkeep = idx_keep.size
+    if n == nkeep:  # symm orth
+        lib.logger.debug2(cell, "Cond nubmer = %.3e", e.max()/e.min())
+        if follow:
+            # reorder to maximally overlap original orbs
+            # for each row i of u, pick the not-yet-used column j with
+            # the largest |u[i,j]|
+            idx = []
+            for i in range(n):
+                order = np.argsort(np.abs(u[i]))[::-1]
+                for j in order:
+                    if j not in idx:
+                        break
+                idx.append(j)
+            # NOTE(review): the second factor is u.conj() without a
+            # transpose; confirm this is the intended transformation
+            U = lib.dot(u[:,idx]*e[idx]**-0.5, u[:,idx].conj()).T
+        else:
+            U = lib.dot(u*e**-0.5, u.conj()).T
+    else:   # cano orth
+        lib.logger.debug2(cell, "Cond nubmer = %.3e  Drop %d orbitals",
+                          e.max()/e.min(), n-nkeep)
+        # keep only the eigenvectors above thr_lindep
+        U = (u[:,idx_keep]*e[idx_keep]**-0.5).T
+    C = lib.dot(U, C)
+
+    return C
+
+
+def get_nocc_ks_from_mocc(mocc_ks):
+    """ Count, for each k-point, the occupation numbers that are > 0. """
+    nocc_ks = []
+    for mocc in mocc_ks:
+        is_occupied = np.asarray(mocc) > 0
+        nocc_ks.append(is_occupied.sum())
+    return np.asarray(nocc_ks)
+
+
+def get_C_ks_G(cell, kpts, mo_coeff_ks, n_ks, out=None, verbose=0, mesh=None):
+    """ Return Cik(G) for input MO coeff. The normalization convention is such that Cik(G).conj()@Cjk(G) = delta_ij.
+
+    Args:
+        cell: cell defining the AO basis; copied and rebuilt if `mesh` is
+            given.
+        kpts: k-points.
+        mo_coeff_ks: per-k-point AO coefficient matrices; the first
+            `n_ks[k]` columns of each are transformed.
+        n_ks: number of orbitals per k-point.
+        out: list or hdf5-like group receiving the result (written with
+            `set_kcomp`); a new list is created if None.
+        verbose: verbosity passed to the logger.
+        mesh: optional mesh overriding `cell.mesh`.
+
+    Returns:
+        `out`, holding one array per k-point with orbitals along the
+        first axis.
+    """
+    log = logger.new_logger(cell, verbose)
+
+    nkpts = len(kpts)
+    if out is None: out = [None] * nkpts
+
+    dtype = np.complex128
+    dsize = 16
+
+    if mesh is not None:
+        cell = cell.copy()
+        cell.mesh = mesh
+        cell.build()
+    mydf = df.FFTDF(cell)
+    mesh = mydf.mesh
+
+    coords = mydf.grids.coords
+    ngrids = coords.shape[0]
+    weight = mydf.grids.weights[0]
+    fac = (weight/ngrids)**0.5
+
+    frac = 0.5  # to be safe
+    cur_memory = lib.current_memory()[0]
+    max_memory = (cell.max_memory - cur_memory) * frac
+    log.debug1("max_memory= %s MB (currently used %s MB)", cell.max_memory, cur_memory)
+    # FFT needs 2 temp copies of MOs
+    extra_memory = 2*ngrids*np.max(n_ks)*dsize / 1.e6
+    # add 1 for ao_ks
+    perk_memory = ngrids*(np.max(n_ks)+1)*dsize / 1.e6
+    kblksize = min(int(np.floor((max_memory-extra_memory) / perk_memory)),
+                   nkpts)
+    if kblksize <= 0:
+        log.warn("Available memory %s MB cannot perform conversion for orbitals"
+                 " of a single k-point. Calculations may crash and `cell.max_memory"
+                 " = %s` is recommended.",
+                 max_memory, (perk_memory + extra_memory) / frac + cur_memory)
+        # a non-positive block size cannot be used as the step of the
+        # k-point loop below; proceed one k-point at a time as announced
+        kblksize = 1
+
+    log.debug1("max memory= %s MB, extra memory= %s MB, perk memory= %s MB,"
+               " kblksize= %s", max_memory, extra_memory, perk_memory, kblksize)
+
+    for k0,k1 in lib.prange(0, nkpts, kblksize):
+        nk = k1 - k0
+        C_ks_R = [np.zeros([ngrids,n_ks[k]], dtype=dtype)
+                   for k in range(k0,k1)]
+        # evaluate the orbitals on the real-space grid, slice by slice
+        for ao_ks_etc, p0, p1 in mydf.aoR_loop(mydf.grids, kpts[k0:k1]):
+            ao_ks = ao_ks_etc[0]
+            for krel, ao in enumerate(ao_ks):
+                k = krel + k0
+                kpt = kpts[k].reshape(-1,1)
+                C_k = np.asarray(mo_coeff_ks[k][:,:n_ks[k]], order='C')
+                C_ks_R[krel][p0:p1] = lib.dot(ao, C_k)
+                if not gamma_point(kpt):
+                    # multiply by exp(-i k.r) away from the gamma point
+                    phase = np.exp(-1j * lib.dot(coords[p0:p1], kpt))
+                    C_ks_R[krel][p0:p1] *= phase
+                    phase = None
+            ao = ao_ks = None
+
+        for krel in range(nk):
+            C_k_G = tools.fft(C_ks_R[krel].T * fac, mesh)
+            set_kcomp(C_k_G, out, krel+k0)
+
+    return out
+
+
+""" Contracted PW
+"""
+def get_mesh_map(cell, ke_cutoff, ke_cutoff2, mesh=None, mesh2=None):
+    """ Locate the grid points of a sparse mesh inside a dense mesh.
+
+    The dense grid "mesh" (from `ke_cutoff` unless given) must be at least
+    as large as the sparse grid "mesh2" (from `ke_cutoff2` unless given)
+    in every direction. The returned array holds, for every point of
+    mesh2, its raveled index in mesh.
+    """
+    latvec = cell.lattice_vectors()
+    mesh = (tools.cutoff_to_mesh(latvec, ke_cutoff) if mesh is None
+            else np.asarray(mesh))
+    mesh2 = (tools.cutoff_to_mesh(latvec, ke_cutoff2) if mesh2 is None
+             else np.asarray(mesh2))
+    assert(np.all(mesh>=mesh2))
+
+    idxr = []   # per direction: dense-grid positions present in mesh2
+    dims = []   # per direction: number of dense-grid points
+    for i in range(3):
+        freq = np.fft.fftfreq(mesh[i], 1./mesh[i])
+        freq2 = np.fft.fftfreq(mesh2[i], 1./mesh2[i])
+        idxr.append(np.where(abs(freq[:,None]-freq2)<1e-3)[0])
+        dims.append(len(freq))
+
+    # combine the per-direction positions into raveled (C-order) indices
+    grid = np.meshgrid(*idxr, indexing="ij")
+    mesh_map = np.ravel_multi_index(grid, dims).ravel()
+
+    return mesh_map
+
+
+def remove_pGTO_from_cGTO_(bdict, amax=None, amin=None, verbose=0):
+    """ Return a copy of the basis dictionary `bdict` in which only the
+    primitive GTOs with amin < exponent < amax are kept.
+
+    Basis entries given as strings are loaded first (through a one-atom
+    cell if the name contains "gth", otherwise with the molecular basis
+    loader). Shells left without primitives are dropped. With both
+    `amin` and `amax` None the shells are returned unchanged.
+    """
+    from pyscf import gto as mol_gto
+    from pyscf.pbc import gto as pbc_gto
+
+    def prune(blist):
+        if amin is None and amax is None:
+            return blist
+
+        amax_ = 1e10 if amax is None else amax
+        amin_ = -1 if amin is None else amin
+
+        kept = []
+        for lbs in blist:
+            # lbs = [l, primitive, primitive, ...]; b[0] is the exponent
+            prims = [b for b in lbs[1:] if amin_ < b[0] < amax_]
+            if prims:
+                kept.append([lbs[0]] + prims)
+        return kept
+
+    ang_map = ["S", "P", "D", "F", "G", "H", "I", "J"]
+
+    log = lib.logger.Logger(verbose=verbose)
+    log.debug1("Generating basis...")
+    bdict_new = {}
+    for atm,basis in bdict.items():
+        if not isinstance(basis, str):
+            blist = basis
+        elif "gth" in basis.lower():
+            cell = pbc_gto.M(atom="%s 0 0 0"%atm, basis=basis, spin=1)
+            blist = cell._basis[atm]
+        else:
+            blist = mol_gto.basis.load(basis, atm)
+        bdict_new[atm] = prune(blist)
+
+        # log the retained shells of this atom
+        for lbs in bdict_new[atm]:
+            log.debug1("%s %s", atm, ang_map[lbs[0]])
+            for b in lbs[1:]:
+                log.debug1("%.10f " * len(b), *b)
+    log.debug1("")
+
+    return bdict_new
+
+
+def cpw_from_cell(cell_cpw, kpts, out=None):
+    """ Transform every AO of `cell_cpw` (identity coefficient matrices)
+    with `get_C_ks_G`, for all `kpts`. `out` is a list or hdf5-like
+    group; a new list is created if None. Returns the filled `out`.
+    """
+    nkpts = len(kpts)
+    nao = cell_cpw.nao_nr()
+    identity_ks = [np.eye(nao)+0j for _ in range(nkpts)]
+    nao_ks = np.full(nkpts, nao, dtype=int)
+    if out is None:
+        out = [None] * nkpts
+    return get_C_ks_G(cell_cpw, kpts, identity_ks, nao_ks, out=out)
+
+
+def gto2cpw(cell, basis, kpts, amin=None, amax=None, ke_or_mesh=None, out=None):
+    """ Get the contracted PWs for input GTO basis
+    Args:
+        basis:
+            Some examples:
+                basis = "ccecp-cc-pVDZ" (applies to all atoms in "cell")
+                basis = {"C": "ccecp-cc-pVDZ", "N": "gth-tzv2p"}
+                basis = {"C": [[0,[12,0.7],[5,0.3],[1,0.5]]], "N": "gth-szv"}
+        amin/amax:
+            If provided, only primitive GTOs with amin < exponent < amax
+            are kept.
+        ke_or_mesh:
+            If list/tuple/numpy array, interpreted as mesh
+            otherwise, interpreted as ke_cutoff.
+            Default is None which uses the same ke_cutoff/mesh from input "cell".
+        out:
+            None --> return a list of numpy arrays (incore mode)
+            hdf5 group --> saved to the hdf5 group (outcore mode)
+    """
+# formatting basis: one entry per atom symbol of the cell
+    atmsymbs = cell._basis.keys()
+    if isinstance(basis, str):
+        basisdict = dict.fromkeys(atmsymbs, basis)
+    elif isinstance(basis, dict):
+        assert(basis.keys() == atmsymbs)
+        basisdict = basis
+    else:
+        raise TypeError("Input basis must be either a str or dict.")
+# pruning pGTOs that have unwanted exponents
+    basisdict = remove_pGTO_from_cGTO_(basisdict, amax=amax, amin=amin)
+# make a new cell with the modified GTO basis
+    cell_cpw = cell.copy()
+    cell_cpw.basis = basisdict
+    if ke_or_mesh is None:
+        pass    # keep the ke_cutoff/mesh of the input cell
+    elif isinstance(ke_or_mesh, (list,tuple,np.ndarray)):
+        cell_cpw.mesh = ke_or_mesh
+    else:
+        cell_cpw.ke_cutoff = ke_or_mesh
+    cell_cpw.verbose = 0
+    cell_cpw.build()
+# GTOs --> CPWs
+    return cpw_from_cell(cell_cpw, kpts, out=out)
+
+
+def gtomf2pwmf(mf, chkfile=None):
+    """ Convert a GTO-based SCF object (KRHF, KUHF or UHF) into a PW-mode SCF object marked as converged
+    Args:
+        mf: GTO SCF object whose mo_coeff, mo_energy, mo_occ and e_tot are carried over.
+        chkfile (str):
+            A hdf5 file to store chk variables (mo_energy, mo_occ, etc.). If not provided, a temporary file name is generated.
+    """
+    from pyscf.pbc import scf
+    assert(isinstance(mf, (scf.khf.KRHF,scf.kuhf.KUHF,scf.uhf.UHF)))
+
+    from pyscf.pbc import pwscf
+    cell = mf.cell
+    kpts = getattr(mf, "kpts", np.zeros((1,3)))  # objects without "kpts" get a single zero k-point
+    nkpts = len(kpts)
+# transform GTO MO coeff to PW MO coeff
+    Cgto_ks = mf.mo_coeff
+    if isinstance(mf, scf.khf.KRHF):
+        pwmf = pwscf.KRHF(cell, kpts)
+        nmo_ks = [Cgto_ks[k].shape[1] for k in range(nkpts)]
+        pwmf.mo_coeff = C_ks = get_C_ks_G(cell, kpts, Cgto_ks, nmo_ks)
+        pwmf.mo_energy = moe_ks = mf.mo_energy
+        pwmf.mo_occ = mocc_ks = mf.mo_occ
+        pwmf.e_tot = mf.e_tot
+    elif isinstance(mf, scf.kuhf.KUHF):
+        pwmf = pwscf.KUHF(cell, kpts)
+        C_ks = [None] * 2
+        for s in [0,1]:
+            nmo_ks = [Cgto_ks[s][k].shape[1] for k in range(nkpts)]
+            C_ks[s] = get_C_ks_G(cell, kpts, Cgto_ks[s], nmo_ks)
+        pwmf.mo_coeff = C_ks
+        pwmf.mo_energy = moe_ks = mf.mo_energy
+        pwmf.mo_occ = mocc_ks = mf.mo_occ
+        pwmf.e_tot = mf.e_tot
+    elif isinstance(mf, scf.uhf.UHF):  # per-spin data is wrapped in length-1 k-point lists below
+        pwmf = pwscf.KUHF(cell, kpts)
+        C_ks = [None] * 2
+        for s in [0,1]:
+            nmo_ks = [Cgto_ks[s].shape[1]]
+            C_ks[s] = get_C_ks_G(cell, kpts, [Cgto_ks[s]], nmo_ks)
+        pwmf.mo_coeff = C_ks
+        pwmf.mo_energy = moe_ks = [[mf.mo_energy[s]] for s in [0,1]]
+        pwmf.mo_occ = mocc_ks = [[mf.mo_occ[s]] for s in [0,1]]
+        pwmf.e_tot = mf.e_tot
+    else:
+        raise TypeError  # only reachable when the assert above is disabled (python -O)
+# update chkfile
+    if chkfile is None:
+        swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+        chkfile = swapfile.name
+        swapfile = None  # drop the handle; only the generated name is reused below
+    pwmf.chkfile = chkfile
+    from pyscf.pbc.pwscf.chkfile import dump_scf
+    dump_scf(mf.cell, pwmf.chkfile, mf.e_tot, moe_ks, mocc_ks, C_ks)
+    pwmf.converged = True
+
+    return pwmf
+
+
+def get_kinetic(kpt, Gv):
+    """ Return 0.5 * sum_j (kpt + Gv)_j^2 for each row of Gv; kpt is not added unless sum(|kpt|) > 1e-9 """
+    kpG = kpt + Gv if np.sum(np.abs(kpt)) > 1.E-9 else Gv
+    return np.einsum("gj,gj->g", kpG, kpG) * 0.5
+
+
+""" kinetic energy
+"""
+def apply_kin_kpt(C_k, kpt, Gv, basis=None):
+    """ Multiply C_k by the kinetic factors: basis.ke if "basis" is given, else get_kinetic(kpt, Gv) """
+    if basis is not None:
+        return C_k * basis.ke
+    # no basis object supplied: evaluate the kinetic factors from kpt and Gv
+    return C_k * get_kinetic(kpt, Gv)
+
+
+""" Charge mixing methods
+"""
+class _Mixing:
+    """
+    Mixing class for the (semi)local effective potential.
+    For Hartree-Fock calculations, the Coulomb potential
+    is mixed. For DFT calculations, a concatenated vector
+    of the Coulomb potential, the local XC potential,
+    and (for meta-GGAs) the kinetic part of the XC potential is mixed.
+    """
+    def __init__(self, mf):
+        # number of mixing steps taken (incremented by the subclasses' _next_step)
+        self.cycle = 0
+        # Kohn-Sham objects carry the XC quantities as tags on the potential,
+        # so they need the concatenated treatment in next_step.
+        self._ks = isinstance(mf, rks.KohnShamDFT)
+
+    def _extract_kwargs(self, f):
+        if self._ks:
+            return {
+                "exc": f.exc,
+                "vxcdot": f.vxcdot,
+                "vxc_R": f.vxc_R,
+                "vtau_R": f.vtau_R,
+            }
+        else:
+            return {}
+
+    def _tag(self, f, kwargs):
+        if self._ks:
+            return lib.tag_array(f, **kwargs)
+        else:
+            return f
+
+    def _next_step(self, mf, f, ferr, i=0):
+        raise NotImplementedError  # subclasses return the mixed vector built from f and its residual ferr
+
+    def next_step(self, mf, f, flast):
+        """
+        Compute the next mixed potential from the current
+        potential f and previous flast. For Hartree-Fock,
+        returns the mixed Coulomb potential. For DFT,
+        both f and flast must be tagged with exc, vxcdot,
+        vxc_R, and vtau_R (vtau_R can be None). The return
+        value is the mixed Coulomb potential tagged with
+        exc, vxcdot, and the mixed vxc_R and vtau_R.
+        """
+        if not self._ks:
+            return self._next_step(mf, f, f - flast)
+        ferr = (f - flast).ravel()  # flattened like every other block of the mixed vector
+        kwargs = self._extract_kwargs(f)
+        kwargslast = self._extract_kwargs(flast)
+        fxc_err = (kwargs["vxc_R"] - kwargslast["vxc_R"]).ravel()
+        f_list = [f, kwargs["vxc_R"]]
+        ferr_list = [ferr, fxc_err]
+        kw = "vtau_R"  # optional third block; skipped when the tag is None
+        if kw in kwargs and kwargs[kw] is not None:
+            f_list.append(kwargs[kw])
+            ferr_list.append((kwargs[kw] - kwargslast[kw]).ravel())
+        sizes = [x.size for x in f_list]  # remembered to split the mixed vector again
+        shapes = [x.shape for x in f_list]
+        f_list = np.concatenate([x.ravel() for x in f_list])
+        ferr_list = np.concatenate(ferr_list)
+        result = self._next_step(mf, f_list, ferr_list)
+        tagged_result = result[0 : sizes[0]]
+        tagged_result.shape = shapes[0]
+        start = sizes[0]
+        mid = start + sizes[1]
+        kwargs["vxc_R"] = result[start:mid]
+        kwargs["vxc_R"].shape = shapes[1]
+        if kw in kwargs and kwargs[kw] is not None:
+            kwargs[kw] = result[mid:]
+            kwargs[kw].shape = shapes[2]
+        return self._tag(tagged_result, kwargs)
+
+
+class SimpleMixing(_Mixing):
+    """
+    Simple (linear) mixing, i.e. f_mix = beta * f_old + (1 - beta) * f_new
+    """
+    def __init__(self, mf, beta=0.3):
+        super().__init__(mf)
+        self.beta = beta  # weight of the previous potential f_old in the mix
+
+    def _next_step(self, mf, f, ferr):
+        self.cycle += 1
+        return f - ferr * self.beta  # equals the docstring formula when ferr = f_new - f_old
+
+
+class AndersonMixing(_Mixing):
+    """
+    Anderson mixing, i.e. mixing with DIIS (delegates to a DIIS object).
+    """
+    def __init__(self, mf, ndiis=10, diis_start=1):
+        super().__init__(mf)
+        self.diis = DIIS()
+        self.diis.space = ndiis  # ndiis is stored as the DIIS "space" attribute
+        self.diis.min_space = diis_start  # diis_start is stored as the DIIS "min_space" attribute
+
+    def _next_step(self, mf, f, ferr):
+        self.cycle += 1
+        return self.diis.update(f, ferr)  # f: current vector, ferr: its residual (f - flast)
+
diff --git a/pyscf/pbc/pwscf/smearing.py b/pyscf/pbc/pwscf/smearing.py
new file mode 100644
index 000000000..64b7582d6
--- /dev/null
+++ b/pyscf/pbc/pwscf/smearing.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Kyle Bystrom <kylebystrom@gmail.com>
+#
+
+""" Occupation smearing for SCF methods in a plane-wave basis
+"""
+
+from pyscf.pbc.scf.addons import _SmearingKSCF
+from pyscf.pbc.pwscf import khf
+from pyscf.lib import logger
+from pyscf import lib
+from pyscf import __config__
+import numpy as np
+
+
+SMEARING_METHOD = getattr(__config__, 'pbc_scf_addons_smearing_method', 'fermi')
+
+
+def smearing(mf, sigma=None, method=SMEARING_METHOD, mu0=None, fix_spin=False):
+    """
+    Return mf with occupation smearing (Fermi-Dirac and Gaussian smearing are supported).
+    An mf that already has smearing is updated in place and returned; otherwise a new object built from mf is returned.
+    """
+    if not isinstance(mf, khf.PWKSCF):
+        raise ValueError("For PW mode only")
+
+    if isinstance(mf, _SmearingPWKSCF):
+        mf.sigma = sigma
+        mf.smearing_method = method
+        mf.mu0 = mu0
+        mf.fix_spin = fix_spin
+        return mf
+
+    return lib.set_class(_SmearingPWKSCF(mf, sigma, method, mu0, fix_spin),
+                         (_SmearingPWKSCF, mf.__class__))
+
+
+def smearing_(mf, *args, **kwargs):
+    """
+    Apply smearing in-place to a PWKSCF object mf and return mf; arguments are forwarded to smearing().
+    """
+    mf1 = smearing(mf, *args, **kwargs)
+    mf.__class__ = mf1.__class__  # mf takes over the class of the smearing object ...
+    mf.__dict__ = mf1.__dict__  # ... and shares its attribute dict
+    return mf
+
+
+def has_smearing(mf):
+    """
+    Check if occupation smearing is used by mf, i.e. whether mf is a _SmearingPWKSCF instance.
+    """
+    return isinstance(mf, _SmearingPWKSCF)
+
+
+class _SmearingPWKSCF(_SmearingKSCF):
+    """
+    Sub-class of _SmearingKSCF that defines the get_mo_occ
+    and energy_tot functions for use in plane-wave calculations.
+    """
+    def get_mo_occ(self, moe_ks=None, C_ks=None, nocc=None):
+        cell = self.cell
+        if nocc is None:
+            nocc = cell.nelectron / 2.0  # value is not used further below
+        else:
+            assert nocc == cell.nelectron / 2.0  # any other occupation count is rejected
+        if moe_ks is None:
+            raise NotImplementedError(
+                "PWKSCF smearing without mo energy input"
+            )
+        mocc_ks = self.get_occ(mo_energy_kpts=moe_ks, mo_coeff_kpts=C_ks)
+        return mocc_ks
+
+    def energy_tot(self, C_ks, mocc_ks, moe_ks=None, mesh=None, Gv=None,
+                   vj_R=None, exxdiv=None):
+        e_tot = khf.PWKRHF.energy_tot(self, C_ks, mocc_ks, moe_ks=moe_ks,
+                                      mesh=mesh, Gv=Gv, vj_R=vj_R,
+                                      exxdiv=exxdiv)  # NOTE(review): explicit PWKRHF call, not super() -- confirm for KS/unrestricted classes
+        if self.sigma and self.smearing_method and self.entropy is not None:
+            self.e_free = e_tot - self.sigma * self.entropy
+            self.e_zero = e_tot - self.sigma * self.entropy * .5
+            logger.info(self, '    Total E(T) = %.15g  Free energy = %.15g  E0 = %.15g',
+                        e_tot, self.e_free, self.e_zero)
+        return e_tot  # the unmodified total energy is returned; e_free/e_zero are stored as attributes
+
diff --git a/pyscf/pbc/pwscf/test/__init__.py b/pyscf/pbc/pwscf/test/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/pyscf/pbc/pwscf/test/test_gto_vs_pw.py b/pyscf/pbc/pwscf/test/test_gto_vs_pw.py
new file mode 100644
index 000000000..697871033
--- /dev/null
+++ b/pyscf/pbc/pwscf/test/test_gto_vs_pw.py
@@ -0,0 +1,111 @@
+""" Check if the PW code gives same MO energies as the GTO code for a given
+wave function
+"""
+
+import h5py
+import tempfile
+import numpy as np
+
+from pyscf.pbc import gto, df, scf, pwscf
+from pyscf.pbc.pwscf import khf, pw_helper
+from pyscf import lib
+import pyscf.lib.parameters as param
+
+import unittest
+
+
+class KnownValues(unittest.TestCase):
+    def _run_test(self, cell, kpts, exxdiv):
+        # GTO reference: run KRHF, then contract kinetic, PP, J and K matrices with the density matrix
+        gmf = scf.KRHF(cell, kpts)
+        gmf.exxdiv = exxdiv
+        gmf.kernel()
+
+        vpp = lib.asarray(gmf.with_df.get_pp(kpts))
+        vkin = lib.asarray(gmf.cell.pbc_intor('int1e_kin', 1, 1, kpts))
+        dm = gmf.make_rdm1()
+        vj, vk = df.FFTDF(cell).get_jk(dm, kpts=kpts)
+
+        nkpts = len(kpts)
+        moe_comp_ks = np.zeros((4,nkpts), dtype=np.complex128)  # rows: kinetic, PP, J, K
+        for k in range(nkpts):
+            moe_comp_ks[0,k] = np.einsum("ij,ji->", vkin[k], dm[k])
+            moe_comp_ks[1,k] = np.einsum("ij,ji->", vpp[k], dm[k])
+            moe_comp_ks[2,k] = np.einsum("ij,ji->", vj[k], dm[k]) * 0.5
+            moe_comp_ks[3,k] = -np.einsum("ij,ji->", vk[k], dm[k]) * 0.25
+
+        # PW (both vanilla and ACE): the GTO orbitals are converted with get_C_ks_G and the same components re-evaluated
+        pmf = pwscf.KRHF(cell, kpts)
+        pmf.init_pp()
+        pmf.init_jk()
+        pmf.exxdiv = exxdiv
+        no_ks = pw_helper.get_nocc_ks_from_mocc(gmf.mo_occ)
+        C_ks = pw_helper.get_C_ks_G(cell, kpts, gmf.mo_coeff, no_ks)
+        mocc_ks = khf.get_mo_occ(cell, C_ks=C_ks)
+        pmf.update_pp(C_ks)
+        vj_R = pmf.get_vj_R(C_ks, mocc_ks)
+        mesh = cell.mesh
+        Gv = cell.get_Gv(mesh)
+
+        pmf.with_jk.ace_exx = False  # first pass: exchange without ACE
+        pmf.update_k(C_ks, mocc_ks)
+        moe_comp_ks_pw = np.zeros((4, nkpts), dtype=np.complex128)
+        for k in range(nkpts):
+            C_k = C_ks[k]
+            kpt = kpts[k]
+            moe = pmf.apply_Fock_kpt(C_k, kpt, mocc_ks, mesh, Gv, vj_R, exxdiv,
+                                    ret_E=True)[1]
+            moe_comp_ks_pw[0,k] = moe[0]
+            moe_comp_ks_pw[1,k] = moe[1] + moe[2]  # two PW terms are summed to compare with the single GTO PP row
+            moe_comp_ks_pw[2:,k] = moe[3:]
+
+        pmf.with_jk.ace_exx = True  # second pass: same evaluation with ACE switched on
+        pmf.update_k(C_ks, mocc_ks)
+        ace_moe_comp_ks_pw = np.zeros((4, nkpts), dtype=np.complex128)
+        for k in range(nkpts):
+            C_k = C_ks[k]
+            kpt = kpts[k]
+            moe = pmf.apply_Fock_kpt(C_k, kpt, mocc_ks, mesh, Gv, vj_R, exxdiv,
+                                    ret_E=True)[1]
+            ace_moe_comp_ks_pw[0,k] = moe[0]
+            ace_moe_comp_ks_pw[1,k] = moe[1] + moe[2]
+            ace_moe_comp_ks_pw[2:,k] = moe[3:]
+
+        maxe_real = np.max(np.abs(moe_comp_ks.real - moe_comp_ks_pw.real))
+        maxe_imag = np.max(np.abs(moe_comp_ks.imag - moe_comp_ks_pw.imag))
+        ace_maxe_real = np.max(np.abs(moe_comp_ks.real - ace_moe_comp_ks_pw.real))
+        ace_maxe_imag = np.max(np.abs(moe_comp_ks.imag - ace_moe_comp_ks_pw.imag))
+
+        assert(maxe_real < 1e-6)  # every component must agree with the GTO value to 1e-6
+        assert(maxe_imag < 1e-6)
+        assert(ace_maxe_real < 1e-6)
+        assert(ace_maxe_imag < 1e-6)
+
+    def test_gto_vs_pw(self):
+        """ Two-atom carbon cell (gth-szv / gth-pade) on a 2x1x1 k-mesh with exxdiv=None """
+        kmesh = [2,1,1]
+        ke_cutoff = 150
+        pseudo = "gth-pade"
+        exxdiv = None
+        atom = "C 0 0 0; C 0.89169994 0.89169994 0.89169994"
+        a = np.asarray(
+            [[0.       , 1.78339987, 1.78339987],
+            [1.78339987, 0.        , 1.78339987],
+            [1.78339987, 1.78339987, 0.        ]])
+
+        # cell
+        cell = gto.Cell(
+            atom=atom,
+            a=a,
+            basis="gth-szv",
+            pseudo=pseudo,
+            ke_cutoff=ke_cutoff
+        )
+        cell.build()
+        cell.verbose = 0
+        kpts = cell.make_kpts(kmesh)
+        self._run_test(cell, kpts, exxdiv)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/pyscf/pbc/pwscf/test/test_hf_and_ks.py b/pyscf/pbc/pwscf/test/test_hf_and_ks.py
new file mode 100644
index 000000000..d045339e7
--- /dev/null
+++ b/pyscf/pbc/pwscf/test/test_hf_and_ks.py
@@ -0,0 +1,394 @@
+import unittest
+import tempfile
+import numpy as np
+from pyscf.pbc import gto as pbcgto
+from pyscf.pbc import dft as pbcdft
+from pyscf.pbc.pwscf.smearing import smearing_
+from pyscf.pbc.pwscf import khf, kuhf, krks, kuks
+from pyscf.pbc.pwscf.ncpp_cell import NCPPCell
+import pyscf.pbc
+from numpy.testing import assert_allclose
+pyscf.pbc.DEBUG = False
+
+
+def setUpModule():
+    global CELL, KPTS, ATOM, KPT1  # module-level fixtures used by the tests below
+    CELL = pbcgto.Cell(
+        atom = "C 0 0 0; C 0.89169994 0.89169994 0.89169994",
+        a = np.asarray([
+                [0.       , 1.78339987, 1.78339987],
+                [1.78339987, 0.        , 1.78339987],
+                [1.78339987, 1.78339987, 0.        ]]),
+        basis="gth-szv",
+        ke_cutoff=50,
+        pseudo="gth-pade",
+        verbose=0,
+    )
+    CELL.mesh = [13, 13, 13]  # mesh is assigned explicitly before build()
+    # CELL.mesh = [27, 27, 27]
+    CELL.build()
+
+    kmesh = [3, 1, 1]
+    KPTS = CELL.make_kpts(kmesh)  # k-points for the two-atom CELL
+
+    ATOM = pbcgto.Cell(
+        atom = "C 0 0 0",
+        a = np.eye(3) * 4,
+        basis="gth-szv",
+        ke_cutoff=50,
+        pseudo="gth-pade",
+        spin=-2,
+        verbose=0,
+    )
+    ATOM.mesh = [25, 25, 25]
+    ATOM.build()
+
+    nk = 1
+    kmesh = (nk,)*3
+    KPT1 = ATOM.make_kpts(kmesh)  # 1x1x1 mesh for the single-atom cell
+
+
+def tearDownModule():
+    global CELL, ATOM, KPTS, KPT1
+    del CELL, ATOM, KPTS, KPT1  # release every fixture created by setUpModule
+
+
+class KnownValues(unittest.TestCase):
+    def _get_calc(self, cell, kpts, spinpol=False, xc=None, run=True, **kwargs):
+        """
+        Helper to build a PWKRHF/PWKUHF (xc is None) or PWKRKS/PWKUKS object; other kwargs become attributes, kernel() runs if run.
+        """
+        ecut_wf = kwargs.pop("ecut_wf", None)  # cutoffs go to the constructor, not to __dict__
+        ecut_rho = kwargs.pop("ecut_rho", None)
+        if xc is None:
+            if not spinpol:
+                mf = khf.PWKRHF(cell, kpts, ecut_wf=ecut_wf, ecut_rho=ecut_rho)
+            else:
+                mf = kuhf.PWKUHF(
+                    cell, kpts, ecut_wf=ecut_wf, ecut_rho=ecut_rho
+                )
+        else:
+            if not spinpol:
+                mf = krks.PWKRKS(
+                    cell, kpts, xc=xc, ecut_wf=ecut_wf, ecut_rho=ecut_rho
+                )
+            else:
+                mf = kuks.PWKUKS(
+                    cell, kpts, xc=xc, ecut_wf=ecut_wf, ecut_rho=ecut_rho
+                )
+        mf.conv_tol = 1e-8  # default for the tests; a conv_tol in kwargs overrides it on the next line
+        mf.__dict__.update(**kwargs)
+        if run:
+            mf.kernel()
+        return mf
+
+    def _check_fd(self, mf):
+        """
+        Check a bunch of properties of the mean-field calculation:
+        - that get_mo_energy matches the mo_energy from SCF
+        - that energy_tot with moe_ks gives same output as SCF e_tot
+          (also tests energy_elec for this consistency implicitly)
+        - that eig_subspace, get_mo_energy(full_ham=True), and
+          finite difference all give the same prediction for the
+          change in total energy upon perturbation of the orbitals.
+          This implicitly tests all routines for constructing
+          the effective Hamiltonian, especially the XC potential.
+        """
+        if not mf.converged:
+            mf.kernel()
+            assert mf.converged
+        mo_energy, mo_occ = mf.get_mo_energy(mf.mo_coeff, mf.mo_occ)
+        if mf.istype("KRHF"):
+            assert_allclose(mo_energy, mf.mo_energy, rtol=1e-7, atol=1e-7)
+        else:
+            assert_allclose(mo_energy[0], mf.mo_energy[0], rtol=1e-6, atol=1e-6)
+            assert_allclose(mo_energy[1], mf.mo_energy[1], rtol=1e-6, atol=1e-6)
+        etot_ref = mf.e_tot
+        etot_check = mf.energy_tot(mf.mo_coeff, mf.mo_occ,
+                                   moe_ks=mo_energy)
+        # This is a somewhat loose threshold, but occasionally with the
+        # isolated atoms the energy is very sensitive, so this makes
+        # sure the test passes
+        assert_allclose(etot_check, etot_ref, atol=30*mf.conv_tol, rtol=0)
+        delta = 1e-5  # finite-difference step for the orbital perturbation
+        cell = mf.cell
+        mesh = mf.wf_mesh
+        Gv = cell.get_Gv(mesh)
+
+        spinpol = isinstance(mf, kuhf.PWKUHF)
+        if spinpol:
+            nkpts = len(mf.mo_coeff[0])
+        else:
+            nkpts = len(mf.mo_coeff)
+
+        def _update(Ct_ks):
+            mf.update_pp(Ct_ks)
+            mf.update_k(Ct_ks, mo_occ)
+
+        def _transform(C_ks, mocc_ks, k, s=None):
+            if s is not None:
+                Ctspin_ks, vbm, cbm = _transform(C_ks[s], mocc_ks[s], k)
+                Ct_ks = [[C.copy() for C in Cspin] for Cspin in C_ks]
+                Ct_ks[s] = Ctspin_ks
+                return Ct_ks, vbm, cbm
+            vbm = np.max(np.where(mocc_ks[k] > 0.9))
+            cbm = np.min(np.where(mocc_ks[k] < 0.1))
+            transform = np.identity(C_ks[k].shape[0])
+            transform[vbm, vbm] = np.sqrt(0.5)
+            transform[vbm, cbm] = np.sqrt(0.5)
+            transform[cbm, cbm] = np.sqrt(0.5)
+            transform[cbm, vbm] = -np.sqrt(0.5)
+            Ct_k = transform.dot(C_ks[k])
+            Ct_ks = [C_k.copy() for C_k in C_ks]
+            Ct_ks[k] = Ct_k.copy()
+            return Ct_ks, vbm, cbm
+
+        def _eig_subspace_ham(Ct_ks, k, s=None):
+            if s is not None:
+                Ctt_ks = [[C.copy() for C in Cspin] for Cspin in Ct_ks]
+            else:
+                Ctt_ks = [C.copy() for C in Ct_ks]
+            Ctt_ks, moett_ks = mf.eig_subspace(
+                Ctt_ks, mo_occ, Gv=Gv, mesh=mesh
+            )[:2]
+            if s is not None:
+                moett_ks = moett_ks[s]
+                Ct_ks = Ct_ks[s]
+                Ctt_ks = Ctt_ks[s]
+            ham1 = np.einsum("ig,jg->ij", Ctt_ks[k], Ct_ks[k].conj())
+            ham2 = np.einsum("ki,i,ij->kj", ham1.conj().T, moett_ks[k], ham1)
+            return ham2
+
+        def _new_vbms(Ct_ks, vbm, cbm, k, s=None):
+            vj_R = mf.get_vj_R(Ct_ks, mo_occ)  # NOTE(review): result is never used -- confirm the call is needed
+            if s is not None:
+                Ct_ks = Ct_ks[s]
+            new_vbm = Ct_ks[k][vbm].copy()
+            new_cbm = Ct_ks[k][cbm].copy()
+            new_vbm_p = new_vbm + 0.5 * delta * new_cbm
+            new_vbm_m = new_vbm - 0.5 * delta * new_cbm
+            return new_vbm_p, new_vbm_m
+
+        def _run_test(s=None):
+            for k in range(nkpts):
+                Ct_ks, vbm, cbm = _transform(mf.mo_coeff, mo_occ, k, s=s)
+                _update(Ct_ks)
+                ham2 = _eig_subspace_ham(Ct_ks, k, s=s)
+                new_ham = mf.get_mo_energy(Ct_ks, mo_occ, full_ham=True)
+                if s is not None:
+                    new_ham = new_ham[s]
+                expected_de = new_ham[k][vbm, cbm] + new_ham[k][cbm, vbm]
+                if hasattr(mf, "xc"):
+                    if not mf._numint.libxc.is_hybrid_xc(mf.xc):
+                        ham2_term = ham2[vbm, cbm] + ham2[cbm, vbm]
+                        assert_allclose(ham2_term, expected_de)
+                new_vbm_p, new_vbm_m = _new_vbms(Ct_ks, vbm, cbm, k, s=s)
+
+                if s is None:
+                    Ct_ks[k][vbm] = new_vbm_m
+                else:
+                    Ct_ks[s][k][vbm] = new_vbm_m
+                _update(Ct_ks)
+                em = mf.energy_elec(Ct_ks, mo_occ, Gv=Gv, mesh=mesh)
+
+                if s is None:
+                    Ct_ks[k][vbm] = new_vbm_p
+                else:
+                    Ct_ks[s][k][vbm] = new_vbm_p
+                _update(Ct_ks)
+                ep = mf.energy_elec(Ct_ks, mo_occ, Gv=Gv, mesh=mesh)
+                fd = (ep - em) / delta  # central difference over the +/- delta/2 perturbations
+
+                # NOTE the factor of nkpts is because the fd energy
+                # is per unit cell, but the gap is the energy derivative
+                # for the supercell with respect to perturbing the orbital.
+                # The factor of 2 is because perturbing an occupied orbital
+                # in spin-restricted mode affects 2 electrons.
+                expected_de = expected_de * 2 / nkpts
+                if spinpol:
+                    expected_de /= 2
+                assert_allclose(expected_de, fd, atol=1e-7, rtol=1e-7)
+
+        if not spinpol:
+            _run_test()
+        else:
+            _run_test(s=0)
+            _run_test(s=1)
+
+    def test_fd_hf(self):
+        """
+        Run the _check_fd tests for spin-restricted and unrestricted
+        Hartree-Fock.
+        """
+        ref = -10.649288588747416  # expected total energy of the restricted calculation
+        rmf = self._get_calc(CELL, KPTS, nvir=2)
+        umf = self._get_calc(CELL, KPTS, nvir=2, spinpol=True)
+        assert_allclose(rmf.e_tot, ref, atol=1e-7, rtol=0)
+        assert_allclose(rmf.e_tot, umf.e_tot, atol=1e-7, rtol=0)
+        assert_allclose(rmf.mo_energy, umf.mo_energy[0])
+        assert_allclose(rmf.mo_energy, umf.mo_energy[1])
+        half_occ = [0.5 * occ for occ in rmf.mo_occ]  # each spin channel must hold half the restricted occupation
+        assert_allclose(half_occ, umf.mo_occ[0])
+        assert_allclose(half_occ, umf.mo_occ[1])
+        self._check_fd(rmf)
+        self._check_fd(umf)
+        umf = self._get_calc(ATOM, KPT1, nvir=2, spinpol=True,
+                             damp_type="anderson",
+                             ecut_wf=15)  # single-atom cell, unrestricted only
+        self._check_fd(umf)
+
+    def _check_fd_ks(self, xc, mesh=None, ref=None, run_atom=False):
+        """
+        Run the _check_fd tests for spin-restricted and unrestricted
+        Kohn-Sham DFT.
+        """
+        cell = CELL
+        atom = ATOM
+        if mesh is not None:
+            # Use a rebuilt copy with the requested mesh so that the
+            # module-level CELL fixture is left untouched.
+            cell = CELL.copy()
+            cell.mesh = mesh
+            cell.build()
+        rmf = self._get_calc(cell, KPTS, nvir=2, xc=xc, spinpol=False,
+                             damp_type="anderson", conv_tol=1e-8)
+        umf = self._get_calc(cell, KPTS, nvir=2, xc=xc, spinpol=True,
+                             damp_type="anderson", conv_tol=1e-8)
+        if ref is not None:
+            assert_allclose(rmf.e_tot, ref, atol=1e-7, rtol=0)
+        assert_allclose(rmf.e_tot, umf.e_tot, atol=1e-7, rtol=0)
+        assert_allclose(rmf.mo_energy, umf.mo_energy[0], atol=1e-7, rtol=0)
+        assert_allclose(rmf.mo_energy, umf.mo_energy[1], atol=1e-7, rtol=0)
+        half_occ = [0.5 * occ for occ in rmf.mo_occ]
+        assert_allclose(half_occ, umf.mo_occ[0], atol=1e-7, rtol=0)
+        assert_allclose(half_occ, umf.mo_occ[1], atol=1e-7, rtol=0)
+        self._check_fd(rmf)
+        self._check_fd(umf)
+        if run_atom:
+            # turning mixing off takes many steps to converge
+            # but anderson convergence is less consistent
+            umf = self._get_calc(atom, KPT1, nvir=2, xc=xc, spinpol=True,
+                                 damp_type="simple", damp_factor=0.0,
+                                 ecut_wf=15, ecut_rho=60, conv_tol=1e-8,
+                                 conv_tol_band=1e-6)
+            assert (umf.wf_mesh == umf.xc_mesh).all()
+            self._check_fd(umf)
+
+    def test_fd_ks_lda(self):  # LDA; also runs the single-atom check (run_atom=True)
+        self._check_fd_ks("LDA", ref=-10.453600311477887, run_atom=True)
+
+    def test_fd_ks_gga(self):  # PBE; also runs the single-atom check (run_atom=True)
+        self._check_fd_ks("PBE", ref=-10.931960348543591, run_atom=True)
+
+    def test_fd_ks_mgga(self):  # R2SCAN on a copy of CELL rebuilt with a 21^3 mesh
+        self._check_fd_ks("R2SCAN", mesh=[21, 21, 21], ref=-10.881956126701505)
+
+    def test_fd_ks_hyb(self):  # PBE0 on the default CELL mesh, no single-atom check
+        self._check_fd_ks("PBE0", ref=-10.940602656908139)
+
+    def test_smearing(self):
+        """
+        Make sure that smearing is working (should give similar energy
+        to the non-smearing calculation and have mf.mo_energy matching
+        get_mo_energy, e_tot matching energy_tot(..., moe_ks), etc.)
+        """
+        xc = "LDA,VWN"
+        rmf = self._get_calc(
+            CELL, KPTS, nvir=6, xc=xc, run=False, ecut_wf=15
+        )
+        umf1 = self._get_calc(
+            CELL, KPTS, nvir=6, spinpol=True, xc=xc, run=False, ecut_wf=15,
+        )
+        umf2 = self._get_calc(
+            ATOM, KPT1, nvir=2, spinpol=True, xc=xc, run=False, ecut_wf=15
+        )
+        assert_allclose(umf1.e_tot, rmf.e_tot, atol=1e-7)  # NOTE(review): no kernel has run yet (run=False) -- confirm this compares meaningful values
+        check = True  # the mo_energy consistency check below runs only for the first mf (rmf)
+        sigmas = [0.05, 0.05, 0.01]
+        new_mfs = []
+        for mf, sigma in zip([rmf, umf1, umf2], sigmas):
+            mf.kernel()
+            etot_nosmear = mf.e_tot
+            mf = smearing_(mf, sigma=sigma, method="gauss")
+            mf.kernel()
+            etot_ref = mf.e_tot
+            # energy with and without smearing doesn't change too much
+            assert_allclose(etot_ref, etot_nosmear, atol=1e-2)
+            moe_tst = mf.mo_energy
+            mo_energy, mo_occ = mf.get_mo_energy(mf.mo_coeff, mf.mo_occ)
+            if check:
+                check = False
+                assert_allclose(mo_energy, moe_tst, rtol=1e-8, atol=1e-8)
+            etot_check = mf.energy_tot(mf.mo_coeff, mf.mo_occ, mo_energy)
+            # looser threshold needed for some CI tests
+            assert_allclose(etot_check, etot_ref, atol=1e-5, rtol=0)
+            new_mfs.append(mf)
+        assert_allclose(new_mfs[1].e_tot, new_mfs[0].e_tot, atol=1e-7)  # restricted vs unrestricted on CELL
+        assert_allclose(new_mfs[1].mo_energy[0], new_mfs[0].mo_energy, atol=1e-7)
+        assert_allclose(new_mfs[1].mo_energy[1], new_mfs[0].mo_energy, atol=1e-7)
+        half_occ = [0.5 * occ for occ in new_mfs[0].mo_occ]
+        assert_allclose(new_mfs[1].mo_occ[0], half_occ, atol=1e-7)
+        assert_allclose(new_mfs[1].mo_occ[1], half_occ, atol=1e-7)
+        umf1 = umf2 = None  # clears the local names only; new_mfs still references the objects
+
+    def test_init_guesses(self):
+        """
+        Test a bunch of initial guesses for the SCF methods to make sure
+        they give consistent results and don't crash.
+        """
+        for spinpol in [False, True]:
+            mf = self._get_calc(
+                CELL, KPTS, nvir=2, xc="LDA,VWN", spinpol=spinpol,
+                ecut_wf=15, run=False
+            )
+            mf.conv_tol = 1e-8  # same value _get_calc already assigns
+            mf.init_guess = "hcore"
+            e_ref = mf.kernel()  # energy from the "hcore" guess is the reference for all others
+            e_tots = []
+            for ig in ["h1e", "cycle1", "scf"]:
+                mf.init_guess = ig
+                e_tots.append(mf.kernel())
+            e_tots.append(mf.kernel(C0=mf.mo_coeff))  # restart from the current orbitals
+            mf2 = self._get_calc(
+                CELL, KPTS, nvir=2, xc="LDA,VWN", spinpol=spinpol,
+                ecut_wf=15, run=False
+            )
+            e_tots.append(mf2.kernel(chkfile=mf.chkfile))  # fresh object started from mf's chkfile
+            C_ks, mocc_ks = mf.from_chk(mf.chkfile)
+            e_tots.append(mf2.energy_tot(C_ks, mocc_ks))
+            assert_allclose(np.array(e_tots) - e_ref, 0, atol=1e-7)
+
+    def test_meshes(self):
+        """
+        Make sure that modifying the choices of meshes works
+        """
+        mf = self._get_calc(
+            CELL, KPTS, nvir=2, xc="LDA,VWN", spinpol=False,
+            ecut_wf=15, run=False
+        )
+        # check the meshes are what we expect
+        assert (mf.wf_mesh == (19, 19, 19)).all()
+        assert (mf.xc_mesh == (35, 35, 35)).all()
+        mf2 = self._get_calc(
+            CELL, KPTS, nvir=2, xc="LDA,VWN", spinpol=False, run=False
+        )
+        orig_wf_mesh = mf.wf_mesh
+        orig_xc_mesh = mf.xc_mesh
+        e1 = mf.kernel()
+        # without ecut_wf, both meshes of mf2 must equal CELL.mesh
+        assert (mf2.wf_mesh == mf2.xc_mesh).all()
+        assert (mf2.wf_mesh == CELL.mesh).all()
+        # energy doesn't change because default wf_mesh avoids aliasing
+        mf.set_meshes(wf_mesh=[m+5 for m in orig_wf_mesh], xc_mesh=orig_xc_mesh)
+        e3 = mf.kernel()
+        assert_allclose(e1, e3, atol=1e-7)
+        mf.set_meshes(wf_mesh=orig_wf_mesh, xc_mesh=orig_wf_mesh)
+        e4 = mf.kernel()
+        # energy changes a bit because the XC integration precision changes
+        assert_allclose(e1, e4, atol=1e-5)
+
+
+if __name__ == "__main__":
+    # Finite difference for pbc.pwscf -- khf, kuhf, krks, kuks
+    unittest.main()
+
diff --git a/pyscf/pbc/pwscf/test/test_kpt_symm.py b/pyscf/pbc/pwscf/test/test_kpt_symm.py
new file mode 100644
index 000000000..9d9599aed
--- /dev/null
+++ b/pyscf/pbc/pwscf/test/test_kpt_symm.py
@@ -0,0 +1,175 @@
+import unittest
+from pyscf.pbc import gto as pbcgto
+from pyscf.pbc.pwscf import khf, krks, kuks, jk, kpt_symm
+from pyscf.pbc.pwscf.smearing import smearing_
+from pyscf.pbc.pwscf.pw_helper import wf_ifft
+import numpy as np
+from pyscf.pbc import tools
+from numpy.testing import assert_almost_equal
+import time
+
+
+ECUT_WF = 20  # passed as ecut_wf to the PW mean-field objects in this module
+PRINT_TIMES = False  # set to True to print the timings measured in the tests
+
+
+def get_mf_and_kpts():  # fixture: full-BZ PW-KRKS run + symmetry-aware cell/kpts
+    cell = pbcgto.Cell(
+        atom = "C 0 0 0; C 0.89169994 0.89169994 0.89169994",
+        a = np.asarray([
+                [0.       , 1.78339987, 1.78339987],
+                [1.78339987, 0.        , 1.78339987],
+                [1.78339987, 1.78339987, 0.        ]]),
+        basis="gth-szv",
+        ke_cutoff=50,
+        pseudo="gth-pade",
+        verbose=0,
+    )
+    cell.mesh = [20, 20, 20]  # mesh is set explicitly before build()
+    cell.build()
+    kmesh = (3, 3, 3)
+    kpts = cell.make_kpts(kmesh)  # full 3x3x3 mesh, no symmetry flags
+
+    cell_sym = cell.copy()  # separate copy with symmetry options switched on
+    cell_sym.space_group_symmetry = True
+    cell_sym.symmorphic = True
+    cell_sym.build()
+    kpts_sym = cell_sym.make_kpts(
+        kmesh,
+        time_reversal_symmetry=True,
+        space_group_symmetry=True,
+    )
+
+    mf = krks.PWKRKS(cell, kpts, xc="LDA,VWN", ecut_wf=ECUT_WF)
+    mf = smearing_(mf, sigma=0.01, method='gauss')
+    mf.kernel()  # the reference SCF runs on the full k-point mesh
+    return mf, cell, kpts_sym, cell_sym  # NOTE: the full kpts are not returned
+
+
+def setUpModule():
+    global mf, cell, kpts_sym, cell_sym
+    mf, cell, kpts_sym, cell_sym = get_mf_and_kpts()  # run the shared SCF once
+
+
+def tearDownModule():
+    # Drop every fixture created by setUpModule, including cell_sym.
+    global mf, cell, kpts_sym, cell_sym
+    del mf, cell
+    del kpts_sym, cell_sym
+
+
+class KnownValues(unittest.TestCase):
+    def test_get_rho(self):
+        """IBZ-based density and energy must reproduce the full-BZ results."""
+        C_ks = [coeff.copy() for coeff in mf.mo_coeff]
+        mocc_ks = mf.mo_occ
+
+        mf2 = kpt_symm.KsymAdaptedPWKRKS(cell, kpts_sym, ecut_wf=ECUT_WF)
+
+        t0 = time.monotonic()
+        rho_R = jk.get_rho_R(C_ks, mocc_ks, mf.wf_mesh, basis_ks=mf._basis_data)
+        t1 = time.monotonic()
+        Csym_ks = [C_ks[k_bz].copy() for k_bz in kpts_sym.ibz2bz]
+        moccsym_ks = [mocc_ks[k_bz] for k_bz in kpts_sym.ibz2bz]
+        t2 = time.monotonic()
+        rhosym_R = kpt_symm.get_rho_R_ksym(
+            Csym_ks, moccsym_ks, mf2.wf_mesh, kpts_sym, basis_ks=mf2._basis_data
+        )
+        t3 = time.monotonic()
+        assert np.max(np.abs(rhosym_R - rho_R)) / np.mean(rho_R) < 1e-4
+        if PRINT_TIMES:
+            print("TIMES", t1 - t0, t3 - t2, len(C_ks), len(Csym_ks))
+
+        mf2 = smearing_(mf2, sigma=0.01, method='gauss')
+        mf2.init_jk()
+        mf2.init_pp()
+        eref = mf.energy_elec(C_ks, mocc_ks)
+        epred = mf2.energy_elec(Csym_ks, moccsym_ks)
+
+        rho1 = mf.get_rho_for_xc("LDA", C_ks, mocc_ks)
+        rho2 = mf2.get_rho_for_xc("LDA", Csym_ks, moccsym_ks)
+        assert_almost_equal(np.abs(rho1 - rho2).mean() * cell.vol, 0, 6)
+        assert_almost_equal(epred, eref, 6)
+
+    def test_get_wf(self):
+        """Overlaps of orbitals rebuilt from the IBZ must match the BZ ones."""
+        C_ks = [coeff.copy() for coeff in mf.mo_coeff]
+        Csym_ks = [C_ks[k_bz].copy() for k_bz in kpts_sym.ibz2bz]
+        if mf._basis_data is not None:
+            ngrid = np.prod(mf.wf_mesh)
+            def _ecut2grid_(basis_ks, C_ks):
+                # scatter cutoff-basis coefficients onto the full grid, in place
+                for k, (basis, C_k) in enumerate(zip(basis_ks, C_ks)):
+                    nmo = C_k.shape[0]
+                    newC_k = np.zeros((nmo, ngrid), C_k.dtype)
+                    newC_k[:, basis.indexes] = C_k
+                    C_ks[k] = newC_k
+            _ecut2grid_(mf._basis_data, C_ks)
+            _ecut2grid_([mf._basis_data[k_bz] for k_bz in kpts_sym.ibz2bz], Csym_ks)
+        Cpred_ks = kpt_symm.get_C_from_C_ibz(Csym_ks, mf.wf_mesh, kpts_sym)
+        # only overlap magnitudes are compared (np.abs), never the phases
+        for _, Cref, Cpred in zip(mf.mo_energy, C_ks, Cpred_ks):
+            dot1 = np.einsum("ig,jg->ij", Cref.conj(), Cref)
+            dot2 = np.einsum("ig,jg->ij", Cref.conj(), Cpred)
+            dot3 = np.einsum("ig,jg->ij", Cpred.conj(), Cpred)
+            rdot1 = np.abs(dot1)
+            rdot2 = np.abs(dot2)
+            rdot3 = np.abs(dot3)
+            assert_almost_equal(rdot1[:2, :2], rdot2[:2, :2], 6)
+            assert_almost_equal(rdot1[:2], rdot2[:2], 4)
+            assert_almost_equal(rdot1, rdot3, 6)
+
+    def test_get_wf_real(self):
+        """Same overlap check as test_get_wf, on real-space orbitals."""
+        C_ks = [coeff.copy() for coeff in mf.mo_coeff]
+        if mf._basis_data is None:
+            C_ks_R = [tools.ifft(C_k, mf.wf_mesh) for C_k in C_ks]
+        else:
+            C_ks_R = [wf_ifft(C_k, mf.wf_mesh, basis)
+                      for C_k, basis in zip(C_ks, mf._basis_data)]
+        Csym_ks_R = [C_ks_R[k_bz].copy() for k_bz in kpts_sym.ibz2bz]
+        Cpred_ks_R = kpt_symm.get_C_from_C_ibz(Csym_ks_R, mf.wf_mesh, kpts_sym,
+                                               realspace=True)
+        # NOTE(review): norm is read from the reciprocal-space coefficient
+        # arrays; confirm this is the intended scale with a cutoff basis.
+        norm = C_ks[0].shape[-1]
+        for _, Cref, Cpred in zip(mf.mo_energy, C_ks_R, Cpred_ks_R):
+            dot1 = norm * np.einsum("ig,jg->ij", Cref.conj(), Cref)
+            dot2 = norm * np.einsum("ig,jg->ij", Cref.conj(), Cpred)
+            dot3 = norm * np.einsum("ig,jg->ij", Cpred.conj(), Cpred)
+            rdot1 = np.abs(dot1)
+            rdot2 = np.abs(dot2)
+            rdot3 = np.abs(dot3)
+            assert_almost_equal(rdot1[:2, :2], rdot2[:2, :2], 6)
+            assert_almost_equal(rdot1[:2], rdot2[:2], 4)
+            assert_almost_equal(rdot1, rdot3, 6)
+
+    def test_hf_symm(self):
+        """Symmetry-adapted PW-KRHF must reproduce the full-mesh HF energy."""
+        # Both runs use the same 3x3x3 mesh; the symmetry-adapted run gets
+        # k-points generated with time-reversal and space-group symmetry.
+        # mf and kpts_sym below are locals, not the module-level fixtures.
+        kmesh = (3, 3, 3)
+        kpts = cell.make_kpts(kmesh)
+
+        kpts_sym = cell_sym.make_kpts(
+            kmesh,
+            time_reversal_symmetry=True,
+            space_group_symmetry=True,
+        )
+        mf = khf.PWKRHF(cell, kpts, ecut_wf=10)
+        t0 = time.monotonic()
+        mf.kernel()
+        t1 = time.monotonic()
+        mf_sym = kpt_symm.KsymAdaptedPWKRHF(cell_sym, kpts_sym, ecut_wf=10)
+        t2 = time.monotonic()
+        mf_sym.kernel()
+        t3 = time.monotonic()
+        assert_almost_equal(mf_sym.e_tot, mf.e_tot, 5)
+        if PRINT_TIMES:
+            print(t1 - t0, t3 - t2)
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script
+
diff --git a/pyscf/pbc/pwscf/test/test_krccsd.py b/pyscf/pbc/pwscf/test/test_krccsd.py
new file mode 100644
index 000000000..e9471589f
--- /dev/null
+++ b/pyscf/pbc/pwscf/test/test_krccsd.py
@@ -0,0 +1,77 @@
+""" First do RHF and RCCSD calcs in a Gaussian basis, then re-evaluate the RHF
+and RCCSD energies using the PW code (for the fixed orbitals obtained from the
+Gaussian-based calculations). The energies obtained from the two approaches
+should agree.
+"""
+
+
+import h5py
+import tempfile
+import numpy as np
+
+from pyscf.pbc import gto, scf, pwscf, cc
+from pyscf.pbc.pwscf import khf, pw_helper
+from pyscf import lib
+import pyscf.lib.parameters as param
+
+import unittest
+
+
+class KnownValues(unittest.TestCase):
+    def _run_test(self, atom, a, basis, pseudo, ke_cutoff, kmesh, exxdiv,
+                  test_scf=True):
+        """Compare PW and GTO CCSD energies; optionally run a PW SCF too."""
+        cell = gto.Cell(
+            atom=atom,
+            a=a,
+            basis=basis,
+            pseudo=pseudo,
+            ke_cutoff=ke_cutoff
+        )
+        cell.build()
+        cell.verbose = 0
+        kpts = cell.make_kpts(kmesh)
+
+        # GTO reference: KRHF with the requested exxdiv setting, then CCSD
+        # on top of it
+        gmf = scf.KRHF(cell, kpts)
+        gmf.exxdiv = exxdiv
+        gmf.kernel()
+        gcc = cc.KCCSD(gmf)
+        gcc.kernel()
+
+        # PW: convert the GTO mean field and redo CCSD with the PW code
+        pmf = pw_helper.gtomf2pwmf(gmf)
+        pcc = pwscf.PWKRCCSD(pmf)
+        pcc.kernel()
+        self.assertLess(abs(gcc.e_corr - pcc.e_corr), 1.e-6)
+
+        if test_scf:
+            pwmf = pwscf.KRHF(cell, kpts, ecut_wf=20)
+            # need some virtual orbitals to converge davidson
+            pwmf.nvir = 4
+            pwmf.kernel()
+            pwcc = pwscf.PWKRCCSD(pwmf)
+            pwcc.kernel()
+            # regression value for the one system test_krccsd runs with SCF
+            self.assertLess(abs(pwcc.e_corr + 0.032784696721506294), 1.e-4)
+
+    def test_krccsd(self):
+        ke_cutoff = 50
+        basis = "gth-szv"
+        pseudo = "gth-pade"
+        exxdiv = "ewald"
+        atom = "Li 0 0 0; Li 1.75 1.75 1.75"
+        a = np.eye(3) * 3.5
+
+        # same occ per kpt
+        kmesh = [2,1,1]
+        self._run_test(atom, a, basis, pseudo, ke_cutoff, kmesh, exxdiv)
+        # diff occ per kpt (i.e., needs padding)
+        kmesh = [2,2,1]
+        self._run_test(atom, a, basis, pseudo, ke_cutoff, kmesh, exxdiv,
+                       test_scf=False)
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script
diff --git a/pyscf/pbc/pwscf/test/test_krhf_krmp2.py b/pyscf/pbc/pwscf/test/test_krhf_krmp2.py
new file mode 100644
index 000000000..67dc8f381
--- /dev/null
+++ b/pyscf/pbc/pwscf/test/test_krhf_krmp2.py
@@ -0,0 +1,106 @@
+""" Check PW-KRHF, PW-KRMP2 and read init guess from chkfile
+"""
+
+
+import h5py
+import tempfile
+import numpy as np
+
+from pyscf.pbc import gto, pwscf
+from pyscf import lib
+
+import unittest
+
+
+class KnownValues(unittest.TestCase):
+    def _run_test(self, pseudo, atom, a, e_tot0, e_corr0, mesh=None):
+        kmesh = [2,1,1]
+        ke_cutoff = 30
+
+        # cell (basis is always gth-szv; pseudo and mesh come from the caller)
+        cell = gto.Cell(
+            atom=atom,
+            a=a,
+            basis="gth-szv",
+            pseudo=pseudo,
+            ke_cutoff=ke_cutoff,
+            mesh=mesh,
+        )
+        cell.build()
+        cell.verbose = 0
+
+        # kpts
+        kpts = cell.make_kpts(kmesh)
+        nkpts = len(kpts)  # NOTE: not used anywhere in this helper
+
+        # tempfile: only its unique name is kept and reused as the chkfile path
+        swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+        chkfile = swapfile.name
+        swapfile = None
+
+        # krhf from the default initial guess
+        pwmf = pwscf.KRHF(cell, kpts)
+        pwmf.nvir = 10 # request 10 virtual states
+        pwmf.chkfile = chkfile
+        pwmf.kernel(save_ccecp_kb=True)
+        assert(abs(pwmf.e_tot - e_tot0) < 1.e-6)
+
+        # krhf init from chkfile (same pwmf.chkfile as the run above)
+        pwmf.init_guess = "chkfile"
+        pwmf.kernel()
+        assert(abs(pwmf.e_tot - e_tot0) < 1.e-6)
+
+        # input C0: restart from the orbitals of the previous run
+        pwmf.kernel(C0=pwmf.mo_coeff)
+        assert(abs(pwmf.e_tot - e_tot0) < 1.e-6)
+
+        # krmp2 on top of the last krhf run
+        pwmp = pwscf.KMP2(pwmf)
+        pwmp.kernel()
+        assert(abs(pwmp.e_corr - e_corr0) < 1.e-4)
+
+        pwmf = pwscf.KRHF(cell, kpts, ecut_wf=20)  # repeat with explicit ecut_wf
+        pwmf.nvir = 10 # request 10 virtual states
+        pwmf.chkfile = chkfile
+        pwmf.kernel(save_ccecp_kb=True)
+
+        pwmp = pwscf.KMP2(pwmf)
+        pwmp.kernel()
+        # higher relative error threshold because the PW basis is different
+        assert(abs((pwmp.e_corr - e_corr0) / e_corr0) < 5.e-2)
+
+    def test_alle(self):
+        atom = "He 0 0 0"  # run with pseudo=None (first argument below)
+        a = np.eye(3) * 2
+        mesh = [10, 10, 10]
+        self._run_test(
+            None, atom, a, -3.01953411844147, -0.0184642869417647, mesh
+        )
+
+    def test_ccecp(self):
+        atom = "C 0 0 0; C 0.89169994 0.89169994 0.89169994"
+        a = np.asarray(
+            [[0.        , 1.78339987, 1.78339987],
+             [1.78339987, 0.        , 1.78339987],
+             [1.78339987, 1.78339987, 0.        ]]
+        )
+        mesh = [10, 10, 10]
+        self._run_test(
+            "ccecp", atom, a, -10.6261884956522, -0.136781915070538, mesh
+        )
+
+    def test_gth(self):
+        atom = "C 0 0 0; C 0.89169994 0.89169994 0.89169994"
+        a = np.asarray(
+            [[0.        , 1.78339987, 1.78339987],
+             [1.78339987, 0.        , 1.78339987],
+             [1.78339987, 1.78339987, 0.        ]]
+        )
+        mesh = [10, 10, 10]
+        self._run_test(
+            "gth-pade", atom, a, -10.6754927046184, -0.139309030515543, mesh
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script
diff --git a/pyscf/pbc/pwscf/test/test_krmp2.py b/pyscf/pbc/pwscf/test/test_krmp2.py
new file mode 100644
index 000000000..d47fdb86b
--- /dev/null
+++ b/pyscf/pbc/pwscf/test/test_krmp2.py
@@ -0,0 +1,57 @@
+""" First do RHF and RMP2 calcs in a Gaussian basis, then re-evaluate the RHF
+and RMP2 energies using the PW code (for the fixed orbitals obtained from the
+Gaussian-based calculations). The energies obtained from the two approaches
+should agree.
+"""
+
+
+import h5py
+import tempfile
+import numpy as np
+
+from pyscf.pbc import gto, scf, pwscf, mp
+from pyscf.pbc.pwscf import khf, pw_helper
+from pyscf.pbc.pwscf import kmp2
+from pyscf import lib
+import pyscf.lib.parameters as param
+
+import unittest
+
+
+class KnownValues(unittest.TestCase):
+    def test_krmp2(self):
+        kmesh = [2,1,1]
+        ke_cutoff = 100
+        pseudo = "gth-pade"
+        exxdiv = "ewald"
+        atom = "H 0 0 0; H 0.9 0 0"
+        a = np.eye(3) * 3
+
+        # cell
+        cell = gto.Cell(
+            atom=atom,
+            a=a,
+            basis="gth-szv",
+            pseudo=pseudo,
+            ke_cutoff=ke_cutoff
+        )
+        cell.build()
+        cell.verbose = 0
+        kpts = cell.make_kpts(kmesh)
+
+        # GTO reference: KRHF followed by KMP2
+        gmf = scf.KRHF(cell, kpts)
+        gmf.exxdiv = exxdiv
+        gmf.kernel()
+        gmp = mp.KMP2(gmf)
+        gmp.kernel()
+
+        # PW: convert the GTO mean field and redo MP2 with the PW code
+        pmf = pw_helper.gtomf2pwmf(gmf)
+        pmp = kmp2.PWKRMP2(pmf)
+        pmp.kernel()
+        assert(abs(gmp.e_corr - pmp.e_corr) < 1.e-6)
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script
diff --git a/pyscf/pbc/pwscf/test/test_kuhf_kump2.py b/pyscf/pbc/pwscf/test/test_kuhf_kump2.py
new file mode 100644
index 000000000..93fe42918
--- /dev/null
+++ b/pyscf/pbc/pwscf/test/test_kuhf_kump2.py
@@ -0,0 +1,91 @@
+""" Check PW-KUHF, PW-KUMP2 and read init guess from chkfile
+"""
+
+
+import h5py
+import tempfile
+import numpy as np
+from numpy.testing import assert_allclose
+
+from pyscf.pbc import gto, pwscf
+from pyscf import lib
+
+import unittest
+
+
+class KnownValues(unittest.TestCase):
+    def _run_test(self, pseudo, atom, a, e_tot0, e_corr0):
+        nk = 1
+        ke_cutoff = 30
+        cell = gto.Cell(
+            atom=atom,
+            a=a,
+            spin=2, # triplet
+            basis="gth-szv",
+            pseudo=pseudo,
+            ke_cutoff=ke_cutoff,
+            mesh=[19, 19, 19],
+        )
+        cell.build()
+        cell.verbose = 0
+
+        # kpts
+        kmesh = [nk]*3
+        kpts = cell.make_kpts(kmesh)
+
+        # tempfile: only its unique name is kept and reused as the chkfile path
+        swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+        chkfile = swapfile.name
+        swapfile = None
+
+        # NOTE not sure why precision is lower here than for restricted
+        # kuhf from the default initial guess
+        pwmf = pwscf.KUHF(cell, kpts)
+        pwmf.nvir = 4 # request 4 virtual states
+        pwmf.chkfile = chkfile
+        pwmf.kernel()
+        assert_allclose(pwmf.e_tot, e_tot0, atol=1.e-4, rtol=0)
+
+        # kuhf init from chkfile
+        pwmf.init_guess = "chkfile"
+        pwmf.kernel()
+        assert_allclose(pwmf.e_tot, e_tot0, atol=1.e-4, rtol=0)
+
+        # input C0: restart from the orbitals of the previous run
+        pwmf.kernel(C0=pwmf.mo_coeff)
+        assert_allclose(pwmf.e_tot, e_tot0, atol=1.e-4, rtol=0)
+
+        # kump2
+        pwmp = pwscf.KUMP2(pwmf)
+        pwmp.kernel()
+        assert_allclose(pwmp.e_corr, e_corr0, atol=1.e-4, rtol=0)
+
+        pwmf = pwscf.KUHF(cell, kpts, ecut_wf=20)  # repeat with explicit ecut_wf
+        pwmf.nvir = 4
+        pwmf.chkfile = chkfile
+        pwmf.kernel(save_ccecp_kb=True)
+
+        pwmp = pwscf.KUMP2(pwmf)
+        pwmp.kernel()
+        # higher relative error threshold because the PW basis is different
+        assert(abs((pwmp.e_corr - e_corr0) / e_corr0) < 2e-2)
+
+    def test_gth(self):
+        pseudo = "gth-pade"
+        atom = "C 0 0 0"
+        a = np.eye(3) * 4   # atom in a cubic box
+        e_tot0 = -5.39796638192271
+        e_corr0 = -0.00682323936825284
+        self._run_test(pseudo, atom, a, e_tot0, e_corr0)
+
+    def test_ccecp(self):
+        pseudo = "ccecp"
+        atom = "C 0 0 0"
+        a = np.eye(3) * 4   # atom in a cubic box
+        e_tot0 = -5.35343662020727
+        e_corr0 = -0.00670287547309327
+        self._run_test(pseudo, atom, a, e_tot0, e_corr0)
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script
diff --git a/pyscf/pbc/pwscf/test/test_kump2.py b/pyscf/pbc/pwscf/test/test_kump2.py
new file mode 100644
index 000000000..1f7cd4293
--- /dev/null
+++ b/pyscf/pbc/pwscf/test/test_kump2.py
@@ -0,0 +1,59 @@
+""" First do UHF and UMP2 calcs in a Gaussian basis, then re-evaluate the UHF
+and UMP2 energies using the PW code (for the fixed orbitals obtained from the
+Gaussian-based calculations). The energies obtained from the two approaches
+should agree.
+"""
+
+
+import h5py
+import tempfile
+import numpy as np
+
+from pyscf.pbc import gto, scf, pwscf, mp
+from pyscf.pbc.pwscf import kuhf, pw_helper
+from pyscf import lib
+from pyscf.pbc.pwscf import kump2
+import pyscf.lib.parameters as param
+
+import unittest
+
+
+class KnownValues(unittest.TestCase):
+    def test_kump2(self):
+        kmesh = [1,1,1]
+        ke_cutoff = 50
+        pseudo = "gth-pade"
+        exxdiv = "ewald"
+        atom = "C 0 0 0"
+        a = np.eye(3) * 4
+
+        # cell (spin=2, as in the other unrestricted tests)
+        cell = gto.Cell(
+            atom=atom,
+            a=a,
+            basis="gth-szv",
+            pseudo=pseudo,
+            ke_cutoff=ke_cutoff,
+            spin=2
+        )
+        cell.build()
+        cell.verbose = 0
+        kpts = cell.make_kpts(kmesh)
+        nkpts = len(kpts)  # NOTE: not used anywhere in this test
+
+        # GTO reference; NOTE(review): UHF/UMP2 get a kpts array - confirm
+        gmf = scf.UHF(cell, kpts)
+        gmf.exxdiv = exxdiv
+        gmf.kernel()
+        gmp = mp.UMP2(gmf)
+        gmp.kernel()
+
+        # PW: convert the GTO mean field and redo MP2 with the PW code
+        pmf = pw_helper.gtomf2pwmf(gmf)
+        pmp = kump2.PWKUMP2(pmf)
+        pmp.kernel()
+        assert(abs(gmp.e_corr - pmp.e_corr) < 1.e-6)
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script
diff --git a/pyscf/pbc/pwscf/test/test_ncpp_cell.py b/pyscf/pbc/pwscf/test/test_ncpp_cell.py
new file mode 100644
index 000000000..257c58887
--- /dev/null
+++ b/pyscf/pbc/pwscf/test/test_ncpp_cell.py
@@ -0,0 +1,118 @@
+import unittest
+from pyscf.pbc.gto.cell import Cell
+from pyscf.pbc.pwscf.ncpp_cell import NCPPCell, DEFAULT_SG15_PATH
+from pyscf.pbc.pwscf.upf import get_nc_data_from_upf
+from pyscf.pbc.pwscf.krks import PWKRKS
+from pyscf.pbc.pwscf.kuks import PWKUKS
+from pyscf.pbc.pwscf import kpt_symm
+import pyscf.pbc
+import numpy as np
+import os
+from numpy.testing import assert_allclose
+
+
+pyscf.pbc.DEBUG = False  # module-wide side effect: clears pyscf.pbc's DEBUG flag
+HAVE_SG15 = DEFAULT_SG15_PATH is not None and os.path.exists(DEFAULT_SG15_PATH)  # SG15 path set and present
+
+
+def setUpModule():
+    global CELL, KPTS, KPTS2, ATOM, KPT1
+    CELL = NCPPCell(
+        atom = "C 0 0 0; C 0.89169994 0.89169994 0.89169994",
+        a = np.asarray([
+                [0.       , 1.78339987, 1.78339987],
+                [1.78339987, 0.        , 1.78339987],
+                [1.78339987, 1.78339987, 0.        ]]),
+        basis="gth-szv",
+        verbose=0,
+    )
+    if HAVE_SG15:  # cells are only built when the SG15 files are present
+        CELL.build()
+
+    kmesh = [2, 2, 2]
+    KPTS = CELL.make_kpts(kmesh)  # NOTE: runs even if CELL was not built
+
+    kmesh2 = [1, 1, 3]
+    KPTS2 = CELL.make_kpts(kmesh2)
+
+    ATOM = NCPPCell(  # NOTE: ATOM and KPT1 are not referenced by test_energy
+        atom = "C 0 0 0",
+        a = np.eye(3) * 4,
+        basis="gth-szv",
+        spin=2,
+        verbose=0,
+    )
+    if HAVE_SG15:
+        ATOM.build()
+
+    nk = 1
+    kmesh = (nk,)*3
+    KPT1 = ATOM.make_kpts(kmesh)
+
+
+def tearDownModule():
+    global CELL, ATOM, KPTS, KPTS2, KPT1
+    del CELL, ATOM, KPTS, KPTS2, KPT1  # release all fixtures set in setUpModule
+
+
+@unittest.skipIf(not HAVE_SG15, "Missing SG15 pseudos")
+class KnownValues(unittest.TestCase):
+    def test_energy(self):
+        ecut_wf = 18.38235294
+        e_ref2 = -10.801827216069011  # reference for the runs on KPTS2
+        e_ref = -11.221518554994296  # reference for the runs on KPTS
+        mf = PWKRKS(CELL, KPTS2, xc="PBE", ecut_wf=ecut_wf)
+        mf.conv_tol = 1e-9
+        mf.nvir = 4 # converge first 4 virtual bands
+        mf.kernel()
+        assert_allclose(mf.e_tot, e_ref2, atol=1e-7)
+        mf = PWKUKS(CELL, KPTS2, xc="PBE", ecut_wf=ecut_wf)  # same reference as PWKRKS
+        mf.nvir = 4
+        mf.conv_tol = 1e-9
+        mf.kernel()
+        assert_allclose(mf.e_tot, e_ref2, atol=1e-7)
+        mf = kpt_symm.KsymAdaptedPWKRKS(CELL, KPTS, xc="PBE", ecut_wf=ecut_wf)
+        mf.nvir = 4
+        mf.conv_tol = 1e-9
+        mf.kernel()
+        assert_allclose(mf.e_tot, e_ref, atol=1e-7)
+
+        # check that a cell round-tripped through dumps()/loads() still works
+        cell2 = NCPPCell.loads(CELL.dumps())
+        mf2 = kpt_symm.KsymAdaptedPWKRKS(
+            cell2, KPTS, xc="PBE", ecut_wf=ecut_wf
+        )
+        mf2.nvir = 4
+        mf2.init_pp()
+        mf2.init_jk()
+        assert_allclose(
+            mf2.energy_tot(mf.mo_coeff, mf.mo_occ), e_ref, atol=1e-7
+        )
+        # make sure original cell was not affected
+        assert_allclose(
+            mf.energy_tot(mf.mo_coeff, mf.mo_occ), e_ref, atol=1e-7
+        )
+
+        # make sure a ghost atom doesn't mess anything up
+        gcell = NCPPCell(
+            atom = """
+            C 0 0 0
+            C 0.89169994 0.89169994 0.89169994
+            ghost:C -0.9 -0.9 -0.9
+            """,
+            a = np.asarray([
+                    [0.       , 1.78339987, 1.78339987],
+                    [1.78339987, 0.        , 1.78339987],
+                    [1.78339987, 1.78339987, 0.        ]]),
+            basis="gth-szv",
+            verbose=0,
+        )
+        gcell.build()
+        mf2 = PWKRKS(gcell, KPTS2, xc="PBE", ecut_wf=ecut_wf)
+        mf2.kernel()  # default conv_tol here, hence the looser atol below
+        assert_allclose(mf2.e_tot, e_ref2, atol=1e-6, rtol=0)
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script
+
diff --git a/pyscf/pbc/pwscf/test/test_proj.py b/pyscf/pbc/pwscf/test/test_proj.py
new file mode 100644
index 000000000..b03abb6d5
--- /dev/null
+++ b/pyscf/pbc/pwscf/test/test_proj.py
@@ -0,0 +1,81 @@
+""" When orbitals from init guess uses a different grid mesh than the current
+calculation, perform a projection.
+"""
+
+import tempfile
+import numpy as np
+
+from pyscf import lib
+from pyscf.pbc import gto, pwscf
+
+import unittest
+
+
+def make_cell(atom, a, pseudo, ke_cutoff, mesh=None):
+    """Build a gth-szv cell and silence it; mesh defaults to [12, 12, 12]."""
+    grid = [12, 12, 12] if mesh is None else mesh
+    built = gto.Cell(
+        atom=atom,
+        a=a,
+        basis="gth-szv",
+        pseudo=pseudo,
+        ke_cutoff=ke_cutoff,
+        mesh=grid,
+    )
+    built.build()
+    built.verbose = 0
+    return built
+
+
+def make_mf(cell, kmesh):
+    """Return a not-yet-run PW KRHF object for cell on the kmesh k-points."""
+    kpoint_grid = cell.make_kpts(kmesh)
+    return pwscf.KRHF(cell, kpoint_grid)
+
+
+class KnownValues(unittest.TestCase):
+    def test_proj(self):
+        kmesh = [2,1,1]
+        ke_cutoffs = [30,40,50]
+        pseudo = "gth-pade"
+        atom = "C 0 0 0; C 0.89169994 0.89169994 0.89169994"
+        a = np.asarray(
+            [[0.       , 1.78339987, 1.78339987],
+            [1.78339987, 0.        , 1.78339987],
+            [1.78339987, 1.78339987, 0.        ]])
+
+        meshes = [  # one explicit mesh per entry of ke_cutoffs
+            [10, 10, 10],
+            [12, 12, 12],
+            [13, 13, 13],
+        ]
+        cells = [
+            make_cell(atom, a, pseudo, ke, mesh)
+            for ke, mesh in zip(ke_cutoffs, meshes)
+        ]
+        mfs = [make_mf(cell, kmesh) for cell in cells]
+
+        erefs = [-10.6754924867542, -10.6700816768958, -10.6734527455548]
+
+        # tempfile: only its unique name is kept and reused as the chkfile path
+        swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
+        chkfile = swapfile.name
+        swapfile = None
+        for mf in mfs:  # all three calculations share the same chkfile
+            mf.chkfile = chkfile
+
+        # run ke1
+        mfs[1].kernel()
+        assert(abs(mfs[1].e_tot-erefs[1]) < 1e-5)
+        # run ke0 with ke1 init guess (projection down)
+        mfs[0].init_guess = "chk"
+        mfs[0].kernel()
+        assert(abs(mfs[0].e_tot-erefs[0]) < 1e-5)
+        # run ke2 with ke0 init guess (projection up)
+        mfs[2].init_guess = "chk"
+        mfs[2].kernel()
+        assert(abs(mfs[2].e_tot-erefs[2]) < 1e-5)
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script
diff --git a/pyscf/pbc/pwscf/test/test_pwcpw.py b/pyscf/pbc/pwscf/test/test_pwcpw.py
new file mode 100644
index 000000000..714995390
--- /dev/null
+++ b/pyscf/pbc/pwscf/test/test_pwcpw.py
@@ -0,0 +1,73 @@
+""" Check PW occ + CPW vir for MP2
+CPW stands for "contracted PW", which refers to a PW expansion vector with
+*fixed* coefficients. This example generates such CPWs from the ccecp-cc-pvdz
+basis set.
+"""
+
+
+import h5py
+import tempfile
+import numpy as np
+
+from pyscf.pbc import gto, pwscf
+from pyscf import lib
+
+import unittest
+
+
+class KnownValues(unittest.TestCase):
+    def test_pwcpw(self):
+
+        kmesh = [2,1,1]
+        ke_cutoff = 30
+        basis_cpw = "ccecp-cc-pvdz"  # basis passed to get_cpw_virtual below
+        pseudo = "gth-pade"
+        atom = "C 0 0 0; C 0.89169994 0.89169994 0.89169994"
+        a = np.asarray(
+            [[0.       , 1.78339987, 1.78339987],
+            [1.78339987, 0.        , 1.78339987],
+            [1.78339987, 1.78339987, 0.        ]])
+
+        # cell
+        cell = gto.Cell(
+            atom=atom,
+            a=a,
+            basis="gth-szv",
+            pseudo=pseudo,
+            ke_cutoff=ke_cutoff,
+            mesh=[10, 10, 10],
+        )
+        cell.build()
+        cell.verbose = 0
+
+        # kpts
+        kpts = cell.make_kpts(kmesh)
+        nkpts = len(kpts)  # NOTE: not used anywhere in this test
+
+        # HF
+        mf = pwscf.KRHF(cell, kpts)
+        mf.kernel()
+        assert(abs(mf.e_tot - -10.6754924867542) < 1.e-6)
+
+        # MP2 after generating CPW virtuals from basis_cpw
+        moe_ks, mocc_ks = mf.get_cpw_virtual(basis_cpw)
+        mf.dump_moe(moe_ks, mocc_ks)
+        mmp = pwscf.KMP2(mf)
+        mmp.kernel()
+        assert(abs(mmp.e_corr - -0.215895180360867) < 1.e-6)
+
+        # HF with a plane-wave cutoff
+        mf = pwscf.KRHF(cell, kpts, ecut_wf=20)
+        mf.kernel()
+
+        # MP2 again, compared against the same reference value as above
+        moe_ks, mocc_ks = mf.get_cpw_virtual(basis_cpw)
+        mf.dump_moe(moe_ks, mocc_ks)
+        mmp = pwscf.KMP2(mf)
+        mmp.kernel()
+        # higher threshold because we use different basis
+        assert(abs(mmp.e_corr - -0.215895180360867) < 1.e-3)
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script
diff --git a/pyscf/pbc/pwscf/upf.py b/pyscf/pbc/pwscf/upf.py
new file mode 100644
index 000000000..1b3e1a394
--- /dev/null
+++ b/pyscf/pbc/pwscf/upf.py
@@ -0,0 +1,224 @@
+#!/usr/bin/env python
+# Copyright 2014-2025 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Kyle Bystrom <kylebystrom@gmail.com>
+#
+
+""" UPF file parser. Currently just for parsing SG15 pseudos.
+"""
+
+import xml.etree.ElementTree as ET
+import numpy as np
+from math import factorial as fac
+from scipy.integrate import trapezoid
+
+
+def _parse_array_upf(entry, dtype=float):  # entry: XML element whose .text holds whitespace-separated numbers
+    return np.fromstring(entry.text, sep=' ', dtype=dtype)
+
+
+def get_nc_data_from_upf(fname):
+    """Read a norm-conserving pseudopotential from the UPF (XML) file ``fname``.
+
+    The projectors and the local part are also transformed to reciprocal
+    space with fft_upf, which assumes a linear radial grid.
+
+    Returns:
+        dict: "z" (z_valence from PP_HEADER rounded to int), "projectors"
+        (list of dicts with keys "n", "l", "cut", "rproj", "kproj"), "dij"
+        (PP_DIJ matrix times 0.5), "local_part" (keys "real", "recip",
+        "finite_g0", "locpotk") and "grids" (keys "r", "dr", "k").
+    """
+    root = ET.parse(fname).getroot()
+    vloc_r = _parse_array_upf(root.find('PP_LOCAL'))
+    mesh_node = root.find('PP_MESH')
+    pp_r = _parse_array_upf(mesh_node.find('PP_R'))
+    pp_dr = _parse_array_upf(mesh_node.find('PP_RAB'))
+    # add some buffer to the arrays when Fourier transforming
+    # to get a denser grid in k-space
+    buf = pp_r.size
+    dij = None
+    projectors = []
+    for node in root.find('PP_NONLOCAL'):
+        if node.tag == "PP_DIJ":
+            dij = _parse_array_upf(node)
+            continue
+        # any other child is treated as a projector; its "index" is stored minus one
+        proj = {
+            "n": int(node.attrib["index"]) - 1,
+            "l": int(node.attrib["angular_momentum"]),
+            "cut": int(node.attrib["cutoff_radius_index"]),
+            "rproj": _parse_array_upf(node),
+        }
+        proj["kproj"] = fft_upf(pp_r, proj["rproj"], proj["l"], mul_by_r=False, buf=buf)[1]
+        projectors.append(proj)
+    assert dij is not None
+    dij = dij.reshape(len(projectors), len(projectors))
+    # NOTE(review): with deriv_order=2 the stencil weights seem to give f''/2, not f'';
+    # presumably that 1/2 doubles as the Ry -> Ha conversion of PP_LOCAL -- confirm.
+    charge = make_radial_derivative_calculator(pp_r, 2, 2)[0](vloc_r * pp_r)
+    # NOTE this is the non-divergent G=0 term of the local pseudo.
+    # It should be 4*pi*Q1 in the expansion Q(k) = Q(0) + Q1 k^2 + ...
+    # where Q(k) is the pseudo-charge. Here this is computed
+    # from I2 = \int d^3r r^2 Q(r). Q1 is -I2/6.
+    g0lim = -0.5 * trapezoid(charge * pp_r**3, x=pp_r)
+    g0lim *= 4 * np.pi / 3
+    # divide by r away from the origin and copy the neighbouring value into r[0]
+    charge[1:] /= pp_r[1:]
+    charge[0] = charge[1]
+    pp_k, chargek = fft_upf(pp_r, charge, 0, buf=buf)
+    chargek /= 4 * np.pi
+    # local potential in k-space: 4*pi*charge(k)/k^2, with the k=0 entry copied from k[1]
+    locpotk = chargek.copy()
+    locpotk[1:] *= 4 * np.pi / pp_k[1:]**2
+    locpotk[0] = locpotk[1]
+    assert (np.diag(np.diag(dij)) == dij).all(), "dij must be diagonal"
+    # Another, less precise way to compute the Q1 term is finite difference
+    # derivative in reciprocal space.
+    # ikd0, ikd1 = 0, 1
+    # g0lim = 4 * np.pi * (chargek[ikd1] - chargek[ikd0])
+    # g0lim /= (pp_k[ikd1]**2 - pp_k[ikd0]**2)
+    z_valence = float(root.find("PP_HEADER").attrib["z_valence"])
+    local_part = {"real": charge, "recip": chargek, "finite_g0": g0lim, "locpotk": locpotk}
+    return {
+        "z": int(round(z_valence)),
+        "projectors": projectors,
+        "dij": 0.5 * dij,  # convert to Ha
+        "local_part": local_part,
+        "grids": {"r": pp_r, "dr": pp_dr, "k": pp_k},
+    }
+
+
+def _get_deriv_weights(r_g, D, i, istart, deriv_order):
+    """Return D weights w for the stencil r_g[istart:istart+D], expanded about r_g[i].
+
+    w solves sum_j w_j * (r_j - r_g[i])**k = delta(k, deriv_order) for k = 0..D-1, so
+    sum_j w_j * f(r_j) approximates the Taylor coefficient f^(deriv_order)(r_g[i]) / deriv_order!.
+    """
+    offsets = [r_g[istart + j] - r_g[i] for j in range(D)]
+    powers = np.array([[dr ** k for dr in offsets] for k in range(D)], dtype=float)
+    rhs = np.eye(D)[deriv_order]
+    return np.linalg.solve(powers, rhs)
+
+
+def fsbt(l, f_g, r_g, G_k, mul_by_r):
+    """Fast spherical Bessel transform, following the implementation in GPAW.
+
+    Returns the integral over r from 0 to infinity of r^2 j_l(G r) f(r) dr using
+    l+1 FFTs, where f(r) is f_g if mul_by_r is True and f_g / r otherwise.
+
+    Args:
+        l (int): order of the spherical Bessel function j_l.
+        f_g (np.ndarray): samples on r_g; it is never modified (a copy is used).
+        r_g (np.ndarray): radial grid; r_g[1] is used as the uniform grid spacing.
+        G_k (np.ndarray): output wave vectors; the FFT length is 2 * (len(G_k) - 1).
+        mul_by_r (bool): multiply f_g by r_g before transforming.
+    """
+    N = (len(G_k) - 1) * 2
+    f_k = 0.0
+    # Work on a private copy: the loop divides F_g by r in place and must
+    # not clobber the caller's f_g when mul_by_r is False.
+    F_g = f_g * r_g if mul_by_r else np.array(f_g)
+    for n in range(l + 1):
+        f_k += (r_g[1] * (1j)**(l + 1 - n) *
+                fac(l + n) / fac(l - n) / fac(n) / 2**n *
+                np.fft.rfft(F_g, N)).real * G_k**(l - n)
+        F_g[1:] /= r_g[1:]
+
+    f_k[1:] /= G_k[1:]**(l + 1)
+    if l == 0:
+        # G = 0 limit (j_0(0) = 1): direct sum of r^2 f(r) dr; F_g is f(r) here when mul_by_r is False.
+        f0_g = f_g if mul_by_r else F_g
+        f_k[0] = np.dot(r_g, f0_g * r_g) * r_g[1]
+    return f_k
+
+
+def fft_upf(r, f, l, mul_by_r=True, buf=0):
+    """Return (G, fk): a k-grid and 4*pi times the order-l transform fsbt of f on r.
+
+    Assumes a linear grid with step r[1]; buf > 0 appends buf grid points with f = 0.
+    """
+    if buf > 0:
+        r = np.append(r, r[-1] + r[1] + r[1] * np.arange(buf))
+        f = np.append(f, np.zeros(buf, dtype=f.dtype))
+    G = np.linspace(0, np.pi / r[1], r.size // 2 + 1)
+    return G, 4 * np.pi * fsbt(l, f, r, G, mul_by_r=mul_by_r)
+
+
+def make_radial_derivative_calculator(r_g, deriv_order=1, stencil_order=2):
+    """
+    This utility function takes an arbitrary radial grid and returns
+    a function that calculates numerical derivatives on that grid.
+    Based on the function in CiderPress of the same name. This
+    function might be less precise than more sophisticated
+    techniques of the same order, but it has the benefit that it
+    can be used on arbitrary radial grids, without knowledge of
+    the particular grid being used. A second function is also
+    returned that can evaluate the derivative of the radial
+    derivative with respect to a change in function value.
+
+    NOTE: the stencil weights reproduce the Taylor coefficient
+    f^(n)(r) / n! with n = deriv_order, so the result equals the
+    n-th derivative only for n <= 1 (for n = 2 it is f''(r) / 2).
+
+    Args:
+        r_g (np.ndarray): grid on which to compute derivative
+        deriv_order (int): order of the derivative
+        stencil_order (int): 2*stencil_order+1 nearby points are
+            used to compute the derivative.
+
+    Returns:
+        tuple: the derivative evaluator and its transpose (backward)
+        operator; both act on the last axis of their argument.
+
+    Raises:
+        AssertionError: if stencil_order < 1 or r_g.size < 2*stencil_order+1.
+    """
+    N = r_g.size
+    SO = stencil_order
+    D = 2 * SO + 1
+    assert SO > 0, "Order must be > 0"
+    # Every stencil spans D grid points, so N > SO alone is not sufficient.
+    assert N >= D, "Grid too small"
+    weight_list = np.empty((D, N))
+    for i in range(N):
+        # Centered window of D points, shifted inward near either end of the grid.
+        istart = min(max(i - SO, 0), N - D)
+        weight_list[:, i] = _get_deriv_weights(r_g, D, i, istart, deriv_order)
+    end = N - D + 1  # number of interior points, i.e. len(range(SO, N - SO))
+
+    def _eval_radial_deriv(func_xg):
+        # Forward operator: apply the stencils along the last axis of func_xg.
+        deriv_xg = np.empty_like(func_xg)
+        # The first/last SO points share the first/last D-point window.
+        deriv_xg[..., :SO] = np.einsum("...g,gd->...d", func_xg[..., :D], weight_list[:, :SO])
+        deriv_xg[..., -SO:] = np.einsum("...g,gd->...d", func_xg[..., -D:], weight_list[:, -SO:])
+        # Interior points: accumulate the D shifted, weighted copies of func_xg.
+        deriv_xg[..., SO:-SO] = weight_list[0, SO:-SO] * func_xg[..., :end]
+        for d in range(1, D):
+            deriv_xg[..., SO:-SO] += weight_list[d, SO:-SO] * func_xg[..., d : end + d]
+        return deriv_xg
+
+    def _eval_radial_deriv_bwd(vderiv_xg):
+        # Transpose of _eval_radial_deriv: scatter each output's weights back to its inputs.
+        vfunc_xg = np.zeros_like(vderiv_xg)
+        vfunc_xg[..., :end] = weight_list[0, SO:-SO] * vderiv_xg[..., SO:-SO]
+        for d in range(1, D):
+            vfunc_xg[..., d : end + d] += weight_list[d, SO:-SO] * vderiv_xg[..., SO:-SO]
+        vfunc_xg[..., :D] += np.einsum("...d,gd->...g", vderiv_xg[..., :SO], weight_list[:, :SO])
+        vfunc_xg[..., -D:] += np.einsum("...d,gd->...g", vderiv_xg[..., -SO:], weight_list[:, -SO:])
+        return vfunc_xg
+
+    return _eval_radial_deriv, _eval_radial_deriv_bwd