Skip to content
Merged
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 63 additions & 23 deletions pyscf/tools/trexio.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,21 @@
from pyscf import gto
from pyscf import scf
from pyscf import pbc
from pyscf import mcscf
from pyscf import fci

import trexio

def to_trexio(obj, filename, backend='h5'):
def to_trexio(obj, filename, backend='h5', ci_threshold=None, chunk_size=None):
with trexio.File(filename, 'u', back_end=_mode(backend)) as tf:
if isinstance(obj, gto.Mole):
_mol_to_trexio(obj, tf)
elif isinstance(obj, scf.hf.SCF):
_scf_to_trexio(obj, tf)
elif isinstance(obj, mcscf.casci.CASCI) or isinstance(obj, mcscf.CASSCF):
ci_threshold = ci_threshold if ci_threshold is not None else 0.
chunk_size = chunk_size if chunk_size is not None else 100000
_mcscf_to_trexio(obj, tf, ci_threshold=ci_threshold, chunk_size=chunk_size)
else:
raise NotImplementedError(f'Conversion function for {obj.__class__}')

Expand Down Expand Up @@ -248,8 +253,39 @@ def _scf_to_trexio(mf, trexio_file):
def _cc_to_trexio(cc_obj, trexio_file):
raise NotImplementedError

def _mcscf_to_trexio(cas_obj, trexio_file):
raise NotImplementedError
def _mcscf_to_trexio(cas_obj, trexio_file, ci_threshold=0., chunk_size=100000):
mol = cas_obj.mol
_mol_to_trexio(mol, trexio_file)
mo_energy_cas = cas_obj.mo_energy
mo_cas = cas_obj.mo_coeff
num_mo = mo_energy_cas.size
spin_cas = np.zeros(mo_energy_cas.size, dtype=int)
mo_type_cas = 'CAS'
trexio.write_mo_type(trexio_file, mo_type_cas)
idx = _order_ao_index(mol)
trexio.write_mo_num(trexio_file, num_mo)
trexio.write_mo_coefficient(trexio_file, mo_cas[idx].T.ravel())
trexio.write_mo_energy(trexio_file, mo_energy_cas)
trexio.write_mo_spin(trexio_file, spin_cas)

ncore = cas_obj.ncore
ncas = cas_obj.ncas
mo_classes = np.array(["Virtual"] * num_mo, dtype=str) # Initialize all MOs as Virtual
mo_classes[:ncore] = "Core"
mo_classes[ncore:ncore + ncas] = "Active"
trexio.write_mo_class(trexio_file, list(mo_classes))

occupation = np.zeros(num_mo)
occupation[:ncore] = 2.0
rdm1 = cas_obj.fcisolver.make_rdm1(cas_obj.ci, ncas, cas_obj.nelecas)
natural_occ = np.linalg.eigh(rdm1)[0]
occupation[ncore:ncore + ncas] = natural_occ[::-1]
occupation[ncore + ncas:] = 0.0
trexio.write_mo_occupation(trexio_file, occupation)

total_elec_cas = sum(cas_obj.nelecas)

det_to_trexio(cas_obj, ncas, total_elec_cas, trexio_file, ci_threshold, chunk_size)

def mol_from_trexio(filename):
mol = gto.Mole()
Expand Down Expand Up @@ -329,6 +365,13 @@ def write_eri(eri, filename, backend='h5'):
idx[:,:,2:] = idx_pair[None,:,:]
idx = idx[np.tril_indices(npair)]

# Started working on physicist notation
# idx=idx.reshape((num_integrals,4))
# for i in range(num_integrals):
# idx[i,1],idx[i,2]=idx[i,2],idx[i,1]
#
# idx=idx.flatten()

with trexio.File(filename, 'w', back_end=_mode(backend)) as tf:
trexio.write_mo_2e_int_eri(tf, 0, num_integrals, idx, eri.ravel())

Expand Down Expand Up @@ -410,17 +453,19 @@ def get_occsa_and_occsb(mcscf, norb, nelec, ci_threshold=0.):

return occsa_sorted, occsb_sorted, ci_values_sorted, num_determinants

def det_to_trexio(mcscf, norb, nelec, filename, backend='h5', ci_threshold=0., chunk_size=100000):
def det_to_trexio(mcscf, norb, nelec, trexio_file, ci_threshold=0., chunk_size=100000):
from trexio_tools.group_tools import determinant as trexio_det
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @NastaMauger, in my opinion, we should not import trexio_tools; otherwise, pyscf has an additional dependency on trexio_tools. In other words, when we make a pull request to the main repository in the future, we should ask the committee member of pyscfto add not only trexio but also trexio_tools in the setup.cfg, which is not a good idea. We should hardcode it. What do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want to know your thoughts, @q-posev and @scemama.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be possible to accept this PR as it is and open a new one with your idea? I agree that your suggestion would help reduce dependencies, but since this has already been validated by everyone and the only thing preventing the final merge is unrelated to the current modification, I would really appreciate if it could be merged

To be honest, I’m not sure how much work this would require, especially since this PR has been open for a while simply because I had forgotten about it

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want to know your thoughts, @q-posev and @scemama.

I agree. We should not import trexio tools.
Also, I recently removed the déterminant module from trexio tools because it was redundant with native functions of trexio.


mo_num = norb
ncore = mcscf.ncore
int64_num = int((mo_num - 1) / 64) + 1
Copy link

@q-posev q-posev Jul 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can do int64_num = trexio.get_int64_num(trexio_file) here, the way @scemama did in PR #152

Copy link
Contributor Author

@NastaMauger NastaMauger Jul 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the user registers the determinant in a new/different HDF5 file, this will not work.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is possible that in previous versions of trexio you could bypass the need of storing the number of MOs in the trexio file, but it was because some internal consistency checking was lacking in TREXIO. In the current version, you can't write determinants if you don't write the number of MOs in the MO group to ensure that the determinants you store are consistent. So it should be totally equivalent.

I would rather use the library function call than compute it, because it will always return the value expected by the library. Here it is possible that user makes a mistake, and that norb is not the same as the value stored in the mo group.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I guess, I need to update TREXIO on my own laptop because int64_num = trexio.get_int64_num(trexio_file) does not work on my end


occsa, occsb, ci_values, num_determinants = get_occsa_and_occsb(mcscf, norb, nelec, ci_threshold)

det_list = []
for a, b, coeff in zip(occsa, occsb, ci_values):
occsa_upshifted = [orb + 1 for orb in a]
occsb_upshifted = [orb + 1 for orb in b]
occsa_upshifted = [orb for orb in range(ncore)] + [orb+ncore for orb in a]
occsb_upshifted = [orb for orb in range(ncore)] + [orb+ncore for orb in b]
det_tmp = []
det_tmp += trexio_det.to_determinant_list(occsa_upshifted, int64_num)
det_tmp += trexio_det.to_determinant_list(occsb_upshifted, int64_num)
Expand All @@ -431,24 +476,19 @@ def det_to_trexio(mcscf, norb, nelec, filename, backend='h5', ci_threshold=0., c
else:
n_chunks = 1

with trexio.File(filename, 'u', back_end=_mode(backend)) as tf:
if trexio.has_determinant(tf):
trexio.delete_determinant(tf)
trexio.write_mo_num(tf, mo_num)
trexio.write_electron_up_num(tf, len(a))
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this removed? We need the number of up and down electrons to perform the CI determinants "correctness" check before writing them in the file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now handled by the _mcscf_to_trexio function, which calls _mol_to_trexio.

trexio.write_electron_dn_num(tf, len(b))
trexio.write_electron_num(tf, len(a) + len(b))
if trexio.has_determinant(trexio_file):
trexio.delete_determinant(trexio_file)

offset_file = 0
for i in range(n_chunks):
start = i * chunk_size
end = min((i + 1) * chunk_size, num_determinants)
current_chunk_size = end - start

if current_chunk_size > 0:
trexio.write_determinant_list(tf, offset_file, current_chunk_size, det_list[start:end])
trexio.write_determinant_coefficient(tf, offset_file, current_chunk_size, ci_values[start:end])
offset_file += current_chunk_size
offset_file = 0
for i in range(n_chunks):
start = i * chunk_size
end = min((i + 1) * chunk_size, num_determinants)
current_chunk_size = end - start

if current_chunk_size > 0:
trexio.write_determinant_list(trexio_file, offset_file, current_chunk_size, det_list[start:end])
trexio.write_determinant_coefficient(trexio_file, offset_file, current_chunk_size, ci_values[start:end])
offset_file += current_chunk_size

def read_det_trexio(filename):
with trexio.File(filename, 'r', back_end=trexio.TREXIO_AUTO) as tf:
Expand Down
Loading