Skip to content
Open
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
e3a3d08
update get_basis
AsymmetryChou Aug 23, 2024
ef97257
update get_structure
AsymmetryChou Aug 23, 2024
36e9430
update check_siesta.ipynb
AsymmetryChou Aug 23, 2024
d09c362
update check_siesta.ipynb
AsymmetryChou Aug 23, 2024
5dba3e2
Merge branch 'main' into io_siesta
AsymmetryChou Jul 7, 2025
8087e74
feat( SIESTA parser): enhance find_content method and implement get_e…
AsymmetryChou Jul 7, 2025
39d4144
fix(README): update SIESTA support status to include eigenvalues
AsymmetryChou Jul 7, 2025
c89819d
feat(siesta_parser): add k-points unit conversion and improve eigenva…
AsymmetryChou Jul 8, 2025
413a430
feat(test): add test data for SIESTA
AsymmetryChou Jul 8, 2025
5fc959d
update try_siesta.ipynb
AsymmetryChou Jul 8, 2025
309502b
fix(try_siesta): update notebook
AsymmetryChou Jul 8, 2025
a67cbca
set find_content as staticmethod
AsymmetryChou Jul 8, 2025
a2cff97
feature(siesta_parse): add unittest
AsymmetryChou Jul 8, 2025
092c7fb
Add unit test for parsing Siesta output data
AsymmetryChou Jul 8, 2025
1ec76f2
add PROJECT_ROOT to enforce pytest the local code ./dftio
AsymmetryChou Jul 8, 2025
250210d
add DM file
AsymmetryChou Jul 8, 2025
2b937d8
add PROJECT_root and test for DM
AsymmetryChou Jul 8, 2025
b95f5fd
update try_siesta.ipynb
AsymmetryChou Jul 8, 2025
36571a9
增强SiestaParser以支持多个文件搜索和错误处理,改进LatticeVectors和.band解析
AsymmetryChou Jul 12, 2025
5373c0e
更新SiestaParser以扩展跳过文件格式的列表,增加对'ion'和'psdump'格式的支持
AsymmetryChou Jul 12, 2025
0d5f02d
更新SiestaParser以改进警告信息,明确未找到目标字符串时使用SIESTA默认值
AsymmetryChou Jul 12, 2025
8f0264f
增强SiestaParser以检查SIESTA输入文件中WriteKbands设置,确保其为true以提取k点和特征值
AsymmetryChou Jul 12, 2025
235f8b6
增强SiestaParser findcontent函数以支持不区分大小写的字符串搜索
AsymmetryChou Jul 12, 2025
eaeba14
增强SiestaParser以支持不区分大小写的字符串搜索,并改进WriteKbands设置检查
AsymmetryChou Jul 12, 2025
5835a1a
增强SiestaParser以检查SIESTA输入文件中的WriteKbands设置,并确保SystemLabel被定义
AsymmetryChou Jul 12, 2025
e5ea14b
Add support for isolated system's enegy level plot in Bandplot
AsymmetryChou Jul 12, 2025
8c81a6e
Merge branch 'main' into io_siesta
AsymmetryChou Nov 2, 2025
b0b05d1
更新 .gitignore,添加测试文件和示例文件的忽略规则
AsymmetryChou Nov 2, 2025
6d79f49
add total energy extractor and its unittest (to do: check func in `md…
AsymmetryChou Nov 2, 2025
1dbac38
rename abacus as abacus_scf
AsymmetryChou Nov 3, 2025
c777c64
add abacus_md test data
AsymmetryChou Nov 3, 2025
b73b5b6
update test_abacus_energy.py with accuracy float64 and correct path
AsymmetryChou Nov 3, 2025
ce32e8b
add abacus_relax test data
AsymmetryChou Nov 3, 2025
5311322
将能量返回类型从 float32 更改为 float64,以提高精度
AsymmetryChou Nov 3, 2025
a8eecfb
更新 AbacusParser 以支持 MD 和 RELAX 模式下的能量提取,并添加相应的测试用例
AsymmetryChou Nov 3, 2025
ac56166
在能量提取函数中添加关于 ABACUS 版本 3.9.0 的验证说明
AsymmetryChou Nov 3, 2025
9b74469
remove un-necessary nframe and change _extract_energy_from_log as sta…
AsymmetryChou Nov 5, 2025
7e5e07e
change total_energy.npy as .dat format
AsymmetryChou Nov 16, 2025
1ac08b0
添加对未收敛帧的支持,包括在能量提取和写入过程中记录未收敛帧的索引
AsymmetryChou Nov 16, 2025
d5573f7
更新 AbacusParser 文档,详细说明能量提取返回值和未收敛帧的处理逻辑
AsymmetryChou Nov 16, 2025
dd216ce
Refactor: add _get_output_dir for general case
AsymmetryChou Nov 18, 2025
8852590
replace subprocee with os.system for safer operation
AsymmetryChou Nov 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,10 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/


# test files
test/data/siesta/siesta_out/*
example/siesta_io/siesta_io.ipynb
CLAUDE.md
playground/*
6 changes: 6 additions & 0 deletions dftio/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,12 @@ def main_parser() -> argparse.ArgumentParser:
default=0,
help="The initial band index for eigenvalues to save.(0-band_index_min) bands will be ignored!"
)
parser_parse.add_argument(
"-energy",
"--energy",
action="store_true",
help="Whether to parse the total energy (Etot) from DFT output",
)

parser_band = subparsers.add_parser(
"band",
Expand Down
1 change: 1 addition & 0 deletions dftio/data/_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@

PER_ATOM_ENERGY_KEY: Final[str] = "atomic_energy"
TOTAL_ENERGY_KEY: Final[str] = "total_energy"
UNCONVERGED_FRAME_INDICES_KEY: Final[str] = "unconverged_frames"
FORCE_KEY: Final[str] = "forces"
PARTIAL_FORCE_KEY: Final[str] = "partial_forces"
STRESS_KEY: Final[str] = "stress"
Expand Down
157 changes: 157 additions & 0 deletions dftio/io/abacus/abacus_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
from dftio.data import _keys
from dftio.register import Register
import lmdb
import logging

log = logging.getLogger(__name__)
import pickle
import shutil

Expand Down Expand Up @@ -414,6 +417,160 @@ def transform(self, mat, l_lefts, l_rights):

return block_lefts @ mat @ block_rights.T

@staticmethod
def _extract_energy_from_log(loglines, mode, dump_freq=1):
"""
Extract total energy from ABACUS log file.

Note that this extractor is only validated for ABACUS versions 3.9.0.

For SCF/NSCF, extracts the final total energy.
For MD, extracts energies at each dump interval, filtering out unconverged frames.
For RELAX, extracts energies for the final converged structure.

Parameters
----------
loglines : list of str
Lines from the log file
mode : str
Calculation mode (scf, nscf, md, relax)
nframes : int, optional
Expected number of frames (for MD/relax validation)
dump_freq : int, optional
Dump frequency for MD calculations (default: 1)

Returns
-------
tuple or None
Tuple of (energy_array, unconverged_indices) where:
- energy_array: np.ndarray of energies in eV (filtered, converged only for MD)
- unconverged_indices: list of frame indices that did not converge
Returns None if extraction fails
"""
energy = []

if mode in ["scf", "nscf"]:
# For SCF/NSCF, search from the end for the final energy
for line in reversed(loglines):
if "final etot is" in line:
# LTS version format: "final etot is <value> eV"
Etot = float(line.split()[-2])
return (np.array([Etot], dtype=np.float64), [])
elif "TOTAL ENERGY" in line:
# Develop version format
Etot = float(line.split()[-2])
return (np.array([Etot], dtype=np.float64), [])
elif "convergence has NOT been achieved!" in line or \
"convergence has not been achieved" in line:
# SCF did not converge
return None
return None

elif mode == "md":
# For MD, extract all energies at dump intervals
nenergy = 0
for line in loglines:
if "final etot is" in line:
if nenergy % dump_freq == 0:
energy.append(float(line.split()[-2]))
nenergy += 1
elif "!! convergence has not been achieved" in line:
if nenergy % dump_freq == 0:
energy.append(np.nan)
nenergy += 1

if len(energy) == 0:
return None

# Filter out unconverged frames (NaN values) and track their indices
energy = np.array(energy, dtype=np.float64)
valid_mask = ~np.isnan(energy)
if not valid_mask.any():
return None

# Get indices of unconverged frames
unconverged_indices = np.where(~valid_mask)[0].tolist()

# Filter to keep only converged frames
energy = energy[valid_mask]
return (energy, unconverged_indices)

elif mode == "relax":
relax_success = False
# For RELAX, extract energy for the converged structures
for line in loglines:
if "Relaxation is converged!" in line:
relax_success = True
if "!FINAL_ETOT_IS" in line:
energy.append(float(line.split()[-2]))

if relax_success and len(energy) > 0:
return (np.array(energy, dtype=np.float64), [])
else:
# raise error when relaxation did not converge or no energies found
raise ValueError("Relaxation did not converge or no energies found.")

else:
return None

def get_etot(self, idx):
"""
Extract total energy (Etot) from ABACUS output.

Parameters
----------
idx : int
Index of the structure/trajectory

Returns
-------
dict
Dictionary with:
- _keys.TOTAL_ENERGY_KEY: energy array (shape: [1,] for SCF/NSCF, [nframes,] for MD/RELAX)
- _keys.UNCONVERGED_FRAME_INDICES_KEY: list of unconverged frame indices (empty if all converged)
Energy values are in eV.
Returns None if energy extraction fails or structures are unconverged.
"""
mode = self.get_mode(idx)
logfile = "running_" + mode + ".log"
logpath = os.path.join(self.raw_datas[idx], "OUT.ABACUS", logfile)

# Check if log file exists
if not os.path.exists(logpath):
raise FileNotFoundError(f"Log file {logpath} does not exist.")

# Read log file
with open(logpath, 'r') as f:
loglines = f.readlines()

# Get dump frequency for MD mode
dump_freq = 1
if mode == "md":
input_path = os.path.join(self.raw_datas[idx], "INPUT")
if os.path.exists(input_path):
with open(input_path, 'r') as f:
for line in f:
if len(line) > 0 and "md_dumpfreq" in line and "md_dumpfreq" == line.split()[0]:
dump_freq = int(line.split()[1])
break

# Extract energy
result = self._extract_energy_from_log(loglines, mode, dump_freq)

if result is None:
return None

energy, unconverged_indices = result

# Log warning if there are unconverged frames
if len(unconverged_indices) > 0:
log.warning(f"Energy extraction: frames {unconverged_indices} did not converge")

return {
_keys.TOTAL_ENERGY_KEY: energy,
_keys.UNCONVERGED_FRAME_INDICES_KEY: unconverged_indices
}

def get_abs_h0_folders(self, h0_root):
# Build a map of all directory names to their full paths to avoid repeated os.walk calls
folder_path_map = {}
Expand Down
80 changes: 69 additions & 11 deletions dftio/io/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,13 +211,13 @@ def check_blocks(self, idx, hamiltonian: bool=False, overlap: bool=False, densit

return True

def write(self, idx, outroot, format, eigenvalue, hamiltonian, overlap, density_matrix, band_index_min, **kwargs):
def write(self, idx, outroot, format, eigenvalue, hamiltonian, overlap, density_matrix, band_index_min, energy=False, **kwargs):
if format == "hdf5":
self.write_hdf5(idx=idx, outroot=outroot, eigenvalue=eigenvalue, hamiltonian=hamiltonian, overlap=overlap, density_matrix=density_matrix,band_index_min=band_index_min)
self.write_hdf5(idx=idx, outroot=outroot, eigenvalue=eigenvalue, hamiltonian=hamiltonian, overlap=overlap, density_matrix=density_matrix,band_index_min=band_index_min, energy=energy)
elif format in ["dat", "ase"]:
self.write_dat(idx=idx, outroot=outroot, fmt=format, eigenvalue=eigenvalue, hamiltonian=hamiltonian, overlap=overlap, density_matrix=density_matrix,band_index_min=band_index_min)
self.write_dat(idx=idx, outroot=outroot, fmt=format, eigenvalue=eigenvalue, hamiltonian=hamiltonian, overlap=overlap, density_matrix=density_matrix,band_index_min=band_index_min, energy=energy)
elif format == "lmdb":
self.write_lmdb(idx=idx, outroot=outroot, eigenvalue=eigenvalue, hamiltonian=hamiltonian, overlap=overlap, density_matrix=density_matrix,band_index_min=band_index_min)
self.write_lmdb(idx=idx, outroot=outroot, eigenvalue=eigenvalue, hamiltonian=hamiltonian, overlap=overlap, density_matrix=density_matrix,band_index_min=band_index_min, energy=energy)
else:
raise NotImplementedError(f"Format: {format} is not implemented!")

Expand All @@ -242,10 +242,10 @@ def write_struct(self, structure, out_dir, fmt='dat'):
else:
raise NotImplementedError(f"Format: {fmt} is not implemented!")

def write_dat(self, idx, outroot, fmt='dat', eigenvalue=False, hamiltonian=False, overlap=False, density_matrix=False, band_index_min=0):
def write_dat(self, idx, outroot, fmt='dat', eigenvalue=False, hamiltonian=False, overlap=False, density_matrix=False, band_index_min=0, energy=False):
# write structure
os.makedirs(outroot, exist_ok=True)

structure = self.get_structure(idx)

out_dir = os.path.join(outroot, self.formula(idx=idx)+".{}".format(idx))
Expand All @@ -255,7 +255,7 @@ def write_dat(self, idx, outroot, fmt='dat', eigenvalue=False, hamiltonian=False
# np.savetxt(os.path.join(out_dir, "positions.dat"), structure[_keys.POSITIONS_KEY].reshape(-1, 3))
# np.savetxt(os.path.join(out_dir, "atomic_numbers.dat"), structure[_keys.ATOMIC_NUMBERS_KEY], fmt='%d')
# np.savetxt(os.path.join(out_dir, "pbc.dat"), structure[_keys.PBC_KEY])

# write structure
self.write_struct(structure, out_dir, fmt=fmt)

Expand All @@ -266,6 +266,26 @@ def write_dat(self, idx, outroot, fmt='dat', eigenvalue=False, hamiltonian=False
np.save(os.path.join(out_dir, "kpoints.npy"), eigstatus[_keys.KPOINT_KEY])
np.save(os.path.join(out_dir, "eigenvalues.npy"), eigstatus[_keys.ENERGY_EIGENVALUE_KEY])

# write energy
if energy:
if hasattr(self, 'get_etot'):
energy_data = self.get_etot(idx)
if energy_data is not None:
np.savetxt(os.path.join(out_dir, "total_energy.dat"), energy_data[_keys.TOTAL_ENERGY_KEY])

# Write unconverged frame indices if present
if _keys.UNCONVERGED_FRAME_INDICES_KEY in energy_data:
unconverged_indices = energy_data[_keys.UNCONVERGED_FRAME_INDICES_KEY]
if len(unconverged_indices) > 0:
with open(os.path.join(out_dir, "unconverged_frames.dat"), 'w') as f:
f.write("# Frame indices that did not converge during MD/RELAX\n")
for idx_frame in unconverged_indices:
f.write(f"{idx_frame}\n")
else:
log.warning(f"Failed to extract energy for structure {idx}")
else:
log.warning(f"Parser does not implement get_etot method")

# write blocks
if any([hamiltonian is not None, overlap is not None, density_matrix is not None]) and any([hamiltonian, overlap, density_matrix]):
with open(os.path.join(out_dir, "basis.dat"), 'w') as f:
Expand All @@ -279,34 +299,55 @@ def write_dat(self, idx, outroot, fmt='dat', eigenvalue=False, hamiltonian=False
for key_str, value in ham[i].items():
default_group.create_dataset(key_str, data=value)
del ham

if overlap:
with h5py.File(os.path.join(out_dir, "overlaps.h5"), 'w') as fid:
for i in range(len(ovp)):
default_group = fid.create_group(str(i))
for key_str, value in ovp[i].items():
default_group.create_dataset(key_str, data=value)
del ovp

if density_matrix:
with h5py.File(os.path.join(out_dir, "density_matrices.h5"), 'w') as fid:
for i in range(len(dm)):
default_group = fid.create_group(str(i))
for key_str, value in dm[i].items():
default_group.create_dataset(key_str, data=value)

del dm

return True

def write_lmdb(self, idx, outroot, eigenvalue: bool=False, hamiltonian: bool=False, overlap: bool=False, density_matrix: bool=False,band_index_min=0):
def write_lmdb(self, idx, outroot, eigenvalue: bool=False, hamiltonian: bool=False, overlap: bool=False, density_matrix: bool=False,band_index_min=0, energy: bool=False):
os.makedirs(outroot, exist_ok=True)
out_dir = os.path.join(outroot, "data.{}.lmdb".format(os.getpid()))
structure = self.get_structure(idx)
if any([hamiltonian, overlap, density_matrix]):
ham, ovp, dm = self.get_blocks(idx, hamiltonian, overlap, density_matrix)
if eigenvalue:
eigstatus = self.get_eigenvalue(idx=idx, band_index_min=band_index_min)
if energy:
if hasattr(self, 'get_etot'):
energy_data = self.get_etot(idx)
else:
energy_data = None
log.warning(f"Parser does not implement get_etot method")

# Build frame index mapping for energy data
# If there are unconverged frames, energy array will be shorter than n_frames
energy_frame_mapping = None
if energy and energy_data is not None:
unconverged_indices = energy_data.get(_keys.UNCONVERGED_FRAME_INDICES_KEY, [])
if len(unconverged_indices) > 0:
# Build mapping: structure_frame_idx -> energy_array_idx
energy_frame_mapping = {}
energy_idx = 0
n_frames_total = structure[_keys.POSITIONS_KEY].shape[0]
for frame_idx in range(n_frames_total):
if frame_idx not in unconverged_indices:
energy_frame_mapping[frame_idx] = energy_idx
energy_idx += 1

n_frames = structure[_keys.POSITIONS_KEY].shape[0]
lmdb_env = lmdb.open(out_dir, map_size=1048576000000, lock=True)
Expand All @@ -321,6 +362,23 @@ def write_lmdb(self, idx, outroot, eigenvalue: bool=False, hamiltonian: bool=Fal
data_dict[_keys.ENERGY_EIGENVALUE_KEY] = eigstatus[_keys.ENERGY_EIGENVALUE_KEY][nf]
data_dict[_keys.KPOINT_KEY] = eigstatus[_keys.KPOINT_KEY]

if energy and energy_data is not None:
# For single structure (SCF/NSCF), energy_data has shape [1,]
# For trajectories (MD/RELAX), energy_data has shape [nframes,] or less if unconverged
if energy_data[_keys.TOTAL_ENERGY_KEY].shape[0] == 1:
# Single structure case
data_dict[_keys.TOTAL_ENERGY_KEY] = energy_data[_keys.TOTAL_ENERGY_KEY][0]
else:
# Trajectory case - use mapping if unconverged frames exist
if energy_frame_mapping is not None:
if nf in energy_frame_mapping:
energy_idx = energy_frame_mapping[nf]
data_dict[_keys.TOTAL_ENERGY_KEY] = energy_data[_keys.TOTAL_ENERGY_KEY][energy_idx]
# else: skip energy for unconverged frames (don't add to data_dict)
else:
# No unconverged frames, direct indexing
data_dict[_keys.TOTAL_ENERGY_KEY] = energy_data[_keys.TOTAL_ENERGY_KEY][nf]

if hamiltonian:
data_dict["hamiltonian"] = ham[nf]
if overlap:
Expand Down
Loading