diff --git a/gpu4pyscf/grad/tdrhf.py b/gpu4pyscf/grad/tdrhf.py index 870e45a6e..a62363f6a 100644 --- a/gpu4pyscf/grad/tdrhf.py +++ b/gpu4pyscf/grad/tdrhf.py @@ -15,6 +15,7 @@ from functools import reduce import cupy as cp +import numpy as np from pyscf import lib, gto from gpu4pyscf.lib import logger from gpu4pyscf.grad import rhf as rhf_grad @@ -24,6 +25,45 @@ from pyscf import __config__ from gpu4pyscf.lib import utils from gpu4pyscf import tdscf +from scipy.optimize import linear_sum_assignment + +def match_and_reorder_mos(s12_ao, mo_coeff_b, mo_coeff, threshold=0.4): + if mo_coeff_b.shape != mo_coeff.shape: + raise ValueError("Mo coeff b and mo coeff must have the same shape.") + if s12_ao.shape[0] != s12_ao.shape[1] or s12_ao.shape[0] != mo_coeff_b.shape[0]: + raise ValueError("S12 ao must be a square matrix with the same shape as mo coeff b.") + mo_overlap_matrix = mo_coeff_b.T @ s12_ao @ mo_coeff + abs_mo_overlap = cp.abs(mo_overlap_matrix) + cost_matrix = -abs_mo_overlap + below_threshold_mask = abs_mo_overlap < threshold + infinity_cost = mo_coeff_b.shape[1] + 1 + cost_matrix[below_threshold_mask] = infinity_cost + + row_ind, col_ind = linear_sum_assignment(cost_matrix.get()) + + matching_indices = col_ind + + mo2_reordered = mo_coeff[:, matching_indices] + + final_chosen_overlaps = abs_mo_overlap[row_ind, col_ind] + invalid_matches_mask = final_chosen_overlaps < threshold + + if cp.any(invalid_matches_mask): + num_invalid = cp.sum(invalid_matches_mask) + print( + f"{num_invalid} orbital below threshold {threshold}." + "This may indicate significant changes in the properties of these orbitals between the two structures." + ) + invalid_indices = cp.where(invalid_matches_mask)[0] + for idx in invalid_indices: + print(f"Warning: reference coeff #{idx}'s best match is {final_chosen_overlaps[idx]:.4f} (below threshold {threshold})") + s_mo_new = mo_coeff_b.T @ s12_ao @ mo2_reordered + sign_array = cp.ones(s_mo_new.shape[-1]) + for i in range(s_mo_new.shape[-1]): + if s_mo_new[i,i] < 0.0: + mo2_reordered[:,i] *= -1 + sign_array[i] = -1 + return mo2_reordered, matching_indices, sign_array def grad_elec(td_grad, x_y, singlet=True, atmlst=None, verbose=logger.INFO): @@ -236,6 +276,38 @@ def as_scanner(td_grad, state=1): (TDSCF_GradScanner, td_grad.__class__), name) +def check_flip(mol0, mol1, mo_coeff0, mo_coeff1, xy0, xy1, state, nocc): + + nao = mol0.nao + nvir = nao - nocc + + s = gto.intor_cross('int1e_ovlp', mol0, mol1) + s = cp.asarray(s) + mo1_reordered, matching_indices, sign_array = \ + match_and_reorder_mos(s, mo_coeff0, mo_coeff1, threshold=0.4) + mo1_reordered = cp.asarray(mo1_reordered) + x0 = xy0[state-1][0] + nstates = len(xy0) + s_state_list = [] + for istate in range(nstates): + x1 = xy1[istate][0] + s_state = 0 + for i in range(nocc): + for a in range(nvir): + for j in range(nocc): + for b in range(nvir): + mo_coeff0_tmp = mo_coeff0[:,:nocc]*1.0 + mo_coeff1_tmp = mo1_reordered[:,:nocc]*1.0 + mo_coeff0_tmp[:,i] = mo_coeff0[:,a+nocc] + mo_coeff1_tmp[:,j] = mo1_reordered[:,b+nocc] + s_mo = mo_coeff0_tmp.T @ s @ mo_coeff1_tmp + s_state += cp.linalg.det(s_mo)\ + *x0[i,a]*x1[j,b]*2 + s_state_list.append(float(s_state)) + s_state_list = np.asarray(s_state_list) + return np.argmax(np.abs(s_state_list))+1, s_state_list + + class TDSCF_GradScanner(lib.GradScanner): _keys = {'e_tot'} @@ -245,12 +317,15 @@ def __init__(self, g, state): self.state = state def __call__(self, mol_or_geom, state=None, **kwargs): + mol0 = self.mol.copy() if isinstance(mol_or_geom, gto.MoleBase): assert mol_or_geom.__class__ == gto.Mole mol = mol_or_geom else: mol = self.mol.set_geom_(mol_or_geom, inplace=False) self.reset(mol) + mo_coeff0 = self.base._scf.mo_coeff + xy0 = self.base.xy if state is None: state = self.state @@ -261,17 +336,25 @@ def __call__(self, mol_or_geom, state=None, **kwargs): assert td_scanner.device == 'gpu' assert self.device == 'gpu' td_scanner(mol) - # TODO: Check root flip. Maybe avoid the initial guess in TDHF otherwise - # large error may be found in the excited states amplitudes - de = self.kernel(state=state, **kwargs) - e_tot = self.e_tot[state-1] + mo_coeff1 = self.base._scf.mo_coeff + xy1 = self.base.xy + mo_occ = cp.asarray(self.base._scf.mo_occ) + nocc = int((mo_occ > 0).sum()) + + flip_state, s_state_list = check_flip(mol0, mol, mo_coeff0, mo_coeff1, xy0, xy1, state, nocc) + if flip_state != state: + logger.warn(self, 'Root %d is flipped to %d', state, flip_state) + logger.warn(self, f'S_state_list = {s_state_list}') + self.state = flip_state + de = self.kernel(state=flip_state, **kwargs) + e_tot = self.e_tot[flip_state-1] return e_tot, de @property def converged(self): td_scanner = self.base return all((td_scanner._scf.converged, - td_scanner.converged[self.state])) + td_scanner.converged[self.state-1])) class Gradients(rhf_grad.GradientsBase): diff --git a/gpu4pyscf/grad/tests/test_tddft_opt.py b/gpu4pyscf/grad/tests/test_tddft_opt.py index a1faa5193..7b6a0cf19 100644 --- a/gpu4pyscf/grad/tests/test_tddft_opt.py +++ b/gpu4pyscf/grad/tests/test_tddft_opt.py @@ -23,18 +23,24 @@ from packaging import version atom = """ -O 0.0000000000 0.0000000000 0.0000000000 -H 0.0000000000 -0.7570000000 0.5870000000 -H 0.0000000000 0.7570000000 0.5870000000 +H 0.000000 0.923274 1.238289 +H 0.000000 -0.923274 1.238289 +H 0.000000 0.923274 -1.238289 +H 0.000000 -0.923274 -1.238289 +C 0.000000 0.000000 0.668188 +C 0.000000 0.000000 -0.668188 """ atom_near_conv = """ -O -0.000000 -0.000000 0.391241 -H -0.000000 -1.283134 0.391326 -H -0.000000 1.283134 0.391326 +H 0.000000 0.889149 1.396667 +H -0.000000 -0.889149 1.396667 +H -0.000000 0.889149 -1.396667 +H -0.000000 -0.889149 -1.396667 +C 0.000000 0.000000 0.696208 +C 0.000000 0.000000 -0.696208 """ -bas0 = "631g" +bas0 = "sto-3g" def setUpModule(): global mol, mol_near_conv @@ -55,7 +61,7 @@ def test_opt_rhf_tda(self): mf = scf.RHF(mol).to_gpu() mf.kernel() assert mf.converged - td = mf.TDA().set(nstates=3) + td = mf.TDA().set(nstates=5) td.kernel() # TODO: store CPU results for comparison @@ -69,19 +75,26 @@ def test_opt_rks_tda(self): mf = dft.RKS(mol, xc='b3lyp').to_gpu() mf.kernel() assert mf.converged - td = mf.TDA().set(nstates=3) + td = mf.TDA().set(nstates=5) td.kernel() # TODO: store CPU results for comparison - td_cpu = td.to_cpu() mol_gpu = optimize(td) - mol_cpu = optimize(td_cpu) - assert np.linalg.norm(mol_gpu.atom_coords() - mol_cpu.atom_coords()) < 1e-4 + + mff = dft.RKS(mol_gpu, xc='b3lyp').to_gpu() + mff.kernel() + assert mff.converged + tdf = mff.TDA().set(nstates=5) + tdf.kernel()[0] + assert bool(np.all(tdf.converged)) + excited_gradf = tdf.nuc_grad_method() + excited_gradf.kernel() + assert np.linalg.norm(excited_gradf.de) < 2.0e-4 def test_opt_rks_tda_pcm_1(self): mf = dft.RKS(mol_near_conv, xc='b3lyp').PCM().to_gpu() mf.kernel() assert mf.converged - td = mf.TDA(equilibrium_solvation=True).set(nstates=3) + td = mf.TDA(equilibrium_solvation=True).set(nstates=5) td.kernel() mol_gpu = optimize(td) @@ -99,7 +112,7 @@ def test_opt_rks_tda_pcm_2(self): mf = dft.RKS(mol_near_conv, xc='b3lyp').PCM().to_gpu() mf.kernel() assert mf.converged - td = mf.TDA(equilibrium_solvation=True).set(nstates=3) + td = mf.TDA(equilibrium_solvation=True).set(nstates=5) td.kernel() excited_grad = td.nuc_grad_method().as_scanner(state=1)