Source code for iodata.formats.qchemlog

# IODATA is an input and output module for quantum chemistry.
# Copyright (C) 2011-2019 The IODATA Development Team
#
# This file is part of IODATA.
#
# IODATA is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# IODATA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
# --
"""Q-Chem Log file format.

This module will load Q-Chem log file into IODATA.
"""

import numpy as np
from numpy.typing import NDArray

from ..docstrings import document_load_one
from ..orbitals import MolecularOrbitals
from ..periodic import sym2num
from ..utils import LineIterator, LoadError, angstrom, calmol, kcalmol, kjmol, strtobool

__all__ = ()

PATTERNS = ["*.qchemlog"]



[docs]
@document_load_one(
    "qchemlog",
    [
        "atcoords",
        "atmasses",
        "atnums",
        "energy",
        "g_rot",
        "mo",
        "lot",
        "obasis_name",
        "run_type",
        "extra",
    ],
    ["athessian"],
)
def load_one(lit: LineIterator) -> dict:
    """Do not edit this docstring. It will be overwritten."""
    data = load_qchemlog_low(lit)

    # add these labels if they are loaded
    result_labels = [
        "atcoords",
        "atmasses",
        "atnums",
        "energy",
        "g_rot",
        "run_type",
        "athessian",
        "lot",
        "obasis_name",
    ]
    result = {label: data[label] for label in result_labels if data.get(label) is not None}

    # mulliken charges
    if data.get("mulliken_charges") is not None:
        result["atcharges"] = {"mulliken": data["mulliken_charges"]}

    # build molecular orbitals
    # ------------------------
    if data["unrestricted"]:
        # unrestricted case
        mo_energies = np.concatenate(
            (data["mo_a_occ"], data["mo_a_vir"], data["mo_b_occ"], data["mo_b_vir"]), axis=0
        )
        mo_coeffs = np.full((data["nbasis"], data["norba"] + data["norbb"]), np.nan)
        mo_occs = np.zeros(mo_coeffs.shape[1])
        # number of alpha & beta electrons and number of alpha molecular orbitals
        na, nb = data["alpha_elec"], data["beta_elec"]
        na_mo = len(data["mo_a_occ"]) + len(data["mo_a_vir"])
        mo_occs[:na] = 1.0
        mo_occs[na_mo : na_mo + nb] = 1.0
        mo = MolecularOrbitals(
            "unrestricted", data["norba"], data["norbb"], mo_occs, mo_coeffs, mo_energies, None
        )
    else:
        # restricted case
        mo_energies = np.concatenate((data["mo_a_occ"], data["mo_a_vir"]), axis=0)
        mo_coeffs = np.full((data["nbasis"], data["norba"]), np.nan)
        mo_occs = np.zeros(mo_coeffs.shape[1])
        mo_occs[: data["alpha_elec"]] = 1.0
        mo_occs[: data["beta_elec"]] += 1.0
        mo = MolecularOrbitals(
            "restricted", data["norba"], data["norba"], mo_occs, mo_coeffs, mo_energies, None
        )
    result["mo"] = mo

    # moments
    moments = {}
    if "dipole" in data:
        moments[(1, "c")] = data["dipole"]
    if "quadrupole" in data:
        # Convert to alphabetical ordering: xx, xy, xz, yy, yz, zz
        moments[(2, "c")] = data["quadrupole"][[0, 1, 3, 2, 4, 5]]
    # check total dipole parsed
    if (
        "dipole_tol" in data
        and "dipole" in data
        and abs(np.linalg.norm(data["dipole"]) - data["dipole_tol"]) > 1.0e-4
    ):
        raise LoadError("Inconsistent dipole and dipole_tol", lit.filename)
    if moments:
        result["moments"] = moments

    # extra dictionary
    # ----------------
    # add labels to extra dictionary if they are loaded
    extra_labels = [
        "nuclear_repulsion_energy",
        "polarizability_tensor",
        "imaginary_freq",
        "vib_energy",
        "eda2",
        "frags",
    ]

    extra = {label: data[label] for label in extra_labels if data.get(label) is not None}
    # if present, convert vibrational energy from kcal/mol to "atomic units + K"
    if "vib_energy" in extra:
        extra["vib_energy"] *= kcalmol

    # if present, convert enthalpy terms from kcal/mol to "atomic units + K"
    if "enthalpy_dict" in data:
        extra["enthalpy_dict"] = {k: v * kcalmol for k, v in data["enthalpy_dict"].items()}

    # if present, convert entropy terms from cal/mol.K to "atomic units + Kalvin"
    if "entropy_dict" in data:
        extra["entropy_dict"] = {k: v * calmol for k, v in data["entropy_dict"].items()}

    # if present, convert eda terms from kj/mol to atomic units
    if "eda2" in data:
        extra["eda2"] = {k: v * kjmol for k, v in data["eda2"].items()}

    result["extra"] = extra
    return result




[docs]
def load_qchemlog_low(lit: LineIterator) -> dict:
    """Load the information from Q-Chem log file."""
    data = {}
    while True:
        try:
            line = next(lit).strip()
        except StopIteration:
            # Read until the end of the file.
            break

        # job specifications
        if line.startswith("$rem") and "run_type" not in data:
            data.update(_helper_rem_job(lit))

        # standard nuclear orientation (make sure multi-step jobs does not over-write this)
        elif line.startswith("Standard Nuclear Orientation (Angstroms)") and "atcoords" not in data:
            data.update(_helper_structure(lit))

        # standard nuclear orientation for fragments in EDA jobs
        elif line.startswith("Standard Nuclear Orientation (Angstroms)"):
            if "frags" not in data:
                data["frags"] = []
            data["frags"].append(_helper_structure(lit))

        # energy (the last energy in a multi-step job)
        elif line.startswith("Total energy in the final basis set"):
            data["energy"] = float(line.split()[-1])
        elif line.startswith("the SCF tolerance is set"):
            data["energy"] = _helper_energy(lit)

        # orbital energies (the last orbital energies in a multi-step job)
        elif line.startswith("Orbital Energies (a.u.)") and not data["unrestricted"]:
            result = _helper_orbital_energies_restricted(lit)
            data["mo_a_occ"], data["mo_a_vir"] = result
            # compute number of alpha
            data["norba"] = len(data["mo_a_occ"]) + len(data["mo_a_vir"])

        # orbital energies (the last orbital energies in a multi-step job)
        elif line.startswith("Orbital Energies (a.u.)") and data["unrestricted"]:
            data.update(_helper_orbital_energies_unrestricted(lit))
            # compute number of alpha and beta molecular orbitals
            data["norba"] = len(data["mo_a_occ"]) + len(data["mo_a_vir"])
            data["norbb"] = len(data["mo_b_occ"]) + len(data["mo_b_vir"])

        # mulliken charges (the last charges in a multi-step job)
        elif line.startswith("Ground-State Mulliken Net Atomic Charges"):
            data["mulliken_charges"] = _helper_mulliken(lit)

        # cartesian multipole moments (the last mutipole moments in a multi-step job)
        elif line.startswith("Cartesian Multipole Moments"):
            data["dipole"], data["quadrupole"], data["dipole_tol"] = _helper_dipole_moments(lit)

        # polarizability matrix
        elif line.startswith("Polarizability Matrix (a.u.)"):
            data["polarizability_tensor"] = _helper_polar(lit)

        # hessian matrix
        elif line.startswith("Hessian of the SCF Energy"):
            data["athessian"] = _helper_hessian(lit, len(data["atnums"]))

        # vibrational analysis
        elif line.startswith("**                       VIBRATIONAL ANALYSIS"):
            data["imaginary_freq"], data["vib_energy"], data["atmasses"] = _helper_vibrational(lit)

        # rotational symmetry number
        elif line.startswith("Rotational Symmetry Number"):
            data["g_rot"] = int(line.split()[-1])
            data["enthalpy_dict"], data["entropy_dict"] = _helper_thermo(lit)

        # energy decomposition analysis 2 (EDA2)
        elif line.startswith("Results of EDA2"):
            eda2 = _helper_eda2(lit)
            # add fragment energies to frags
            energies = eda2.pop("energies")
            for index, energy in enumerate(energies):
                data["frags"][index]["energy"] = energy
            data["eda2"] = eda2

    return data



def _helper_rem_job(lit: LineIterator) -> dict:
    """Load job specifications from Q-Chem output file format."""
    data_rem = {}
    for line in lit:
        words = line.strip().lower().split(maxsplit=1)
        if words[0] == "$end":
            break
        # parse job type section; some sections might not be available
        if words[0] == "jobtype":
            data_rem["run_type"] = words[1]
        elif words[0] == "method":
            data_rem["lot"] = words[1]
        elif words[0] == "unrestricted":
            data_rem["unrestricted"] = strtobool(words[1])
        elif words[0] == "basis":
            data_rem["obasis_name"] = words[1]
        elif words[0] == "symmetry":
            data_rem["symm"] = strtobool(words[1])
    return data_rem


def _helper_structure(lit: LineIterator):
    """Load electron information from Q-Chem output file format."""
    next(lit)
    next(lit)
    # atomic numbers and atomic coordinates (converted to A.U)
    atsymbols = []
    atcoords = []
    for line in lit:
        if line.strip().startswith("-------------"):
            break
        atsymbols.append(line.split()[1])
        atcoords.append([float(i) for i in line.split()[2:]])
    subdata = {
        "atnums": np.array([sym2num[i] for i in atsymbols]),
        "atcoords": np.array(atcoords) * angstrom,
        "nuclear_repulsion_energy": float(next(lit).split()[-2]),
    }
    # number of alpha and beta elections
    line = next(lit).split()
    subdata["alpha_elec"] = int(line[2])
    subdata["beta_elec"] = int(line[5])
    # number of basis functions
    next(lit)
    subdata["nbasis"] = int(next(lit).split()[-3])

    return subdata


def _helper_energy(lit: LineIterator):
    for line in lit:
        if line.strip().endswith("Convergence criterion met"):
            energy = float(line.split()[1])
            break
    return energy


def _helper_orbital_energies_restricted(lit: LineIterator) -> tuple:
    """Load occupied and virtual orbital energies for restricted calculation."""
    # alpha occupied MOs
    mo_a_occupied = _helper_section("-- Occupied --", "-- Virtual --", lit, backward=True)
    # alpha unoccupied MOs
    mo_a_unoccupied = _helper_section("-- Virtual --", "-" * 62, lit, backward=False)
    return mo_a_occupied, mo_a_unoccupied


def _helper_orbital_energies_unrestricted(lit: LineIterator) -> tuple:
    """Load occupied and virtual orbital energies for unrestricted calculation."""
    subdata = {}
    # alpha occupied MOs
    subdata["mo_a_occ"] = _helper_section("-- Occupied --", "-- Virtual --", lit, backward=True)
    # alpha unoccupied MOs
    subdata["mo_a_vir"] = _helper_section("-- Virtual --", "", lit, backward=False)
    # beta occupied MOs
    subdata["mo_b_occ"] = _helper_section("-- Occupied --", "-- Virtual --", lit, backward=True)
    # beta unoccupied MOs
    subdata["mo_b_vir"] = _helper_section("-- Virtual --", "-" * 62, lit, backward=False)
    return subdata


def _helper_section(
    start: str, end: str, lit: LineIterator, backward: bool = False
) -> NDArray[float]:
    """Load data between starting and ending strings."""
    data = []
    for line in lit:
        line_str = line.strip()
        if line_str == start:
            break
    for line in lit:
        if line.strip() != end:
            data.extend(line.split())
        else:
            break
    if backward:
        lit.back(line)
    return np.array(data, dtype=float)


def _helper_mulliken(lit: LineIterator) -> NDArray[float]:
    """Load mulliken net atomic charges."""
    # skip line between 'Ground-State Mulliken Net Atomic Charges' line & atomic charge entries
    while True:
        line = next(lit).strip()
        if line.startswith("------"):
            break
    # store atomic charges until enf of table is reached
    mulliken_charges = []
    for line in lit:
        if line.strip().startswith("--------"):
            break
        mulliken_charges.append(line.split()[2])
    return np.array(mulliken_charges, dtype=float)


def _helper_dipole_moments(lit: LineIterator) -> tuple:
    """Load cartesian multiple moments."""
    for line in lit:
        if line.strip().startswith("Dipole Moment (Debye)"):
            break
    # parse dipole moment (only load the float numbers)
    dipole = next(lit).split()
    dipole = np.array([float(dipole) for idx, dipole in enumerate(dipole) if idx % 2 != 0])
    # parse total dipole moment
    dipole_tol = float(next(lit).split()[-1])
    # parse quadrupole moment (xx, xy, yy, xz, yz, zz)
    next(lit)
    quadrupole = next(lit).split()
    quadrupole.extend(next(lit).split())
    quadrupole = np.array([float(dipole) for idx, dipole in enumerate(quadrupole) if idx % 2 != 0])
    return dipole, quadrupole, dipole_tol


def _helper_polar(lit: LineIterator) -> NDArray[float]:
    """Load polarizability matrix."""
    next(lit)
    polarizability_tensor = []
    for line in lit:
        if line.strip().startswith("Calculating analytic Hessian"):
            break
        polarizability_tensor.append(line.split()[1:])
    return np.array(polarizability_tensor, dtype=float)


def _helper_hessian(lit: LineIterator, natom: int) -> NDArray[float]:
    """Load hessian matrix."""
    # hessian in Cartesian coordinates, shape(3 * natom, 3 * natom)
    col_idx = [int(i) for i in next(lit).split()]
    hessian = np.empty((natom * 3, natom * 3), dtype=object)
    for line in lit:
        if line.strip().startswith("****************"):
            break
        if line.startswith("            "):
            col_idx = [int(i) for i in line.split()]
        else:
            line_list = line.split()
            row_idx = int(line_list[0]) - 1
            hessian[row_idx, col_idx[0] - 1 : col_idx[-1]] = line_list[1:]
    return hessian.astype(float)


def _helper_vibrational(lit: LineIterator) -> tuple:
    """Load vibrational analysis."""
    for line in lit:
        if line.strip().startswith("This Molecule has"):
            break
    imaginary_freq = int(line.split()[3])
    vib_energy = float(next(lit).split()[-2])
    next(lit)
    atmasses = []
    for line in lit:
        if line.strip().startswith("Molecular Mass:"):
            break
        atmasses.append(line.split()[-1])
    atmasses = np.array(atmasses, dtype=float)
    return imaginary_freq, vib_energy, atmasses


def _helper_thermo(lit: LineIterator) -> tuple:
    """Load thermodynamics properties."""
    enthalpy_dict = {}
    entropy_dict = {}
    for line in lit:
        line_str = line.strip()
        if line_str.startswith("Archival summary:"):
            break
        if line_str.startswith("Translational Enthalpy"):
            enthalpy_dict["trans_enthalpy"] = float(line_str.split()[-2])
        elif line_str.startswith("Rotational Enthalpy"):
            enthalpy_dict["rot_enthalpy"] = float(line_str.split()[-2])
        elif line_str.startswith("Vibrational Enthalpy"):
            enthalpy_dict["vib_enthalpy"] = float(line_str.split()[-2])
        elif line_str.startswith("Total Enthalpy"):
            enthalpy_dict["enthalpy_total"] = float(line_str.split()[-2])
        elif line_str.startswith("Translational Entropy"):
            entropy_dict["trans_entropy"] = float(line_str.split()[-2])
        elif line_str.startswith("Rotational Entropy"):
            entropy_dict["rot_entropy"] = float(line_str.split()[-2])
        elif line_str.startswith("Vibrational Entropy"):
            entropy_dict["vib_entropy"] = float(line_str.split()[-2])
        elif line_str.startswith("Total Entropy"):
            entropy_dict["entropy_total"] = float(line_str.split()[-2])
    return enthalpy_dict, entropy_dict


def _helper_eda2(lit: LineIterator) -> dict:
    """Load Energy decomposition information."""
    next(lit)
    eda2 = {}
    for line in lit:
        if line.strip().startswith("Fragment Energies"):
            for line_2 in lit:
                if line_2.strip().startswith("-----"):
                    break
                eda2.setdefault("energies", []).append(float(line_2.split()[-1]))

        if line.strip().startswith("Orthogonal Fragment Subspace Decomposition"):
            next(lit)
            for line_2 in lit:
                if line_2.strip().startswith("-----"):
                    break
                info = line_2.split()
                if info[0] in ["E_elec", "E_pauli", "E_disp"]:
                    eda2[info[0].lower()] = float(info[-1])

        elif line.strip().startswith("Terms summing to E_pauli"):
            next(lit)
            for line_2 in lit:
                if line_2.strip().startswith("-----"):
                    break
                info = line_2.split()
                if info[0] in ["E_kep_pauli", "E_disp_free_pauli"]:
                    eda2[info[0].lower()] = float(info[-1])

        elif line.strip().startswith("Classical Frozen Decomposition"):
            next(lit)
            for line_2 in lit:
                if line_2.strip().startswith("-----"):
                    break
                info = line_2.split()
                if info[0] in ["E_cls_elec", "E_cls_pauli"]:
                    eda2[info[0].lower()] = float(info[5])
                elif info[0].split("[")[1] == "E_mod_pauli":
                    eda2[info[0].split("[")[1].lower()] = float(info[5])

        elif line.strip().startswith("Simplified EDA Summary"):
            next(lit)
            for line_2 in lit:
                if line_2.strip().startswith("-----"):
                    break
                info = line_2.split()
                if info[0] in ["PREPARATION", "FROZEN", "DISPERSION", "POLARIZATION", "TOTAL"]:
                    eda2[info[0].lower()] = float(info[1])
                elif info[0].split("[")[-1] == "PAULI":
                    eda2[info[0].split("[")[-1].lower()] = float(info[1].split("]")[0])
                elif info[0] == "CHARGE":
                    eda2[info[0].lower() + " " + info[1].lower()] = float(info[2])

        elif line.strip().startswith("-------------------------------------------------------"):
            break

    return eda2