Source code for iodata.formats.cp2klog

# IODATA is an input and output module for quantum chemistry.
# Copyright (C) 2011-2019 The IODATA Development Team
#
# This file is part of IODATA.
#
# IODATA is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# IODATA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
# --
"""CP2K ATOM output file format."""

from typing import Union

import numpy as np
from numpy.typing import NDArray

from ..basis import HORTON2_CONVENTIONS, MolecularBasis, Shell, angmom_sti
from ..docstrings import document_load_one
from ..orbitals import MolecularOrbitals
from ..overlap import factorial2
from ..utils import LineIterator, LoadError

__all__ = []


PATTERNS = ["*.cp2k.out"]


CONVENTIONS = {
    (0, "c"): HORTON2_CONVENTIONS[(0, "c")],
    (1, "c"): HORTON2_CONVENTIONS[(1, "c")],
    (2, "p"): HORTON2_CONVENTIONS[(2, "p")],
    (3, "p"): HORTON2_CONVENTIONS[(3, "p")],
}


def _get_cp2k_norm_corrections(
    ell: int, alphas: Union[float, NDArray[float]]
) -> Union[float, NDArray[float]]:
    """Compute the corrections for the normalization of the basis functions.

    This correction is needed because the CP2K atom code works with a different
    type of normalization for the primitives. IOData assumes Gaussian primitives
    are always L2-normalized.

    Parameters
    ----------
    ell
        The angular momentum of the (pure) basis function. (s=0, p=1, ...)
    alphas
        The exponent or exponents of the Gaussian primitives for which the correction
        is to be computed.

    Returns
    -------
    corrections
        The scale factor for the expansion coefficients of the wavefunction in
        terms of primitive Gaussians. The inverse of this correction can be
        applied to the contraction coefficients.

    """
    expzet = 0.25 * (2 * ell + 3)
    prefac = np.sqrt(np.sqrt(np.pi) / 2.0 ** (ell + 2) * factorial2(2 * ell + 1))
    zeta = 2.0 * alphas
    return zeta**expzet / prefac


def _read_cp2k_contracted_obasis(lit: LineIterator) -> MolecularBasis:
    """Read a contracted basis set from an open CP2K ATOM output file.

    Parameters
    ----------
    lit
        The line iterator to read the data from.

    Returns
    -------
    The orbital basis.

    """
    shells = []
    while True:
        line = next(lit)
        if line[3:12] != "Functions":
            break
        angmom = angmom_sti(line[1:2])
        exponents = []
        coeffs = []
        for line in lit:
            if line[3:12] == "Functions" or line.startswith(" *******************"):
                break
            values = [float(w) for w in line.split()]
            # one exponent per line
            exponents.append(values[0])
            # many contraction coefficients per line, all corresponding to the
            # same primitive, so rows in coeffs
            coeffs.append(np.array(values[1:]) / _get_cp2k_norm_corrections(angmom, values[0]))
        # Push back the last line for the next iteration
        lit.back(line)
        # Build the shell
        exponents = np.array(exponents)
        coeffs = np.array(coeffs)
        kind = "c" if angmom < 2 else "p"
        shells.append(
            Shell(
                0, np.array([angmom] * coeffs.shape[1]), [kind] * coeffs.shape[1], exponents, coeffs
            )
        )

    return MolecularBasis(shells, CONVENTIONS, "L2")


def _read_cp2k_uncontracted_obasis(lit: LineIterator) -> MolecularBasis:
    """Read an uncontracted basis set from an open CP2K ATOM output file.

    Parameters
    ----------
    lit
        The line iterator to read the data from.

    Returns
    -------
    The orbital basis parameters read from the file.
    Can be used to initialize a GOBasis object.

    """
    # Load the relevant data from the file
    shells = []
    next(lit)
    while True:
        line = next(lit)
        if line[3:13] != "Exponents:":
            break
        angmom = angmom_sti(line[1:2])
        exponents = []
        coeffs = []
        while True:
            if line.strip() == "" or "*****" in line:
                break
            words = line.split()
            # read the exponent
            exponent = float(words[-1])
            exponents.append(exponent)
            coeffs.append(1.0 / _get_cp2k_norm_corrections(angmom, exponent))
            line = next(lit)
        # Build the shell
        kind = "c" if angmom < 2 else "p"
        for exponent, coeff in zip(exponents, coeffs):
            shells.append(
                Shell(0, np.array([angmom]), [kind], np.array([exponent]), np.array([[coeff]]))
            )

    return MolecularBasis(shells, CONVENTIONS, "L2")


def _read_cp2k_obasis(lit: LineIterator) -> dict:
    """Read atomic orbital basis set from a CP2K ATOM file object.

    Parameters
    ----------
    lit
        The line iterator to read the data from.

    Returns
    -------
    The atomic orbital basis data which can be used to initialize a ``GOBasis`` class.

    """
    next(lit)  # Skip empty line
    line = next(lit)  # Check for contracted versus uncontracted
    if line == (
        " ********************** Contracted Gaussian Type Orbitals **********************\n"
    ):
        return _read_cp2k_contracted_obasis(lit)
    if line == (
        " ********************* Uncontracted Gaussian Type Orbitals *********************\n"
    ):
        return _read_cp2k_uncontracted_obasis(lit)
    raise LoadError("Could not find basis set in CP2K ATOM output.", lit)


def _read_cp2k_occupations_energies(
    lit: LineIterator, restricted: bool
) -> list[tuple[int, int, float, float]]:
    """Read orbital occupation numbers and energies from a CP2K ATOM file object.

    Parameters
    ----------
    lit
        The line iterator to read the data from.
    restricted
        If ``True`` the wave-function is considered to be restricted. If
        ``False`` the unrestricted wave-function is assumed.

    Returns
    -------
    oe_alpha, oe_beta
        A list with orbital properties. Each element is a tuple with the
        following info: (angular_momentum l, spin component: 'alpha' or
        'beta', occupation number, orbital energy).

    """
    oe_alpha = []
    oe_beta = []
    empty = 0
    while empty < 2:
        line = next(lit)
        words = line.split()
        if not words:
            empty += 1
            continue
        empty = 0
        s = int(words[0])
        ell = int(words[2 - restricted])
        occ = float(words[3 - restricted])
        ener = float(words[4 - restricted])
        if restricted or words[1] == "alpha":
            oe_alpha.append((ell, s, occ, ener))
        else:
            oe_beta.append((ell, s, occ, ener))
    return oe_alpha, oe_beta


def _read_cp2k_orbital_coeffs(
    lit: LineIterator, oe: list[tuple[int, int, float, float]]
) -> dict[tuple[int, int], NDArray[float]]:
    """Read the expansion coefficients of the orbital from an open CP2K ATOM output.

    Parameters
    ----------
    lit
        The line iterator to read the data from.
    oe
        The orbital occupation numbers and energies read with
        ``_read_cp2k_occupations_energies``.

    Returns
    -------
    Dictionary in which keys are an (l, s) pair and values are arrays with orbital coefficients.

    """
    allcoeffs = {}
    next(lit)
    while len(allcoeffs) < len(oe):
        line = next(lit)
        assert line.startswith("    ORBITAL      L =")
        words = line.split()
        angmom = int(words[3])
        state = int(words[6])
        coeffs = []
        for line in lit:
            if line.strip() == "":
                break
            coeffs.append(float(line))
        allcoeffs[(angmom, state)] = np.array(coeffs)
    return allcoeffs


def _get_norb_nel(oe: list[tuple[int, int, float, float]]) -> tuple[int, float]:
    """Return number of orbitals and electrons.

    Parameters
    ----------
    oe
         The orbital occupation numbers and energies read with
         ``_read_cp2k_occupations_energies``.

    Returns
    -------
    norb
        The number of orbitals.
    nelec
        The number of electrons.

    """
    norb = 0
    nel = 0
    for row in oe:
        norb += 2 * row[0] + 1
        nel += row[2]
    return norb, nel


def _fill_orbitals(
    orb_coeffs: NDArray[float],
    orb_energies: NDArray[float],
    orb_occupations: NDArray[float],
    oe: list[tuple[int, int, float, float]],
    coeffs: dict[tuple[int, int], NDArray[float]],
    obasis: MolecularBasis,
    restricted: bool,
):
    """Fill in orbital coefficients, energies and occupation numbers.

    The data is entered int ``orb_coeffs``, ``orb_energies``, and ``orb_occupations``.

    Parameters
    ----------
    orb_coeffs
        The orbital coefficients. Will be written to.
    orb_energies
        The orbital energies. Will be written to.
    orb_occupations
        The orbital coefficients. Will be written to.
    oe
        The orbital occupation numbers and energies read with
        ``_read_cp2k_occupations_energies``.
    coeffs
        The orbital coefficients read with ``_read_cp2k_orbital_coeffs``.
    obasis
        The molecular basis set
    restricted
        Is wavefunction restricted or unrestricted?

    """
    # Find the offsets for each angular momentum
    offset = 0
    offsets = []
    ls = np.concatenate([shell.angmoms for shell in obasis.shells])
    for ell in sorted(set(ls)):
        offsets.append(offset)
        offset += (2 * ell + 1) * (ell == ls).sum()
    del offset

    # Fill in the coefficients
    iorb = 0
    for ell, state, occ, ener in oe:
        cs = coeffs.get((ell, state))
        stride = 2 * ell + 1
        for im in range(2 * ell + 1):
            orb_energies[iorb] = ener
            orb_occupations[iorb] = occ / float((restricted + 1) * (2 * ell + 1))
            for ic, c in enumerate(cs):
                orb_coeffs[offsets[ell] + stride * ic + im, iorb] = c
            iorb += 1


LOAD_ONE_NOTES = """

This function assumes that the following subsections are present in the CP2K
ATOM input file, in the section ``ATOM%PRINT``:

.. code-block:: text

  &PRINT
    &POTENTIAL
    &END POTENTIAL
    &BASIS_SET
    &END BASIS_SET
    &ORBITALS
    &END ORBITALS
  &END PRINT

"""


[docs] @document_load_one( "CP2K ATOM outupt", ["atcoords", "atcorenums", "atnums", "energy", "mo", "obasis"], [], {}, LOAD_ONE_NOTES, ) def load_one(lit: LineIterator) -> dict: """Do not edit this docstring. It will be overwritten.""" # Find the element number atnum = None for line in lit: if line.startswith(" Atomic Energy Calculation"): atnum = int(line[-5:-1]) break # Go to the all-electron basis set and read it. for line in lit: if line.startswith(" All Electron Basis"): break ae_obasis = _read_cp2k_obasis(lit) # Go to the pseudo basis set and read it. for line in lit: if line.startswith(" Pseudopotential Basis"): break pp_obasis = _read_cp2k_obasis(lit) # Search for (un)restricted restricted = None for line in lit: if line.startswith(" METHOD |"): if "U" in line: restricted = False break if "R" in line: restricted = True break # Search for the core charge (pseudo number) atcorenum = None for line in lit: if line.startswith(" Core Charge"): atcorenum = float(line[70:]) assert atcorenum == int(atcorenum) break if line.startswith(" Electronic structure"): atcorenum = float(atnum) break # Select the correct basis obasis = ae_obasis if atcorenum == atnum else pp_obasis # Search for energy for line in lit: if line.startswith(" Energy components [Hartree] Total Energy ::"): energy = float(line[60:]) break # Read orbital energies and occupations for line in lit: if line.startswith(" Orbital energies"): break next(lit) oe_alpha, oe_beta = _read_cp2k_occupations_energies(lit, restricted) # Read orbital expansion coefficients line = next(lit) if line not in [ " Atomic orbital expansion coefficients [Alpha]\n", " Atomic orbital expansion coefficients []\n", ]: raise LoadError("Could not find orbital coefficients in CP2K ATOM output.", lit) coeffs_alpha = _read_cp2k_orbital_coeffs(lit, oe_alpha) if not restricted: line = next(lit) if line != " Atomic orbital expansion coefficients [Beta]\n": raise LoadError("Could not find beta orbital coefficient in CP2K ATOM output.", lit) coeffs_beta = _read_cp2k_orbital_coeffs(lit, oe_beta) # Turn orbital data into a MolecularOrbitals object. if restricted: norb, nel = _get_norb_nel(oe_alpha) assert nel % 2 == 0 orb_alpha_coeffs = np.zeros([obasis.nbasis, norb]) orb_alpha_energies = np.zeros(norb) orb_alpha_occs = np.zeros(norb) _fill_orbitals( orb_alpha_coeffs, orb_alpha_energies, orb_alpha_occs, oe_alpha, coeffs_alpha, obasis, restricted, ) mo = MolecularOrbitals( "restricted", norb, norb, 2 * orb_alpha_occs, orb_alpha_coeffs, orb_alpha_energies ) else: norb_alpha = _get_norb_nel(oe_alpha)[0] norb_beta = _get_norb_nel(oe_beta)[0] assert norb_alpha == norb_beta orb_alpha_coeffs = np.zeros([obasis.nbasis, norb_alpha]) orb_alpha_energies = np.zeros(norb_alpha) orb_alpha_occs = np.zeros(norb_alpha) orb_beta_coeffs = np.zeros([obasis.nbasis, norb_beta]) orb_beta_energies = np.zeros(norb_beta) orb_beta_occs = np.zeros(norb_beta) _fill_orbitals( orb_alpha_coeffs, orb_alpha_energies, orb_alpha_occs, oe_alpha, coeffs_alpha, obasis, restricted, ) _fill_orbitals( orb_beta_coeffs, orb_beta_energies, orb_beta_occs, oe_beta, coeffs_beta, obasis, restricted, ) mo = MolecularOrbitals( "unrestricted", norb_alpha, norb_beta, np.concatenate((orb_alpha_occs, orb_beta_occs), axis=0), np.concatenate((orb_alpha_coeffs, orb_beta_coeffs), axis=1), np.concatenate((orb_alpha_energies, orb_beta_energies), axis=0), ) return { "obasis": obasis, "mo": mo, "atcoords": np.zeros((1, 3), float), "atnums": np.array([atnum]), "energy": energy, "atcorenums": np.array([atcorenum]), }