Source code for iodata.formats.molekel

# IODATA is an input and output module for quantum chemistry.
# Copyright (C) 2011-2019 The IODATA Development Team
#
# This file is part of IODATA.
#
# IODATA is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# IODATA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
# --
"""Molekel file format.

This format is used by two programs:
`Molekel <http://ugovaretto.github.io/molekel/wiki/pmwiki.php/Main/HomePage.html>`_ and
`Orca <https://sites.google.com/site/orcainputlibrary/>`_.
"""

from typing import TextIO
from warnings import warn

import numpy as np
from numpy.typing import NDArray

from ..basis import MolecularBasis, Shell, angmom_its, angmom_sti
from ..convert import convert_conventions
from ..docstrings import document_dump_one, document_load_one
from ..iodata import IOData
from ..orbitals import MolecularOrbitals
from ..prepare import prepare_segmented, prepare_unrestricted_aminusb
from ..utils import DumpError, LineIterator, LoadError, LoadWarning, PrepareDumpError, angstrom
from .molden import CONVENTIONS, _fix_molden_from_buggy_codes

__all__ = ()


PATTERNS = ["*.mkl"]


def _load_helper_charge_spinpol(lit: LineIterator) -> list[int]:
    charge, spinmult = (int(word) for word in next(lit).split())
    spinpol = spinmult - 1
    return charge, spinpol


def _load_helper_charges(lit: LineIterator) -> dict:
    atcharges = []
    for line in lit:
        if line.strip() == "$END":
            break
        atcharges.append(float(line))

    return {"mulliken": np.array(atcharges)}


def _load_helper_atoms(lit: LineIterator) -> tuple[NDArray[int], NDArray[float]]:
    atnums = []
    atcoords = []
    for line in lit:
        if line.strip() == "$END":
            break
        words = line.split()
        atnums.append(int(words[0]))
        atcoords.append([float(words[1]), float(words[2]), float(words[3])])
    atnums = np.array(atnums, int)
    atcoords = np.array(atcoords) * angstrom
    return atnums, atcoords


def _load_helper_obasis(lit: LineIterator) -> MolecularBasis:
    shells = []
    icenter = 0
    while True:
        line = next(lit).strip()
        if line == "$END":
            break
        if line == "":
            continue
        if line == "$$":
            icenter += 1
            continue
        # Shell header, always assuming pure functions
        words = line.split()
        angmom = angmom_sti(words[1])
        nbasis_shell = int(words[0])
        if nbasis_shell == len(CONVENTIONS[(angmom, "c")]):
            kind = "c"
        elif nbasis_shell == len(CONVENTIONS[(angmom, "p")]):
            kind = "p"
        else:
            raise LoadError(
                f"Cannot interpret angmom={angmom} with nbasis_shell={nbasis_shell}.", lit
            )
        exponents = []
        coeffs = []
        for line in lit:
            words = line.split()
            if len(words) != 2:
                lit.back(line)
                break
            exponents.append(float(words[0]))
            coeffs.append([float(words[1])])
        shells.append(Shell(icenter, [angmom], [kind], np.array(exponents), np.array(coeffs)))
    return MolecularBasis(shells, CONVENTIONS, "L2")


def _load_helper_coeffs(
    lit: LineIterator, nbasis: int
) -> tuple[NDArray[float], NDArray[float], list]:
    coeffs = []
    energies = []
    irreps = []

    in_orb = 0
    for line in lit:
        if line.strip() == "$END":
            break
        if in_orb == 0:
            # read a1g line
            words = line.split()
            ncol = len(words)
            if ncol == 0:
                raise LoadError("Expect irrep, got empty line", line)
            irreps.extend(words)
            cols = [np.zeros((nbasis, 1), float) for _ in range(ncol)]
            in_orb = 1
        elif in_orb == 1:
            # read energies
            words = line.split()
            if len(words) != ncol:
                raise LoadError(f"Wrong number of energies: expected {ncol}, got {len(words)}", lit)
            energies.extend(float(word) for word in words)
            in_orb = 2
            ibasis = 0
        elif in_orb == 2:
            # read expansion coefficients
            words = line.split()
            if len(words) != ncol:
                raise LoadError(
                    f"Wrong number of coefficients: expected {ncol}, got {len(words)}", lit
                )
            for icol in range(ncol):
                cols[icol][ibasis] = float(words[icol])
            ibasis += 1
            if ibasis == nbasis:
                in_orb = 0
                coeffs.extend(cols)

    return np.hstack(coeffs), np.array(energies), irreps


def _load_helper_occ(lit: LineIterator) -> NDArray[float]:
    occs = []
    for line in lit:
        if line.strip() == "$END":
            break
        occs.extend(float(word) for word in line.split())
    return np.array(occs)



[docs]
@document_load_one(
    "Molekel",
    ["atcoords", "atnums", "mo", "obasis"],
    ["atcharges"],
    {
        "norm_threshold": "When the normalization of one of the orbitals exceeds "
        "norm_threshold, a correction is attempted or an error "
        "is raised when no suitable correction can be found."
    },
)
def load_one(lit: LineIterator, norm_threshold: float = 1e-4) -> dict:
    """Do not edit this docstring. It will be overwritten."""
    charge = None
    atnums = None
    atcoords = None
    obasis = None
    coeffsa = None
    energiesa = None
    occsa = None
    coeffsb = None
    energiesb = None
    occsb = None
    atcharges = None
    irrepsa = None
    irrepsb = None
    # Using a loop because we're not entirely sure if sections in an MKL file
    # have a fixed order.
    while True:
        try:
            line = next(lit).strip()
        except StopIteration:
            # There is no file-end marker we can use, so we only stop when
            # reaching the end of the file.
            break
        if line == "$CHAR_MULT":
            charge, spinpol = _load_helper_charge_spinpol(lit)
        elif line == "$CHARGES":
            atcharges = _load_helper_charges(lit)
        elif line == "$COORD":
            atnums, atcoords = _load_helper_atoms(lit)
        elif line == "$BASIS":
            obasis = _load_helper_obasis(lit)
        elif line == "$COEFF_ALPHA":
            coeffsa, energiesa, irrepsa = _load_helper_coeffs(lit, obasis.nbasis)
        elif line == "$OCC_ALPHA":
            occsa = _load_helper_occ(lit)
        elif line == "$COEFF_BETA":
            coeffsb, energiesb, irrepsb = _load_helper_coeffs(lit, obasis.nbasis)
        elif line == "$OCC_BETA":
            occsb = _load_helper_occ(lit)

    if charge is None:
        raise LoadError("Charge and spin polarization not found.", lit)
    if atcoords is None:
        raise LoadError("Coordinates not found.", lit)
    if obasis is None:
        raise LoadError("Orbital basis not found.", lit)
    if coeffsa is None:
        raise LoadError("Alpha orbitals not found.", lit)
    if occsa is None:
        raise LoadError("Alpha occupation numbers not found.", lit)

    nelec = atnums.sum() - charge
    if coeffsb is None:
        # restricted closed-shell
        if nelec % 2 != 0:
            raise LoadError("Odd number of electrons found in restricted case.", lit)
        if abs(occsa.sum() - nelec) > 1e-7:
            raise LoadError("Occupation numbers are inconsistent with number of electrons", lit)
        mo = MolecularOrbitals(
            "restricted", coeffsa.shape[1], coeffsa.shape[1], occsa, coeffsa, energiesa, irrepsa
        )
    else:
        if occsb is None:
            raise LoadError(
                "Beta occupation numbers not found in mkl file while beta orbitals were present.",
                lit,
            )
        nalpha = occsa.sum()
        nbeta = occsb.sum()
        if abs(spinpol - abs(nalpha - nbeta)) > 1e-7:
            warn(
                LoadWarning(
                    f"The spin polarization ({spinpol}) is inconsistent with the "
                    f"difference between alpha and beta occupation numbers ({nalpha} - {nbeta}). "
                    "The spin polarization will be rederived from the occupation numbers.",
                    lit,
                ),
                stacklevel=2,
            )
        if abs(nelec - (nalpha + nbeta)) > 1e-7:
            raise LoadError("Occupation numbers are inconsistent with number of electrons", lit)
        mo = MolecularOrbitals(
            "unrestricted",
            coeffsa.shape[1],
            coeffsb.shape[1],
            np.concatenate((occsa, occsb), axis=0),
            np.concatenate((coeffsa, coeffsb), axis=1),
            np.concatenate((energiesa, energiesb), axis=0),
            irrepsa + irrepsb,
        )

    result = {
        "atcoords": atcoords,
        "atnums": atnums,
        "obasis": obasis,
        "mo": mo,
        "atcharges": atcharges,
    }
    _fix_molden_from_buggy_codes(result, lit, norm_threshold)
    return result




[docs]
def prepare_dump(data: IOData, allow_changes: bool, filename: str) -> IOData:
    """Check the compatibility of the IOData object with the Molekel format.

    Parameters
    ----------
    data
        The IOData instance to be checked.
    allow_changes
        Whether conversion of the IOData object to a compatible form is allowed or not.
    filename
        The file to be written to, only used for error messages.

    Returns
    -------
    data
        The given ``IOData`` object or a shallow copy with some new attributes.

    Raises
    ------
    PrepareDumpError
        If the given ``IOData`` instance is not compatible with the WFN format.
    PrepareDumpWarning
        If the a converted ``IOData`` instance is returned.
    """
    if data.mo is None:
        raise PrepareDumpError("The Molekel format requires molecular orbitals.", filename)
    if data.obasis is None:
        raise PrepareDumpError("The Molekel format requires an orbital basis set.", filename)
    if data.mo.kind == "generalized":
        raise PrepareDumpError("Cannot write Molekel file with generalized orbitals.", filename)
    data = prepare_unrestricted_aminusb(data, allow_changes, filename, "Molekel")
    return prepare_segmented(data, False, allow_changes, filename, "Molekel")




[docs]
@document_dump_one("Molekel", ["atcoords", "atnums", "mo", "obasis"], ["atcharges"])
def dump_one(f: TextIO, data: IOData):
    """Do not edit this docstring. It will be overwritten."""
    # Header
    f.write("$MKL\n")
    f.write("#\n")
    f.write("# MKL format file produced by IOData\n")
    f.write("#\n")

    # CHAR_MUL
    f.write("$CHAR_MULT\n")
    f.write(f"  {data.charge:.0f} {data.spinpol + 1:.0f}\n")
    f.write("$END\n")
    f.write("\n")

    # COORD
    atcoords = data.atcoords / angstrom
    f.write("$COORD\n")
    for n, coord in zip(data.atnums, atcoords):
        f.write(f"   {n:d}   {coord[0]: ,.6f}  {coord[1]: ,.6f}  {coord[2]: ,.6f}\n")
    f.write("$END\n")
    f.write("\n")

    # CHARGES
    if "mulliken" in data.atcharges:
        f.write("$CHARGES\n")
        for charge in data.atcharges["mulliken"]:
            f.write(f"  {charge: ,.6f}\n")
        f.write("$END\n")
        f.write("\n")

    # BASIS
    f.write("$BASIS\n")
    iatom_last = 0
    for shell in data.obasis.shells:
        if shell.ncon != 1:
            raise RuntimeError("Generalized contractions not supported. Call prepare_dump first.")
        iatom_new = shell.icenter
        if iatom_new != iatom_last:
            f.write("$$\n")
        angmom = shell.angmoms[0]
        kind = shell.kinds[0]
        iatom_last = shell.icenter
        nbasis = len(CONVENTIONS[(angmom, kind)])
        f.write(f" {nbasis} {angmom_its(angmom).capitalize():1s} 1.00\n")
        for exponent, coeff in zip(shell.exponents, shell.coeffs[:, 0]):
            f.write(f"{exponent:20.10f} {coeff:17.10f}\n")
    f.write("\n")
    f.write("$END\n")
    f.write("\n")

    if data.mo.kind == "restricted":
        # COEFF_ALPHA
        f.write("$COEFF_ALPHA\n")
        _dump_helper_coeffs(f, data, spin="a")

        # OCC_ALPHA
        f.write("$OCC_ALPHA\n")
        _dump_helper_occ(f, data, spin="ab")

    # Not taking into account generalized.
    elif data.mo.kind == "unrestricted":
        # COEFF_ALPHA
        f.write("$COEFF_ALPHA\n")
        _dump_helper_coeffs(f, data, spin="a")

        # OCC_ALPHA
        f.write("$OCC_ALPHA\n")
        _dump_helper_occ(f, data, spin="a")
        f.write("\n")

        # COEFF_BETA
        f.write("$COEFF_BETA\n")
        _dump_helper_coeffs(f, data, spin="b")

        # OCC_BETA
        f.write("$OCC_BETA\n")
        _dump_helper_occ(f, data, spin="b")

    else:
        raise RuntimeError("Generalized orbitals are not support. Call prepare_dump first.")



# Defining help dumping functions
def _dump_helper_coeffs(f, data, spin=None):
    permutation, signs = convert_conventions(data.obasis, CONVENTIONS)
    if spin == "a":
        norb = data.mo.norba
        coeff = data.mo.coeffsa[permutation] * signs.reshape(-1, 1)
        ener = data.mo.energiesa
        irreps = data.mo.irreps[:norb] if data.mo.irreps is not None else ["a1g"] * norb
    elif spin == "b":
        norb = data.mo.norbb
        coeff = data.mo.coeffsb[permutation] * signs.reshape(-1, 1)
        ener = data.mo.energiesb
        irreps = data.mo.irreps[norb:] if data.mo.irreps is not None else ["a1g"] * norb
    else:
        raise DumpError("A spin must be specified", f)

    for j in range(0, norb, 5):
        en = " ".join([f"   {e: ,.12f}" for e in ener[j : j + 5]])
        irre = " ".join([f"{irr}" for irr in irreps[j : j + 5]])
        f.write(irre + "\n")
        f.write(en + "\n")
        for orb in coeff[:, j : j + 5]:
            coeffs = " ".join([f"  {c: ,.12f}" for c in orb])
            f.write(coeffs + "\n")

    f.write(" $END\n")
    f.write("\n")


def _dump_helper_occ(f, data, spin=None):
    if spin == "a":
        norb = data.mo.norba
        occ = data.mo.occsa
    elif spin == "b":
        norb = data.mo.norbb
        occ = data.mo.occsb
    elif spin == "ab":
        norb = data.mo.norba
        occ = data.mo.occs
    else:
        raise DumpError("A spin must be specified", f)

    for j in range(0, norb, 5):
        occs = " ".join([f"  {o: ,.7f}" for o in occ[j : j + 5]])
        f.write(occs + "\n")
    f.write(" $END\n")