# IODATA is an input and output module for quantum chemistry.
# Copyright (C) 2011-2019 The IODATA Development Team
#
# This file is part of IODATA.
#
# IODATA is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# IODATA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
# --
"""Molekel file format.
This format is used by two programs:
`Molekel <http://ugovaretto.github.io/molekel/wiki/pmwiki.php/Main/HomePage.html>`_ and
`Orca <https://sites.google.com/site/orcainputlibrary/>`_.
"""
from typing import TextIO
from warnings import warn
import numpy as np
from numpy.typing import NDArray
from ..basis import MolecularBasis, Shell, angmom_its, angmom_sti
from ..convert import convert_conventions
from ..docstrings import document_dump_one, document_load_one
from ..iodata import IOData
from ..orbitals import MolecularOrbitals
from ..prepare import prepare_segmented, prepare_unrestricted_aminusb
from ..utils import DumpError, LineIterator, LoadError, LoadWarning, PrepareDumpError, angstrom
from .molden import CONVENTIONS, _fix_molden_from_buggy_codes
__all__ = ()
PATTERNS = ["*.mkl"]
def _load_helper_charge_spinpol(lit: LineIterator) -> list[int]:
charge, spinmult = (int(word) for word in next(lit).split())
spinpol = spinmult - 1
return charge, spinpol
def _load_helper_charges(lit: LineIterator) -> dict:
atcharges = []
for line in lit:
if line.strip() == "$END":
break
atcharges.append(float(line))
return {"mulliken": np.array(atcharges)}
def _load_helper_atoms(lit: LineIterator) -> tuple[NDArray[int], NDArray[float]]:
atnums = []
atcoords = []
for line in lit:
if line.strip() == "$END":
break
words = line.split()
atnums.append(int(words[0]))
atcoords.append([float(words[1]), float(words[2]), float(words[3])])
atnums = np.array(atnums, int)
atcoords = np.array(atcoords) * angstrom
return atnums, atcoords
def _load_helper_obasis(lit: LineIterator) -> MolecularBasis:
shells = []
icenter = 0
while True:
line = next(lit).strip()
if line == "$END":
break
if line == "":
continue
if line == "$$":
icenter += 1
continue
# Shell header, always assuming pure functions
words = line.split()
angmom = angmom_sti(words[1])
nbasis_shell = int(words[0])
if nbasis_shell == len(CONVENTIONS[(angmom, "c")]):
kind = "c"
elif nbasis_shell == len(CONVENTIONS[(angmom, "p")]):
kind = "p"
else:
raise LoadError(
f"Cannot interpret angmom={angmom} with nbasis_shell={nbasis_shell}.", lit
)
exponents = []
coeffs = []
for line in lit:
words = line.split()
if len(words) != 2:
lit.back(line)
break
exponents.append(float(words[0]))
coeffs.append([float(words[1])])
shells.append(Shell(icenter, [angmom], [kind], np.array(exponents), np.array(coeffs)))
return MolecularBasis(shells, CONVENTIONS, "L2")
def _load_helper_coeffs(
lit: LineIterator, nbasis: int
) -> tuple[NDArray[float], NDArray[float], list]:
coeffs = []
energies = []
irreps = []
in_orb = 0
for line in lit:
if line.strip() == "$END":
break
if in_orb == 0:
# read a1g line
words = line.split()
ncol = len(words)
if ncol == 0:
raise LoadError("Expect irrep, got empty line", line)
irreps.extend(words)
cols = [np.zeros((nbasis, 1), float) for _ in range(ncol)]
in_orb = 1
elif in_orb == 1:
# read energies
words = line.split()
if len(words) != ncol:
raise LoadError(f"Wrong number of energies: expected {ncol}, got {len(words)}", lit)
energies.extend(float(word) for word in words)
in_orb = 2
ibasis = 0
elif in_orb == 2:
# read expansion coefficients
words = line.split()
if len(words) != ncol:
raise LoadError(
f"Wrong number of coefficients: expected {ncol}, got {len(words)}", lit
)
for icol in range(ncol):
cols[icol][ibasis] = float(words[icol])
ibasis += 1
if ibasis == nbasis:
in_orb = 0
coeffs.extend(cols)
return np.hstack(coeffs), np.array(energies), irreps
def _load_helper_occ(lit: LineIterator) -> NDArray[float]:
occs = []
for line in lit:
if line.strip() == "$END":
break
occs.extend(float(word) for word in line.split())
return np.array(occs)
[docs]
@document_load_one(
"Molekel",
["atcoords", "atnums", "mo", "obasis"],
["atcharges"],
{
"norm_threshold": "When the normalization of one of the orbitals exceeds "
"norm_threshold, a correction is attempted or an error "
"is raised when no suitable correction can be found."
},
)
def load_one(lit: LineIterator, norm_threshold: float = 1e-4) -> dict:
"""Do not edit this docstring. It will be overwritten."""
charge = None
atnums = None
atcoords = None
obasis = None
coeffsa = None
energiesa = None
occsa = None
coeffsb = None
energiesb = None
occsb = None
atcharges = None
irrepsa = None
irrepsb = None
# Using a loop because we're not entirely sure if sections in an MKL file
# have a fixed order.
while True:
try:
line = next(lit).strip()
except StopIteration:
# There is no file-end marker we can use, so we only stop when
# reaching the end of the file.
break
if line == "$CHAR_MULT":
charge, spinpol = _load_helper_charge_spinpol(lit)
elif line == "$CHARGES":
atcharges = _load_helper_charges(lit)
elif line == "$COORD":
atnums, atcoords = _load_helper_atoms(lit)
elif line == "$BASIS":
obasis = _load_helper_obasis(lit)
elif line == "$COEFF_ALPHA":
coeffsa, energiesa, irrepsa = _load_helper_coeffs(lit, obasis.nbasis)
elif line == "$OCC_ALPHA":
occsa = _load_helper_occ(lit)
elif line == "$COEFF_BETA":
coeffsb, energiesb, irrepsb = _load_helper_coeffs(lit, obasis.nbasis)
elif line == "$OCC_BETA":
occsb = _load_helper_occ(lit)
if charge is None:
raise LoadError("Charge and spin polarization not found.", lit)
if atcoords is None:
raise LoadError("Coordinates not found.", lit)
if obasis is None:
raise LoadError("Orbital basis not found.", lit)
if coeffsa is None:
raise LoadError("Alpha orbitals not found.", lit)
if occsa is None:
raise LoadError("Alpha occupation numbers not found.", lit)
nelec = atnums.sum() - charge
if coeffsb is None:
# restricted closed-shell
if nelec % 2 != 0:
raise LoadError("Odd number of electrons found in restricted case.", lit)
if abs(occsa.sum() - nelec) > 1e-7:
raise LoadError("Occupation numbers are inconsistent with number of electrons", lit)
mo = MolecularOrbitals(
"restricted", coeffsa.shape[1], coeffsa.shape[1], occsa, coeffsa, energiesa, irrepsa
)
else:
if occsb is None:
raise LoadError(
"Beta occupation numbers not found in mkl file while beta orbitals were present.",
lit,
)
nalpha = occsa.sum()
nbeta = occsb.sum()
if abs(spinpol - abs(nalpha - nbeta)) > 1e-7:
warn(
LoadWarning(
f"The spin polarization ({spinpol}) is inconsistent with the "
f"difference between alpha and beta occupation numbers ({nalpha} - {nbeta}). "
"The spin polarization will be rederived from the occupation numbers.",
lit,
),
stacklevel=2,
)
if abs(nelec - (nalpha + nbeta)) > 1e-7:
raise LoadError("Occupation numbers are inconsistent with number of electrons", lit)
mo = MolecularOrbitals(
"unrestricted",
coeffsa.shape[1],
coeffsb.shape[1],
np.concatenate((occsa, occsb), axis=0),
np.concatenate((coeffsa, coeffsb), axis=1),
np.concatenate((energiesa, energiesb), axis=0),
irrepsa + irrepsb,
)
result = {
"atcoords": atcoords,
"atnums": atnums,
"obasis": obasis,
"mo": mo,
"atcharges": atcharges,
}
_fix_molden_from_buggy_codes(result, lit, norm_threshold)
return result
[docs]
def prepare_dump(data: IOData, allow_changes: bool, filename: str) -> IOData:
"""Check the compatibility of the IOData object with the Molekel format.
Parameters
----------
data
The IOData instance to be checked.
allow_changes
Whether conversion of the IOData object to a compatible form is allowed or not.
filename
The file to be written to, only used for error messages.
Returns
-------
data
The given ``IOData`` object or a shallow copy with some new attributes.
Raises
------
PrepareDumpError
If the given ``IOData`` instance is not compatible with the WFN format.
PrepareDumpWarning
If the a converted ``IOData`` instance is returned.
"""
if data.mo is None:
raise PrepareDumpError("The Molekel format requires molecular orbitals.", filename)
if data.obasis is None:
raise PrepareDumpError("The Molekel format requires an orbital basis set.", filename)
if data.mo.kind == "generalized":
raise PrepareDumpError("Cannot write Molekel file with generalized orbitals.", filename)
data = prepare_unrestricted_aminusb(data, allow_changes, filename, "Molekel")
return prepare_segmented(data, False, allow_changes, filename, "Molekel")
[docs]
@document_dump_one("Molekel", ["atcoords", "atnums", "mo", "obasis"], ["atcharges"])
def dump_one(f: TextIO, data: IOData):
"""Do not edit this docstring. It will be overwritten."""
# Header
f.write("$MKL\n")
f.write("#\n")
f.write("# MKL format file produced by IOData\n")
f.write("#\n")
# CHAR_MUL
f.write("$CHAR_MULT\n")
f.write(f" {data.charge:.0f} {data.spinpol + 1:.0f}\n")
f.write("$END\n")
f.write("\n")
# COORD
atcoords = data.atcoords / angstrom
f.write("$COORD\n")
for n, coord in zip(data.atnums, atcoords):
f.write(f" {n:d} {coord[0]: ,.6f} {coord[1]: ,.6f} {coord[2]: ,.6f}\n")
f.write("$END\n")
f.write("\n")
# CHARGES
if "mulliken" in data.atcharges:
f.write("$CHARGES\n")
for charge in data.atcharges["mulliken"]:
f.write(f" {charge: ,.6f}\n")
f.write("$END\n")
f.write("\n")
# BASIS
f.write("$BASIS\n")
iatom_last = 0
for shell in data.obasis.shells:
if shell.ncon != 1:
raise RuntimeError("Generalized contractions not supported. Call prepare_dump first.")
iatom_new = shell.icenter
if iatom_new != iatom_last:
f.write("$$\n")
angmom = shell.angmoms[0]
kind = shell.kinds[0]
iatom_last = shell.icenter
nbasis = len(CONVENTIONS[(angmom, kind)])
f.write(f" {nbasis} {angmom_its(angmom).capitalize():1s} 1.00\n")
for exponent, coeff in zip(shell.exponents, shell.coeffs[:, 0]):
f.write(f"{exponent:20.10f} {coeff:17.10f}\n")
f.write("\n")
f.write("$END\n")
f.write("\n")
if data.mo.kind == "restricted":
# COEFF_ALPHA
f.write("$COEFF_ALPHA\n")
_dump_helper_coeffs(f, data, spin="a")
# OCC_ALPHA
f.write("$OCC_ALPHA\n")
_dump_helper_occ(f, data, spin="ab")
# Not taking into account generalized.
elif data.mo.kind == "unrestricted":
# COEFF_ALPHA
f.write("$COEFF_ALPHA\n")
_dump_helper_coeffs(f, data, spin="a")
# OCC_ALPHA
f.write("$OCC_ALPHA\n")
_dump_helper_occ(f, data, spin="a")
f.write("\n")
# COEFF_BETA
f.write("$COEFF_BETA\n")
_dump_helper_coeffs(f, data, spin="b")
# OCC_BETA
f.write("$OCC_BETA\n")
_dump_helper_occ(f, data, spin="b")
else:
raise RuntimeError("Generalized orbitals are not support. Call prepare_dump first.")
# Defining help dumping functions
def _dump_helper_coeffs(f, data, spin=None):
permutation, signs = convert_conventions(data.obasis, CONVENTIONS)
if spin == "a":
norb = data.mo.norba
coeff = data.mo.coeffsa[permutation] * signs.reshape(-1, 1)
ener = data.mo.energiesa
irreps = data.mo.irreps[:norb] if data.mo.irreps is not None else ["a1g"] * norb
elif spin == "b":
norb = data.mo.norbb
coeff = data.mo.coeffsb[permutation] * signs.reshape(-1, 1)
ener = data.mo.energiesb
irreps = data.mo.irreps[norb:] if data.mo.irreps is not None else ["a1g"] * norb
else:
raise DumpError("A spin must be specified", f)
for j in range(0, norb, 5):
en = " ".join([f" {e: ,.12f}" for e in ener[j : j + 5]])
irre = " ".join([f"{irr}" for irr in irreps[j : j + 5]])
f.write(irre + "\n")
f.write(en + "\n")
for orb in coeff[:, j : j + 5]:
coeffs = " ".join([f" {c: ,.12f}" for c in orb])
f.write(coeffs + "\n")
f.write(" $END\n")
f.write("\n")
def _dump_helper_occ(f, data, spin=None):
if spin == "a":
norb = data.mo.norba
occ = data.mo.occsa
elif spin == "b":
norb = data.mo.norbb
occ = data.mo.occsb
elif spin == "ab":
norb = data.mo.norba
occ = data.mo.occs
else:
raise DumpError("A spin must be specified", f)
for j in range(0, norb, 5):
occs = " ".join([f" {o: ,.7f}" for o in occ[j : j + 5]])
f.write(occs + "\n")
f.write(" $END\n")