Source code for iodata.formats.molekel

# IODATA is an input and output module for quantum chemistry.
# Copyright (C) 2011-2019 The IODATA Development Team
#
# This file is part of IODATA.
#
# IODATA is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# IODATA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
# --
"""Molekel file format.

This format is used by two programs:
`Molekel <http://ugovaretto.github.io/molekel/wiki/pmwiki.php/Main/HomePage.html>`_ and
`Orca <https://sites.google.com/site/orcainputlibrary/>`_.
"""

from typing import Tuple, List, TextIO

import numpy as np

from .molden import CONVENTIONS, _fix_molden_from_buggy_codes
from ..basis import angmom_sti, angmom_its, convert_conventions, MolecularBasis, Shell
from ..docstrings import document_load_one, document_dump_one
from ..iodata import IOData
from ..orbitals import MolecularOrbitals
from ..utils import angstrom, LineIterator


__all__ = []


PATTERNS = ['*.mkl']


def _load_helper_charge_spinpol(lit: LineIterator) -> List[int]:
    charge, spinmult = [int(word) for word in next(lit).split()]
    spinpol = spinmult - 1
    return charge, spinpol


def _load_helper_charges(lit: LineIterator) -> dict:
    atcharges = []
    for line in lit:
        line = line.strip()
        if line == '$END':
            break
        atcharges.append(float(line))

    return {'mulliken': np.array(atcharges)}


def _load_helper_atoms(lit: LineIterator) -> Tuple[np.ndarray, np.ndarray]:
    atnums = []
    atcoords = []
    for line in lit:
        if line.strip() == '$END':
            break
        words = line.split()
        atnums.append(int(words[0]))
        atcoords.append([float(words[1]), float(words[2]), float(words[3])])
    atnums = np.array(atnums, int)
    atcoords = np.array(atcoords) * angstrom
    return atnums, atcoords


def _load_helper_obasis(lit: LineIterator) -> MolecularBasis:
    shells = []
    icenter = 0
    while True:
        line = next(lit).strip()
        if line == '$END':
            break
        if line == "":
            continue
        if line == '$$':
            icenter += 1
            continue
        # Shell header, always assuming pure functions
        words = line.split()
        angmom = angmom_sti(words[1])
        nbasis_shell = int(words[0])
        if nbasis_shell == len(CONVENTIONS[(angmom, 'c')]):
            kind = 'c'
        elif nbasis_shell == len(CONVENTIONS[(angmom, 'p')]):
            kind = 'p'
        else:
            lit.error('Cannot interpret angmom={} with nbasis_shell={}'.format(
                angmom, nbasis_shell))
        exponents = []
        coeffs = []
        for line in lit:
            words = line.split()
            if len(words) != 2:
                lit.back(line)
                break
            exponents.append(float(words[0]))
            coeffs.append([float(words[1])])
        shells.append(Shell(icenter, [angmom], [kind], np.array(exponents), np.array(coeffs)))
    return MolecularBasis(shells, CONVENTIONS, 'L2')


def _load_helper_coeffs(lit: LineIterator, nbasis: int) -> Tuple[np.ndarray, np.ndarray]:
    coeffs = []
    energies = []
    irreps = []

    in_orb = 0
    for line in lit:
        line = line.strip()
        if line == '$END':
            break
        if in_orb == 0:
            # read a1g line
            words = line.split()
            ncol = len(words)
            assert ncol > 0
            for word in words:
                irreps.append(word)
            cols = [np.zeros((nbasis, 1), float) for _ in range(ncol)]
            in_orb = 1
        elif in_orb == 1:
            # read energies
            words = line.split()
            assert len(words) == ncol
            for word in words:
                energies.append(float(word))
            in_orb = 2
            ibasis = 0
        elif in_orb == 2:
            # read expansion coefficients
            words = line.split()
            assert len(words) == ncol
            for icol in range(ncol):
                cols[icol][ibasis] = float(words[icol])
            ibasis += 1
            if ibasis == nbasis:
                in_orb = 0
                coeffs.extend(cols)

    return np.hstack(coeffs), np.array(energies), irreps


def _load_helper_occ(lit: LineIterator) -> np.ndarray:
    occs = []
    for line in lit:
        line = line.strip()
        if line == '$END':
            break
        for word in line.split():
            occs.append(float(word))
    return np.array(occs)


# pylint: disable=too-many-branches,too-many-statements
[docs]@document_load_one("Molekel", ['atcoords', 'atnums', 'mo', 'obasis'], ['atcharges']) def load_one(lit: LineIterator) -> dict: """Do not edit this docstring. It will be overwritten.""" charge = None atnums = None atcoords = None obasis = None coeffsa = None energiesa = None occsa = None coeffsb = None energiesb = None occsb = None atcharges = None irrepsa = None irrepsb = None # Using a loop because we're not entirely sure if sections in an MKL file # have a fixed order. while True: try: line = next(lit).strip() except StopIteration: # There is no file-end marker we can use, so we only stop when # reaching the end of the file. break if line == '$CHAR_MULT': charge, spinpol = _load_helper_charge_spinpol(lit) elif line == '$CHARGES': atcharges = _load_helper_charges(lit) elif line == '$COORD': atnums, atcoords = _load_helper_atoms(lit) elif line == '$BASIS': obasis = _load_helper_obasis(lit) elif line == '$COEFF_ALPHA': coeffsa, energiesa, irrepsa = _load_helper_coeffs(lit, obasis.nbasis) elif line == '$OCC_ALPHA': occsa = _load_helper_occ(lit) elif line == '$COEFF_BETA': coeffsb, energiesb, irrepsb = _load_helper_coeffs(lit, obasis.nbasis) elif line == '$OCC_BETA': occsb = _load_helper_occ(lit) if charge is None: lit.error('Charge and spin polarization not found.') if atcoords is None: lit.error('Coordinates not found.') if obasis is None: lit.error('Orbital basis not found.') if coeffsa is None: lit.error('Alpha orbitals not found.') if occsa is None: lit.error('Alpha occupation numbers not found.') nelec = atnums.sum() - charge if coeffsb is None: # restricted closed-shell assert nelec % 2 == 0 assert abs(occsa.sum() - nelec) < 1e-7 mo = MolecularOrbitals( 'restricted', coeffsa.shape[1], coeffsa.shape[1], occsa, coeffsa, energiesa, irrepsa) else: if occsb is None: lit.error('Beta occupation numbers not found in mkl file while ' 'beta orbitals were present.') nalpha = int(np.round(occsa.sum())) nbeta = int(np.round(occsb.sum())) assert abs(spinpol - abs(nalpha - nbeta)) < 1e-7 assert nelec == nalpha + nbeta assert coeffsa.shape == coeffsb.shape assert energiesa.shape == energiesb.shape assert occsa.shape == occsb.shape mo = MolecularOrbitals( 'unrestricted', coeffsa.shape[1], coeffsb.shape[1], np.concatenate((occsa, occsb), axis=0), np.concatenate((coeffsa, coeffsb), axis=1), np.concatenate((energiesa, energiesb), axis=0), irrepsa + irrepsb) result = { 'atcoords': atcoords, 'atnums': atnums, 'obasis': obasis, 'mo': mo, 'atcharges': atcharges, } _fix_molden_from_buggy_codes(result, lit) return result
[docs]@document_dump_one("Molekel", ['atcoords', 'atnums', 'mo', 'obasis'], ['atcharges']) def dump_one(f: TextIO, data: IOData): """Do not edit this docstring. It will be overwritten.""" # Header f.write('$MKL\n') f.write('#\n') f.write('# MKL format file produced by IOData\n') f.write('#\n') # CHAR_MUL f.write('$CHAR_MULT\n') f.write(' {:.0f} {:.0f}\n'.format(data.charge, data.spinpol + 1)) f.write('$END\n') f.write('\n') # COORD atcoords = data.atcoords / angstrom f.write('$COORD\n') for n, coord in zip(data.atnums, atcoords): f.write(' {:d} {: ,.6f} {: ,.6f} {: ,.6f}\n'.format(n, coord[0], coord[1], coord[2])) f.write('$END\n') f.write('\n') # CHARGES if 'mulliken' in data.atcharges: f.write('$CHARGES\n') for charge in data.atcharges['mulliken']: f.write(' {: ,.6f}\n'.format(charge)) f.write('$END\n') f.write('\n') # BASIS f.write('$BASIS\n') iatom_last = 0 for shell in data.obasis.shells: iatom_new = shell.icenter if iatom_new != iatom_last: f.write('$$\n') for iangmom, (angmom, kind) in enumerate(zip(shell.angmoms, shell.kinds)): iatom_last = shell.icenter nbasis = len(CONVENTIONS[(angmom, kind)]) f.write(' {} {:1s} 1.00\n'.format(nbasis, angmom_its(angmom).capitalize())) for exponent, coeff in zip(shell.exponents, shell.coeffs[:, iangmom]): f.write('{:20.10f} {:17.10f}\n'.format(exponent, coeff)) f.write('\n') f.write('$END\n') f.write('\n') if data.mo.kind == 'restricted': # COEFF_ALPHA f.write('$COEFF_ALPHA\n') _dump_helper_coeffs(f, data, spin='a') # OCC_ALPHA f.write('$OCC_ALPHA\n') _dump_helper_occ(f, data, spin='ab') # Not taking into account generalized. elif data.mo.kind == 'unrestricted': # COEFF_ALPHA f.write('$COEFF_ALPHA\n') _dump_helper_coeffs(f, data, spin='a') # OCC_ALPHA f.write('$OCC_ALPHA\n') _dump_helper_occ(f, data, spin='a') f.write('\n') # COEFF_BETA f.write('$COEFF_BETA\n') _dump_helper_coeffs(f, data, spin='b') # OCC_BETA f.write('$OCC_BETA\n') _dump_helper_occ(f, data, spin='b') else: raise ValueError(f"The MKL format does not support {data.mo.kind} orbitals.")
# Defining help dumping functions def _dump_helper_coeffs(f, data, spin=None): permutation, signs = convert_conventions(data.obasis, CONVENTIONS) if spin == 'a': norb = data.mo.norba coeff = data.mo.coeffsa[permutation] * signs.reshape(-1, 1) ener = data.mo.energiesa if data.mo.irreps is not None: irreps = data.mo.irreps[:norb] else: irreps = ['a1g'] * norb elif spin == 'b': norb = data.mo.norbb coeff = data.mo.coeffsb[permutation] * signs.reshape(-1, 1) ener = data.mo.energiesb if data.mo.irreps is not None: irreps = data.mo.irreps[norb:] else: irreps = ['a1g'] * norb else: raise IOError('A spin must be specified') for j in range(0, norb, 5): en = ' '.join([' {: ,.12f}'.format(e) for e in ener[j:j + 5]]) irre = ' '.join(['{}'.format(irr) for irr in irreps[j:j + 5]]) f.write(irre + '\n') f.write(en + '\n') for orb in coeff[:, j:j + 5]: coeffs = ' '.join([' {: ,.12f}'.format(c) for c in orb]) f.write(coeffs + '\n') f.write(' $END\n') f.write('\n') def _dump_helper_occ(f, data, spin=None): if spin == 'a': norb = data.mo.norba occ = data.mo.occsa elif spin == 'b': norb = data.mo.norbb occ = data.mo.occsb elif spin == 'ab': norb = data.mo.norba occ = data.mo.occs else: raise IOError('A spin must be specified') for j in range(0, norb, 5): occs = ' '.join([' {: ,.7f}'.format(o) for o in occ[j:j + 5]]) f.write(occs + '\n') f.write(' $END\n')