# IODATA is an input and output module for quantum chemistry.
# Copyright (C) 2011-2019 The IODATA Development Team
#
# This file is part of IODATA.
#
# IODATA is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# IODATA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
# --
"""Multiwfn MWFN file format."""
import numpy as np
from numpy.typing import NDArray
from ..basis import MolecularBasis, Shell
from ..convert import HORTON2_CONVENTIONS
from ..docstrings import document_load_one
from ..orbitals import MolecularOrbitals
from ..utils import LineIterator, LoadError, angstrom
__all__ = ()
PATTERNS = ["*.mwfn"]
# From the MWFN chemrxiv paper
# https://chemrxiv.org/articles/Mwfn_A_Strict_Concise_and_Extensible_Format
# _for_Electronic_Wavefunction_Storage_and_Exchange/11872524
# For cartesian shells
# S shell: S
# P shell: X, Y, Z
# D shell: XX, YY, ZZ, XY, XZ, YZ
# F shell: XXX, YYY, ZZZ, XYY, XXY, XXZ, XZZ, YZZ, YYZ, XYZ
# G shell: ZZZZ, YZZZ, YYZZ, YYYZ, YYYY, XZZZ, XYZZ, XYYZ, XYYY, XXZZ, XXYZ, XXYY, XXXZ, XXXY, XXXX
# H shell: ZZZZZ, YZZZZ, YYZZZ, YYYZZ, YYYYZ, YYYYY, XZZZZ, XYZZZ, XYYZZ, XYYYZ, XYYYY, XXZZZ,
# XXYZZ, XXYYZ, XXYYY, XXXZZ, XXXYZ, XXXYY, XXXXZ, XXXXY, XXXXX
# For pure shells, the order is
# D shell: D 0, D+1, D-1, D+2, D-2
# F shell: F 0, F+1, F-1, F+2, F-2, F+3, F-3
# G shell: G 0, G+1, G-1, G+2, G-2, G+3, G-3, G+4, G-4
# fmt: off
CONVENTIONS = {
(4, 'p'): HORTON2_CONVENTIONS[(4, 'p')],
(3, 'p'): HORTON2_CONVENTIONS[(3, 'p')],
(2, 'p'): HORTON2_CONVENTIONS[(2, 'p')],
(0, 'c'): ['1'],
(1, 'c'): ['x', 'y', 'z'],
(2, 'c'): ['xx', 'yy', 'zz', 'xy', 'xz', 'yz'],
(3, 'c'): ['xxx', 'yyy', 'zzz', 'xyy', 'xxy', 'xxz', 'xzz', 'yzz', 'yyz', 'xyz'],
(4, 'c'): ['zzzz', 'yzzz', 'yyzz', 'yyyz', 'yyyy', 'xzzz', 'xyzz', 'xyyz', 'xyyy',
'xxzz', 'xxyz', 'xxyy', 'xxxz', 'xxxy', 'xxxx'],
(5, 'c'): ['zzzzz', 'yzzzz', 'yyzzz', 'yyyzz', 'yyyyz', 'yyyyy', 'xzzzz', 'xyzzz',
'xyyzz', 'xyyyz', 'xyyyy', 'xxzzz', 'xxyzz', 'xxyyz', 'xxyyy', 'xxxzz',
'xxxyz', 'xxxyy', 'xxxxz', 'xxxxy', 'xxxxx'],
}
# fmt: on
def _load_helper_opener(lit: LineIterator) -> dict:
"""Read initial variables at the beginning of a MWFN file."""
keys = {
"Wfntype": int,
"Charge": float,
"Naelec": float,
"Nbelec": float,
"E_tot": float,
"VT_ratio": float,
"Ncenter": int,
}
max_count = len(keys)
count = 0
data = {}
while count < max_count:
line = next(lit)
for name, ftype in keys.items():
if name in line:
data[name] = ftype(line.split("=")[1].strip())
count += 1
# check values parsed
if data["Ncenter"] <= 0:
raise LoadError(
f"Ncenter should be a positive integer. Read Ncenter= {data['Ncenter']}.", lit
)
# Possible values of Wfntype (wavefunction type):
# 0: Restricted closed - shell single - determinant wavefunction(e.g.RHF, RKS)
# 1: Unrestricted open - shell single - determinant wavefunction(e.g.UHF, UKS)
# 2: Restricted open - shell single - determinant wavefunction(e.g.ROHF, ROKS)
# 3: Restricted multiconfiguration wavefunction(e.g.RMP2, RCCSD)
# 4: Unrestricted multiconfiguration wavefunction(e.g.UMP2, UCCSD)
if data["Wfntype"] in [0, 2, 3]:
# restricted wavefunction
data["mo_kind"] = "restricted"
elif data["Wfntype"] in [1, 4]:
# unrestricted wavefunction
data["mo_kind"] = "unrestricted"
else:
raise LoadError(
f"Wavefunction type cannot be determined. Read Wfntype= {data['Wfntype']}.", lit
)
return data
def _load_helper_basis(lit: LineIterator) -> dict:
"""Read basis set information typically labelled '# Basis function information'."""
# Nprims must be last or else it gets read in with Nprimshell
keys = ["Nbasis", "Nindbasis", "Nshell", "Nprimshell", "Nprims"]
count = 0
data = {}
next(lit)
while count < len(keys):
line = next(lit)
for name in keys:
if name in line:
data[name] = int(line.split("=")[1].strip())
count += 1
break
return data
def _load_helper_atoms(lit: LineIterator, natom: int) -> dict:
"""Read atoms section typically labelled '# Atom information'."""
data = {
"atnums": np.empty(natom, int),
"atcorenums": np.empty(natom, float),
"atcoords": np.empty((natom, 3), float),
}
# skip lines until "$Centers" section is reached
line = next(lit)
while "$Centers" not in line and line is not None:
line = next(lit)
for atom in range(natom):
words = next(lit).split()
data["atnums"][atom] = words[2]
data["atcorenums"][atom] = words[3]
data["atcoords"][atom, :] = words[4:7]
# coordinates are in angstrom in MWFN, so they are converted to atomic units
data["atcoords"] *= angstrom
# check atomic numbers
if min(data["atnums"]) <= 0:
raise LoadError(
f"Atomic numbers should be positive integers. Read atnums= {data['atnums']}.", lit
)
return data
def _load_helper_shells(lit: LineIterator, nshell: int) -> dict:
"""Read shell types, centers, and contraction degrees."""
sections = ["$Shell types", "$Shell centers", "$Shell contraction"]
var_name = ["shell_types", "shell_centers", "shell_ncons"]
# read lines until '$Shell types' is reached
line = next(lit)
while sections[0] not in line and line is not None:
line = next(lit)
data = {}
for section, name in zip(sections, var_name):
if not line.startswith(section):
raise LoadError(f"Expected line to start with {section}, but got line={line}.", lit)
data[name] = _load_helper_section(lit, nshell, " ", 0, int)
line = next(lit)
lit.back(line)
return data
def _load_helper_section(
lit: LineIterator, size: int, start: str, skip: int, dtype: np.dtype
) -> NDArray:
"""Read single or multiple line(s) section."""
section = []
while len(section) < size:
line = next(lit)
if not line.startswith(start):
raise LoadError(f"Expected line to start with {start}. Got line={line}.", lit)
section.extend(line.split()[skip:])
return np.array(section, dtype)
def _load_helper_mo(lit: LineIterator, n_basis: int, n_mo: int) -> dict:
"""Read molecular orbitals section typically labelled '# Orbital information'."""
data = {
"mo_numbers": np.empty(n_mo, int),
"mo_type": np.empty(n_mo, int),
"mo_energies": np.empty(n_mo, float),
"mo_occs": np.empty(n_mo, float),
"mo_sym": np.empty(n_mo, str),
"mo_coeffs": np.empty([n_basis, n_mo], float),
}
for index in range(n_mo):
line = next(lit)
while "Index" not in line:
line = next(lit)
if not line.startswith("Index"):
raise LoadError(f"Expecting line starting with 'Index', got '{line}'", lit)
data["mo_numbers"][index] = line.split()[1]
data["mo_type"][index] = next(lit).split()[1]
data["mo_energies"][index] = next(lit).split()[1]
data["mo_occs"][index] = next(lit).split()[1]
data["mo_sym"][index] = next(lit).split()[1]
# skip "$Coeff line
next(lit)
data["mo_coeffs"][:, index] = _load_helper_section(lit, n_basis, "", 0, float)
return data
def _load_mwfn_low(lit: LineIterator) -> dict:
"""Load data from a MWFN file into arrays.
Parameters
----------
lit
The line iterator to read the data from.
"""
# Note:
# -----
# 1) mwfn is a fortran program which loads *.mwfn by locating the line with the keyword,
# then uses `backspace`, then begins reading. Despite this flexibility, it is stated by
# the authors that the order of section, and indeed, entries in general, must be fixed.
# With this in mind the input utilized some hard-coding since order should be fixed.
# 2) mwfn ignores lines beginning with `#`.
# read title (assuming title is the 1st line, which seems to be the standard)
data = {"title": next(lit).strip()}
# load Wfntype, Charge, Naelec, Nbelec, E_tot, VT_ratio, & Ncenter
data.update(_load_helper_opener(lit))
# load atnums, atcorenums, & atcoords (in atomic units)
data.update(_load_helper_atoms(lit, data["Ncenter"]))
# load Nbasis, Nindbasis, Nprims, Nshell, & Nprimshell
data.update(_load_helper_basis(lit))
# load shell_types, shell_centers, & shell_contraction_degrees
data.update(_load_helper_shells(lit, data["Nshell"]))
# IOData indices start at 0, so the centers are shifted
data["shell_centers"] -= 1
# load primitive exponents & coefficients
if not next(lit).startswith("$Primitive exponents"):
raise LoadError("Expected '$Primitive exponents' section.", lit)
data["exponents"] = _load_helper_section(lit, data["Nprimshell"], "", 0, float)
if not next(lit).startswith("$Contraction coefficients"):
raise LoadError("Expected '$Contraction coefficients' section.", lit)
data["coeffs"] = _load_helper_section(lit, data["Nprimshell"], "", 0, float)
# get number of basis & molecular orbitals (MO)
# Note: MWFN includes virtual orbitals, so num_mo equals number independent basis functions
num_basis = data["Nindbasis"]
num_mo = data["Nindbasis"]
if data["mo_kind"] == "unrestricted":
num_mo *= 2
# load MO information
data.update(_load_helper_mo(lit, num_basis, num_mo))
return data
[docs]
@document_load_one(
"MWFN", ["atcoords", "atnums", "atcorenums", "energy", "mo", "obasis", "extra", "title"]
)
def load_one(lit: LineIterator) -> dict:
"""Do not edit this docstring. It will be overwritten."""
inp = _load_mwfn_low(lit)
# store certain information loaded from MWFN in extra dictionary
extra = {
"wfntype": inp["Wfntype"],
"nindbasis": inp["Nindbasis"],
"mo_sym": inp["mo_sym"],
"full_virial_ratio": inp["VT_ratio"],
}
# Build MolecularBasis instance
# Unlike WFN, MWFN does include orbital expansion coefficients.
shells = []
counter = 0
for center, stype, ncon in zip(inp["shell_centers"], inp["shell_types"], inp["shell_ncons"]):
shells.append(
Shell(
center,
[abs(stype)],
["p" if stype < 0 else "c"],
inp["exponents"][counter : counter + ncon],
inp["coeffs"][counter : counter + ncon][:, np.newaxis],
)
)
counter += ncon
obasis = MolecularBasis(shells, CONVENTIONS, "L2")
# check number of basis functions
if obasis.nbasis != inp["Nbasis"]:
raise LoadError(
f"Number of basis functions in MolecularBasis ({obasis.nbasis}) "
f"is not equal to the 'Nbasis' ({inp['Nbasis']}).",
lit.filename,
)
# Determine number of orbitals of each kind.
if inp["mo_kind"] == "restricted":
norba = len(inp["mo_type"])
norbb = norba
else:
# Legend
# 0: restricted doubly occupied
# 1: alpha
# 2: beta
norba = (inp["mo_type"] == 1).sum()
norbb = (inp["mo_type"] == 2).sum()
if (inp["mo_type"] == 0).sum() != 0:
raise LoadError("Restricted orbtials found in unrestricted wavefunction.", lit)
# Build MolecularOrbitals instance
mo = MolecularOrbitals(
inp["mo_kind"], norba, norbb, inp["mo_occs"], inp["mo_coeffs"], inp["mo_energies"], None
)
# check number of electrons
if mo.nelec != inp["Naelec"] + inp["Nbelec"]:
raise LoadError(
f"Number of electrons in MolecularOrbitals ({mo.nelec}) is not equal to "
f"the sum of 'Naelec' and 'Nbelec' ({inp['Naelec']} + {inp['Nbelec']}).",
lit.filename,
)
if mo.occsa.sum() != inp["Naelec"]:
raise LoadError(
f"Number of alpha electrons in MolecularOrbitals ({mo.occsa.sum()}) "
f"is not equal to the 'Naelec' ({inp['Naelec']}).",
lit.filename,
)
if mo.occsb.sum() != inp["Nbelec"]:
raise LoadError(
f"Number of beta electrons in MolecularOrbitals ({mo.occsb.sum()})"
f"is not equal to the 'Nbelec' ({inp['Nbelec']}).",
lit.filename,
)
return {
"title": inp["title"],
"atcoords": inp["atcoords"],
"atnums": inp["atnums"],
"atcorenums": inp["atcorenums"],
"obasis": obasis,
"mo": mo,
"energy": inp["E_tot"],
"extra": extra,
}