# IODATA is an input and output module for quantum chemistry.
# Copyright (C) 2011-2019 The IODATA Development Team
#
# This file is part of IODATA.
#
# IODATA is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# IODATA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
# --
"""AIM/AIMAll WFX file format.
See http://aim.tkgristmill.com/wfxformat.html
"""
from typing import TextIO, Iterator
import warnings
import numpy as np
from ..docstrings import document_load_one, document_dump_one
from ..orbitals import MolecularOrbitals
from ..periodic import num2sym
from ..iodata import IOData
from ..utils import LineIterator
from ..basis import MolecularBasis, Shell, convert_conventions
from .wfn import build_obasis, get_mocoeff_scales, CONVENTIONS
__all__ = []
PATTERNS = ['*.wfx']
def _wfx_labels() -> tuple:
"""Build labels for wfx parser."""
# labels of various sections in WFX file grouped based on their data type
# section labels with string data types
labels_str = {
'<Title>': 'title',
'<Keywords>': 'keywords',
'<Model>': 'model_name',
}
# section labels with integer number data types
labels_int = {
'<Number of Nuclei>': 'num_atoms',
'<Number of Occupied Molecular Orbitals>': 'num_occ_mo',
'<Number of Perturbations>': 'num_perturbations',
'<Number of Electrons>': 'num_electrons',
'<Number of Core Electrons>': 'num_core_electrons',
'<Number of Alpha Electrons>': 'num_alpha_electron',
'<Number of Beta Electrons>': 'num_beta_electron',
'<Number of Primitives>': 'num_primitives',
'<Electronic Spin Multiplicity>': 'spin_multi',
}
# section labels with float number data types
labels_float = {
'<Net Charge>': 'charge',
'<Energy = T + Vne + Vee + Vnn>': 'energy',
'<Virial Ratio (-V/T)>': 'virial_ratio',
'<Nuclear Virial of Energy-Gradient-Based Forces on Nuclei, W>': 'nuc_viral',
'<Full Virial Ratio, -(V - W)/T>': 'full_virial_ratio',
}
# section labels with array of integer data types
labels_array_int = {
'<Atomic Numbers>': 'atnums',
'<Primitive Centers>': 'centers',
'<Primitive Types>': 'types',
'<MO Numbers>': 'mo_numbers', # This is constructed in parse_wfx.
}
# section labels with array of float data types
labels_array_float = {
'<Nuclear Cartesian Coordinates>': 'atcoords',
'<Nuclear Charges>': 'nuclear_charge',
'<Primitive Exponents>': 'exponents',
'<Molecular Orbital Energies>': 'mo_energies',
'<Molecular Orbital Occupation Numbers>': 'mo_occs',
'<Molecular Orbital Primitive Coefficients>': 'mo_coeffs',
}
# section labels with other data types
labels_other = {
'<Nuclear Names>': 'nuclear_names',
'<Molecular Orbital Spin Types>': 'mo_spins',
'<Nuclear Cartesian Energy Gradients>': 'nuclear_gradient',
}
# list of tags corresponding to required sections based on WFX format specifications
required_tags = list(labels_str) + list(labels_int) + list(labels_float)
required_tags += list(labels_array_float) + list(labels_array_int) + list(labels_other)
# remove tags corresponding to optional sections
required_tags.remove('<Model>')
required_tags.remove('<Number of Core Electrons>')
required_tags.remove('<Electronic Spin Multiplicity>')
required_tags.remove('<Atomic Numbers>')
required_tags.remove('<Full Virial Ratio, -(V - W)/T>')
required_tags.remove('<Nuclear Virial of Energy-Gradient-Based Forces on Nuclei, W>')
required_tags.remove('<Nuclear Cartesian Energy Gradients>')
return (labels_str, labels_int, labels_float, labels_array_int, labels_array_float,
labels_other, required_tags)
[docs]def load_data_wfx(lit: LineIterator) -> dict:
"""Process loaded WFX data."""
# get all section labels and required labels for WFX files
lbs_str, lbs_int, lbs_float, lbs_aint, lbs_afloat, lbs_other, required_tags = _wfx_labels()
# load sections in WFX and check required tags exists
data = parse_wfx(lit, required_tags)
# process raw data to convert them to proper type based on their label
result = {}
for key, value in data.items():
if key in lbs_str:
assert len(value) == 1
result[lbs_str[key]] = value[0]
elif key in lbs_int:
assert len(value) == 1
result[lbs_int[key]] = int(value[0])
elif key in lbs_float:
assert len(value) == 1
result[lbs_float[key]] = float(value[0])
elif key in lbs_afloat:
result[lbs_afloat[key]] = np.fromstring(" ".join(value), dtype=np.float, sep=" ")
elif key in lbs_aint:
result[lbs_aint[key]] = np.fromstring(" ".join(value), dtype=np.int, sep=" ")
elif key in lbs_other:
result[lbs_other[key]] = value
else:
warnings.warn("Not recognized section label, skip {0}".format(key))
# reshape some arrays
result['atcoords'] = result['atcoords'].reshape(-1, 3)
result['mo_coeffs'] = result['mo_coeffs'].reshape(result['num_primitives'], -1, order='F')
# process nuclear gradient, if present
if 'nuclear_gradient' in result:
gradient_mix = np.array([i.split() for i in result.pop('nuclear_gradient')]).reshape(-1, 4)
gradient_atoms = gradient_mix[:, 0].astype(np.unicode_)
index = [result['nuclear_names'].index(atom) for atom in gradient_atoms]
result['atgradient'] = np.full((len(result['nuclear_names']), 3), np.nan)
result['atgradient'][index] = gradient_mix[:, 1:].astype(float)
# check keywords & number of perturbations
perturbation_check = {'GTO': 0, 'GIAO': 3, 'CGST': 6}
key = result['keywords']
num = result['num_perturbations']
if key not in perturbation_check.keys():
lit.error(f"The keywords is {key}, but it should be either GTO, GIAO or CGST")
if num != perturbation_check[key]:
lit.error(f"Number of perturbations of {key} is {num}, expected {perturbation_check[key]}")
return result
[docs]def parse_wfx(lit: LineIterator, required_tags: list = None) -> dict:
"""Load data in all sections existing in the given WFX file LineIterator."""
# pylint: disable=too-many-branches
data = {}
mo_start = "<Molecular Orbital Primitive Coefficients>"
section_start = None
while True:
# get a new line
try:
line = next(lit).strip()
except StopIteration:
break
# check whether line is the start of a section
if section_start is None and line.startswith("<"):
# set start & end of the section and add it to data dictionary
section_start = line
if section_start in data.keys():
lit.error("Section with tag={} is repeated!".format(section_start))
data[section_start] = []
section_end = line[:1] + "/" + line[1:]
# special handling of <Molecular Orbital Primitive Coefficients> section
if section_start == mo_start:
data['<MO Numbers>'] = []
# check whether line is the (correct) end of the section
elif section_start is not None and line.startswith("</"):
# In some cases, closing tags have a different number of spaces. 8-[
if line.replace(" ", "") != section_end.replace(" ", ""):
lit.error("Expecting line {} but got {}.".format(section_end, line))
# reset section_start variable to signal that section ended
section_start = None
# handle <MO Number> line under <Molecular Orbital Primitive Coefficients> section
elif section_start == mo_start and line == '<MO Number>':
# add MO Number to list
data['<MO Numbers>'].append(next(lit).strip())
# skip '</MO Number>' line
next(lit)
# add section content to the corresponding list in data dictionary
else:
data[section_start].append(line)
# check if last section was closed
if section_start is not None:
lit.error("Section {} is not closed at end of file.".format(section_start))
# check required section tags
if required_tags is not None:
for section_tag in required_tags:
if section_tag not in data.keys():
lit.error(f'Section {section_tag} is missing from loaded WFX data.')
return data
[docs]@document_load_one("WFX", ['atcoords', 'atgradient', 'atnums', 'energy',
'extra', 'mo', 'obasis', 'title'])
def load_one(lit: LineIterator) -> dict:
"""Do not edit this docstring. It will be overwritten."""
# get data contained in WFX file with the proper type & shape
data = load_data_wfx(lit)
# Build molecular basis
# ---------------------
# build molecular basis and permutation needed to regroup shells
obasis, permutation = build_obasis(
data['centers'] - 1, data['types'] - 1, data['exponents'], lit)
# Build molecular orbitals
# ------------------------
# re-order MO coefficients because the loaded expansion coefficients from WFX typically
# corresponds to basis sets grouped based on their type; that is, all MO coefficients of px
# basis functions are listed together first, then MO coefficients of py basis functions, and
# finally MO coefficients of pz (the same is true for higher angular momentum). However, IOData
# stores basis shells (instead of basis functions), so the p shell with angmom=1 represents
# the px, py, pz basis functions. These shells are used by MolecularBasis (obasis) in
# constructing the basis function. If that is the case for the loaded MO coefficients from WFX,
# they need to be permuted to match obasis expansion of basis set (i.e. to appear in shells).
data['mo_coeffs'] = data['mo_coeffs'][permutation]
# fix normalization because the loaded expansion coefficients from WFX corresponds to
# un-normalized primitives for each normalized MO (which means the primitive normalization
# constants has been included in the MO coefficients). However, IOData expects normalized
# primitives (either L2 or L1 as recorded in MolecularBasis primitive types), so we need to
# divide the MO coefficients by the primitive normalization constants to have them correspond
# to expansion coefficients for normalized primitives. Here, we assume primitives are
# L2-normalized (as stored in obasis.primitive_normalization) which is used in scaling MO
# coefficients to be stored in MolecularOrbitals instance.
data['mo_coeffs'] /= get_mocoeff_scales(obasis).reshape(-1, 1)
# process mo_spins and convert it into restricted or unrestricted & count alpha/beta orbitals
# we do not using the <Model> section for this because it is not guaranteed to be present
# check whether restricted case with "Alpha and Beta" in mo_spins
if any("and" in word for word in data['mo_spins']):
# count number of alpha & beta molecular orbitals
norbb = data['mo_spins'].count("Alpha and Beta")
norba = norbb + data['mo_spins'].count("Alpha")
# check that mo_spin list contains no surprises
if data['mo_spins'] != ["Alpha and Beta"] * norbb + ["Alpha"] * (norba - norbb):
lit.error("Unsupported <Molecular Orbital Spin Types> values.")
if norba != data['mo_coeffs'].shape[1]:
lit.error("Number of orbitals inconsistent with orbital spin types.")
# create molecular orbitals, which requires knowing the number of alpha and beta molecular
# orbitals. These are expected to be the same for 'restricted' case, however, the number of
# Alpha/Beta counts might not be the same for the restricted WFX (e.g., restricted
# open-shell calculations that do not print virtual orbitals), so it is safer to use
# `norba` to denote number of both alpha and beta orbitals in MolecularOrbitals.
# See orbitals.py for details to see how number of orbitals are dealt with.
# For restricted wavefunctions, IOData uses the
# occupation numbers to identify the spin types. IOData also has different
# conventions for norba and norbb, see orbitals.py for details.
mo = MolecularOrbitals(
"restricted", norba, norba, # This is not a typo!
data['mo_occs'], data['mo_coeffs'], data['mo_energies'], None)
# unrestricted case with "Alpha" and "Beta" in mo_spins
else:
norba = data['mo_spins'].count("Alpha")
norbb = data['mo_spins'].count("Beta")
# check that mo_spin list contains no surprises
if data['mo_spins'] != ["Alpha"] * norba + ["Beta"] * norbb:
lit.error("Unsupported molecular orbital spin types.")
# check that number of orbitals match number of MO coefficients
if norba + norbb != data['mo_coeffs'].shape[1]:
lit.error("Number of orbitals inconsistent with orbital spin types.")
# Create orbitals. For unrestricted wavefunctions, IOData uses the same
# conventions as WFX.
mo = MolecularOrbitals(
"unrestricted", norba, norbb,
data['mo_occs'], data['mo_coeffs'], data['mo_energies'], None)
# prepare WFX-specific data for IOData
extra_labels = ['keywords', 'model_name', 'num_perturbations', 'num_core_electrons',
'spin_multi', 'virial_ratio', 'nuc_viral', 'full_virial_ratio', 'mo_spin']
extra = {label: data.get(label, None) for label in extra_labels}
extra["permutations"] = permutation
return {
'atcoords': data['atcoords'],
'atgradient': data.get('atgradient'),
'atnums': data['atnums'],
'atcorenums': data['nuclear_charge'],
'energy': data['energy'],
'extra': extra,
'mo': mo,
'obasis': obasis,
'title': data['title'],
}
[docs]@document_dump_one("WFX", ['atcoords', 'atnums', 'atcorenums', 'mo', 'obasis', 'charge'],
['title', 'energy', 'spinpol', 'lot', 'atgradient', 'extra'])
def dump_one(f: TextIO, data: IOData):
"""Do not edit this docstring. It will be overwritten."""
# pylint: disable=too-many-branches,too-many-statements
# get all tags/labels that can be written into a WFX file
lbs_str, lbs_int, lbs_float, lbs_aint, lbs_afloat, lbs_other, _ = _wfx_labels()
# put all labels in one dictionary and flip key and value for easier use
lbs = {**lbs_str, **lbs_int, **lbs_float, **lbs_aint, **lbs_afloat, **lbs_other}
lbs = {v: k for k, v in lbs.items()}
# de-contract data.obasis
# -----------------------
# get shells for the de-contracted basis
shells = []
for shell in data.obasis.shells:
for i, (angmom, kind) in enumerate(zip(shell.angmoms, shell.kinds)):
for exponent, coeff in zip(shell.exponents, shell.coeffs.T[i]):
if kind != 'c':
raise ValueError("WFX can be generated only for Cartesian MolecularBasis!")
shells.append(Shell(shell.icenter, [angmom], [kind], np.array([exponent]),
coeff.reshape(-1, 1)))
# make a new instance of MolecularBasis with de-contracted basis shells; ideally for WFX we
# want the primitive basis set, but IOData only supports shells.
obasis = MolecularBasis(shells, data.obasis.conventions, data.obasis.primitive_normalization)
# expand mo.coeffs in de-contracted basis primitives
# --------------------------------------------------
# expand mo.coeffs in the new basis by repeating de-contracted basis coefficients
permutation, signs = convert_conventions(data.obasis, CONVENTIONS)
raw_coeffs = data.mo.coeffs[permutation] * signs.reshape(-1, 1)
mo_coeffs = np.zeros((obasis.nbasis, data.mo.norb))
index_mo_old, index_mo_new = 0, 0
# loop over the shells of the old basis
for shell in data.obasis.shells:
for angmom, kind in zip(shell.angmoms, shell.kinds):
n = len(data.obasis.conventions[angmom, kind])
c = raw_coeffs[index_mo_old: index_mo_old + n]
for j in range(shell.nprim):
mo_coeffs[index_mo_new: index_mo_new + n] = c
index_mo_new += n
index_mo_old += n
# fix MO coefficients
# 1) expansion coefficients in WFX correspond to un-normalized primitives, so the primitive
# normalization constants should be included in the MO coefficients. However, IOData stores
# normalized primitives (either L2 or L1 as recorded in MolecularBasis primitive types), so
# we need to multiply the MO coefficients by the primitive normalization constants
scales = get_mocoeff_scales(obasis)
# 2) expansion coefficients in WFX represent the primitive basis coefficients, so contraction
# coefficients needs to be multiplied by the MO expansion coefficients.
contractions = []
for shell in obasis.shells:
contractions.extend(np.repeat(shell.coeffs.ravel(), [shell.nbasis], axis=0))
contractions = np.array(contractions)
# update MO coefficients to include primitives contraction coefficients & normalization
for index in range(mo_coeffs.shape[1]):
mo_coeffs[:, index] *= contractions * scales
# write title & keywords
_write_xml_single(tag=lbs["title"], info=data.title or '<Created with IOData>', file=f)
_write_xml_single(tag=lbs["keywords"], info=data.extra.get("keywords", "GTO"), file=f)
# write number of nuclei & number of primitives
_write_xml_single(tag=lbs["num_atoms"], info=data.natom, file=f)
_write_xml_single(tag=lbs["num_primitives"], info=obasis.nbasis, file=f)
# write number of occupied molecular orbitals
# in practice wfx prints the total number of MO, even though the section title specifies
# "Number of Occupied Molecular Orbitals", which is different from total number of MO when
# you print virtual orbitals in wfx file.
_write_xml_single(tag=lbs["num_occ_mo"], info=data.mo.occs.shape[0], file=f)
# write number of perturbations
_write_xml_single(lbs["num_perturbations"], data.extra.get("num_perturbations", 0), file=f)
# write nuclear names, atomic numbers, and nuclear charges
# add ghost atom, represented by Bq and atomic number 0
num2sym.update({0: 'Bq'})
nuclear_names = [f' {num2sym[num]}{index + 1}' for index, num in enumerate(data.atcorenums)]
_write_xml_iterator(tag=lbs["nuclear_names"], info=nuclear_names, file=f)
_write_xml_iterator(tag=lbs["atnums"], info=data.atnums, file=f)
_write_xml_iterator_scientific(tag=lbs["nuclear_charge"], info=data.atcorenums, file=f)
# write nuclear cartesian coordinates
print("<Nuclear Cartesian Coordinates>", file=f)
for item in data.atcoords:
print('{: ,.14E} {: ,.14E} {: ,.14E}'.format(item[0], item[1], item[2]), file=f)
print("</Nuclear Cartesian Coordinates>", file=f)
# write net charge, number of electrons, number of alpha electrons, and number beta electrons
_write_xml_single_scientific(tag=lbs["charge"], info=data.charge, file=f)
_write_xml_single(tag=lbs["num_electrons"], info=int(data.nelec), file=f)
# wfx expects integer values for number of alpha/beta electrons but int rounds down the float
# so round is used before turning it to integer to get the correct number.
_write_xml_single(tag=lbs["num_alpha_electron"], info=int(round(sum(data.mo.occsa))), file=f)
_write_xml_single(tag=lbs["num_beta_electron"], info=int(round(sum(data.mo.occsb))), file=f)
# write electronic spin multiplicity and model (both optional)
if data.spinpol is not None:
_write_xml_single(tag=lbs["spin_multi"], info=int(data.spinpol + 1), file=f)
if data.lot is not None:
_write_xml_single(tag=lbs["model_name"], info=data.lot, file=f)
# write primitive centers
prim_centers = [shell.icenter + 1 for shell in obasis.shells for _ in range(shell.nbasis)]
print("<Primitive Centers>", file=f)
for j in range(0, len(prim_centers), 10):
print(' '.join(['{:d}'.format(c) for c in prim_centers[j:j + 10]]), file=f)
print("</Primitive Centers>", file=f)
# write primitive types
angmom_prim = {}
count = 1
for angmom in range(max([shell.angmoms[0] for shell in obasis.shells]) + 1):
angmom_prim[angmom] = [count + i for i in range(len(obasis.conventions[angmom, 'c']))]
count += len(obasis.conventions[angmom, 'c'])
prim_types = [item for shell in obasis.shells for item in angmom_prim[shell.angmoms[0]]]
print("<Primitive Types>", file=f)
for j in range(0, len(prim_types), 10):
print(' '.join(['{:d}'.format(c) for c in prim_types[j:j + 10]]), file=f)
print("</Primitive Types>", file=f)
# write primitive exponents
exponents = [shell.exponents[0] for shell in obasis.shells for _ in range(shell.nbasis)]
print("<Primitive Exponents>", file=f)
for j in range(0, len(exponents), 4):
print(' '.join(['{: ,.14E}'.format(e) for e in exponents[j:j + 4]]), file=f)
print("</Primitive Exponents>", file=f)
# write molecular orbital occupation numbers
_write_xml_iterator_scientific(tag=lbs["mo_occs"], info=data.mo.occs, file=f)
# write molecular orbital energies
_write_xml_iterator_scientific(tag=lbs["mo_energies"], info=data.mo.energies, file=f)
# write molecular orbital spin types
if data.mo.kind == 'restricted':
mo_spin = ['Alpha and Beta '] * len(data.mo.occs)
else:
mo_spin = ['Alpha'] * len(data.mo.occsa) + ['Beta'] * len(data.mo.occsb)
_write_xml_iterator(tag=lbs["mo_spins"], info=mo_spin, file=f)
# write MO primitive coefficients
print("<Molecular Orbital Primitive Coefficients>", file=f)
for mo in range(len(data.mo.occs)):
print("<MO Number>", file=f)
print(str(mo + 1), file=f)
print("</MO Number>", file=f)
for j in range(0, obasis.nbasis, 4):
print(' '.join(['{: ,.14E}'.format(c) for c in mo_coeffs.T[mo][j:j + 4]]), file=f)
print("</Molecular Orbital Primitive Coefficients>", file=f)
# write energy and virial ratio; use ' NAN' when None (not available)
_write_xml_single_scientific(tag=lbs["energy"], info=data.energy or np.nan, file=f)
_write_xml_single_scientific(lbs["virial_ratio"], data.extra.get("virial_ratio", np.nan), f)
# write nuclear Cartesian energy gradients (optional)
if data.atgradient is not None:
nuc_cart_energy_grad = list(zip(nuclear_names, data.atgradient))
print("<Nuclear Cartesian Energy Gradients>", file=f)
for atom in nuc_cart_energy_grad:
print(atom[0], '{: ,.14E} {: ,.14E} {: ,.14E}'.format(atom[1][0], atom[1][1],
atom[1][2]), file=f)
print("</Nuclear Cartesian Energy Gradients>", file=f)
# nuclear virial of energy-gradient-based forces on nuclei (optional)
if data.extra.get("nuc_viral") is not None:
_write_xml_single_scientific(tag=lbs["nuc_viral"], info=data.extra["nuc_viral"], file=f)
# write full virial ratio (optional)
if data.extra.get("full_virial_ratio") is not None:
_write_xml_single_scientific(lbs["full_virial_ratio"], data.extra["full_virial_ratio"], f)
# number of core electrons (optional)
if data.extra.get("num_core_electrons") is not None:
_write_xml_single(lbs["num_core_electrons"], data.extra["num_core_electrons"], f)
def _write_xml_single(tag: str, info: [str, int], file: TextIO) -> None:
"""Write header, tail and the data between them into the file."""
print(tag, file=file)
print(info, file=file)
print('</' + tag.lstrip('<'), file=file)
def _write_xml_single_scientific(tag: str, info: float, file: TextIO) -> None:
"""Write header, tail and the data between them into the file."""
print(tag, file=file)
print('{: ,.14E}'.format(info), file=file)
print('</' + tag.lstrip('<'), file=file)
def _write_xml_iterator(tag: str, info: Iterator, file: TextIO) -> None:
"""Write list of arrays to file."""
print(tag, file=file)
for info_line in info:
print(info_line, file=file)
print('</' + tag.lstrip('<'), file=file)
def _write_xml_iterator_scientific(tag: str, info: Iterator, file: TextIO) -> None:
"""Write list of arrays to file."""
print(tag, file=file)
for info_line in info:
print('{: ,.14E}'.format(info_line), file=file)
print('</' + tag.lstrip('<'), file=file)