# IODATA is an input and output module for quantum chemistry.
# Copyright (C) 2011-2019 The IODATA Development Team
#
# This file is part of IODATA.
#
# IODATA is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# IODATA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
# --
"""XYZ file format.
Usually, the different frames in a trajectory describe different geometries of the same
molecule, with atoms in the same order. The ``load_many`` and ``dump_many`` functions
below can also handle an XYZ with different molecules, e.g. a molecular database.
The ``load_*`` and ``dump_*`` functions all accept the optional argument
``atom_columns``. This argument fixes the meaning of the columns to be loaded
from or dumped to an XYZ file. The following example defines, in addition to the
conventional columns, also a column with atomic charges and three columns with
atomic forces.

.. code-block :: python

    atom_columns = iodata.formats.xyz.DEFAULT_ATOM_COLUMNS + [
        # Atomic charges are stored in a dictionary atcharges and the key
        # refers to the name of the partitioning method.
        ("atcharges", "mulliken", (), float, float, "{:10.5f}".format),
        # Note that in IOData, the energy gradient is stored, which contains the
        # negative forces.
        ("atgradient", None, (3,), float,
         (lambda word: -float(word)),
         (lambda value: "{:15.10f}".format(-value)))
    ]
    mol = load_one("test.xyz", atom_columns=atom_columns)
    # The following attributes are present:
    print(mol.atnums)
    print(mol.atcoords)
    print(mol.atcharges["mulliken"])
    print(mol.atgradient)

When defining ``atom_columns``, no columns can be skipped, such that all
information loaded from a file can also be written back out when dumping it.
"""
from collections.abc import Iterator
from typing import TextIO
import numpy as np
from ..docstrings import (
document_dump_many,
document_dump_one,
document_load_many,
document_load_one,
)
from ..iodata import IOData
from ..periodic import num2sym, sym2num
from ..utils import LineIterator, angstrom
# An empty ``__all__`` means ``from <this module> import *`` exports no names.
__all__ = ()
# Filename glob patterns associated with the XYZ format.
# NOTE(review): presumably read elsewhere to select this module from a
# filename -- confirm against the code that consumes PATTERNS.
PATTERNS = ["*.xyz"]
def _load_atnum(word: str) -> int:
    """Return the atomic number for an element field given as number or symbol."""
    if word.isdigit():
        return int(word)
    # Symbols are looked up in title case, e.g. "HE" and "he" both become "He".
    return sym2num[word.title()]


def _dump_atnum(atnum) -> str:
    """Return the element symbol of *atnum*, padded to at least two characters."""
    return f"{num2sym[atnum]:2s}"


def _load_coord(word: str) -> float:
    """Parse one coordinate word and multiply it by the ``angstrom`` factor."""
    # NOTE(review): presumably converts angstrom to IOData's internal length
    # unit -- confirm against the definition of ``angstrom``.
    return float(word) * angstrom


def _dump_coord(value) -> str:
    """Divide *value* by ``angstrom`` and format it as ``15.10f``."""
    return f"{value / angstrom:15.10f}"


# Columns used when the caller does not pass ``atom_columns``. Each entry is
# (attribute, key, per-atom shape, dtype, load_word, dump_word).
DEFAULT_ATOM_COLUMNS = [
    ("atnums", None, (), int, _load_atnum, _dump_atnum),
    ("atcoords", None, (3,), float, _load_coord, _dump_coord),
]
# Description of the optional ``atom_columns`` argument. It is handed to the
# ``document_*`` decorators of the load and dump functions in this module, under
# the key "atom_columns".
ATOM_COLUMNS_DOC = """\
A list of atomic fields to be loaded. Each field is a tuple with the following
items: **attribute** (``str``), **key** (``None`` or ``str``, when ``str`` the
``IOData`` attribute is a ``dict``), **shape** for one atom (``tuple``),
**dtype**, **load_word** (function taking string and returning a value with the
correct type), **dump_word** (function taking a value and returning a formatted
string).
"""
[docs]
@document_load_one("XYZ", ["atcoords", "atnums", "title"], [], {"atom_columns": ATOM_COLUMNS_DOC})
def load_one(lit: LineIterator, atom_columns=None) -> dict:
    """Do not edit this docstring. It will be overwritten."""
    # A frame opens with two header lines: the number of atoms and a title.
    natom = int(next(lit))
    title = next(lit).strip()
    columns = DEFAULT_ATOM_COLUMNS if atom_columns is None else atom_columns
    result = {"title": title}

    # Allocate one zero-filled array per column, with a leading axis over atoms.
    for attr, key, suffix, dtype, _parse, _fmt in columns:
        zeros = np.zeros((natom, *suffix), dtype=dtype)
        if key is not None:
            # Keyed columns are stored inside a dictionary attribute, which is
            # created the first time it is needed.
            result.setdefault(attr, {})[key] = zeros
        else:
            result[attr] = zeros

    # Read one line per atom and hand its words to the columns, in order.
    for iatom in range(natom):
        words = next(lit).split()
        for attr, key, _suffix, _dtype, parse, _fmt in columns:
            target = result[attr] if key is None else result[attr][key]
            # Slice instead of index: a length-one slice is a writable view,
            # also when the per-atom shape is empty.
            row = target[iatom : iatom + 1]
            for ifield in range(row.size):
                # Every value consumes the leftmost word that is still unused.
                row.flat[ifield] = parse(words.pop(0))
    return result
[docs]
@document_load_many("XYZ", ["atcoords", "atnums", "title"], [], {"atom_columns": ATOM_COLUMNS_DOC})
def load_many(lit: LineIterator, atom_columns=None) -> Iterator[dict]:
    """Do not edit this docstring. It will be overwritten."""
    # An XYZ trajectory is a plain concatenation of single-frame XYZ files, so
    # frames are read one after the other with load_one.
    #
    # Iteration ends at the first blank line or when the line iterator runs
    # out. The try block also spans load_one, so a StopIteration raised while a
    # frame is being parsed ends the generator quietly as well.
    try:
        while (line := next(lit)).strip() != "":
            # The peeked line is the first line of the next frame: push it back
            # so load_one sees the complete frame.
            lit.back(line)
            yield load_one(lit, atom_columns)
    except StopIteration:
        pass
[docs]
@document_dump_one("XYZ", ["atcoords", "atnums"], ["title"], {"atom_columns": ATOM_COLUMNS_DOC})
def dump_one(f: TextIO, data: IOData, atom_columns=None):
    """Do not edit this docstring. It will be overwritten."""
    columns = DEFAULT_ATOM_COLUMNS if atom_columns is None else atom_columns

    # Header: the atom count, then the title (or a placeholder when the title
    # is empty or None).
    print(data.natom, file=f)
    print(data.title or "Created with IOData", file=f)

    # Body: one line per atom, with the formatted values of all columns joined
    # by single spaces.
    for iatom in range(data.natom):
        fields = []
        for attr, key, _suffix, _dtype, _parse, fmt in columns:
            source = getattr(data, attr)
            # A key selects one array out of a dictionary attribute.
            per_atom = source if key is None else source[key]
            for value in per_atom[iatom].flat:
                fields.append(fmt(value))
        print(" ".join(fields), file=f)
[docs]
@document_dump_many("XYZ", ["atcoords", "atnums"], ["title"], {"atom_columns": ATOM_COLUMNS_DOC})
def dump_many(f: TextIO, datas: Iterator[IOData], atom_columns=None):
    """Do not edit this docstring. It will be overwritten."""
    # Frames are written back to back into the same file object, each one by
    # dump_one with the same column definition.
    for frame in datas:
        dump_one(f, frame, atom_columns=atom_columns)