Source code for iodata.formats.xyz

# IODATA is an input and output module for quantum chemistry.
# Copyright (C) 2011-2019 The IODATA Development Team
#
# This file is part of IODATA.
#
# IODATA is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# IODATA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
# --
"""XYZ file format.

Usually, the different frames in a trajectory describe different geometries of the same
molecule, with atoms in the same order. The ``load_many`` and ``dump_many`` functions
below can also handle an XYZ with different molecules, e.g. a molecular database.

The ``load_*`` and ``dump_*`` functions all accept the optional argument
``atom_columns``. This argument fixes the meaning of the columns to be loaded
from or dumped to an XYZ file. The following example defines, in addition to the
conventional columns, also a column with atomic charges and three columns with
atomic forces.

.. code-block :: python

    atom_columns = iodata.formats.xyz.DEFAULT_ATOM_COLUMNS + [
        # Atomic charges are stored in a dictionary atcharges and they key
        # refers to the name of the partitioning method.
        ("atcharges", "mulliken", (), float, float, "{:10.5f}".format),
        # Note that in IOData, the energy gradient is stored, which contains the
        # negative forces.
        ("atgradient", None, (3,), float,
         (lambda word: -float(word)),
         (lambda value: "{:15.10f}".format(-value)))
    ]

    mol = load_one("test.xyz", atom_columns=atom_columns)
    # The following attributes are present:
    print(mol.atnums)
    print(mol.atcoords)
    print(mol.atcharges["mulliken"])
    print(mol.atgradient)

When defining ``atom_columns``, no columns can be skipped, such that all
information loaded from a file can also be written back out when dumping it.

"""

from collections.abc import Iterator
from typing import TextIO

import numpy as np

from ..docstrings import (
    document_dump_many,
    document_dump_one,
    document_load_many,
    document_load_one,
)
from ..iodata import IOData
from ..periodic import num2sym, sym2num
from ..utils import LineIterator, angstrom

__all__ = ()


PATTERNS = ["*.xyz"]


DEFAULT_ATOM_COLUMNS = [
    (
        "atnums",
        None,
        (),
        int,
        (lambda word: int(word) if word.isdigit() else sym2num[word.title()]),
        (lambda atnum: f"{num2sym[atnum]:2s}"),
    ),
    (
        "atcoords",
        None,
        (3,),
        float,
        (lambda word: float(word) * angstrom),
        (lambda value: f"{value / angstrom:15.10f}"),
    ),
]


ATOM_COLUMNS_DOC = """\
A list of atomic fields to be loaded. Each field as a tuple with the following
items: **attribute** (``str``), **key** (``None`` or ``str``, when ``str`` the
``IOData`` attribute is a ``dict``), **shape** for one atom (``tuple``),
**dtype**, **load_word** (function taking string and returning a value with the
correct type), **dump_word** (function taking a value and returning a formatted
string).
"""



[docs]
@document_load_one("XYZ", ["atcoords", "atnums", "title"], [], {"atom_columns": ATOM_COLUMNS_DOC})
def load_one(lit: LineIterator, atom_columns=None) -> dict:
    """Do not edit this docstring. It will be overwritten."""
    # Load the header.
    natom = int(next(lit))
    title = next(lit).strip()
    if atom_columns is None:
        atom_columns = DEFAULT_ATOM_COLUMNS
    data = {"title": title}
    # Initialize the arrays to be loaded from the XYZ file.
    for attrname, keyname, shapesuffix, dtype, _loadword, _dumpword in atom_columns:
        array = np.zeros((natom, *shapesuffix), dtype=dtype)
        if keyname is None:
            # Store the initial array as a normal attribute.
            data[attrname] = array
        else:
            # Store the initial array as a value in an dictionary attribute.
            data.setdefault(attrname, {})[keyname] = array
    # Load the atom lines.
    for iatom in range(natom):
        words = next(lit).split()
        for attrname, keyname, _shapesuffix, _dtype, loadword, _dumpword in atom_columns:
            # Get the slice of the array where properties for the current atom
            # must be stored.
            if keyname is None:
                # The array is a normal attribute.
                atom_array = data[attrname][iatom : iatom + 1]
            else:
                # The array is a value of a dictionary attribute.
                atom_array = data[attrname][keyname][iatom : iatom + 1]
            # Fill in array elements with atomic properties. For each new value
            # to be loaded, the first element of the list words is consumed and
            # converted to the right format for IOData.
            for ifield in range(atom_array.size):
                atom_array.flat[ifield] = loadword(words.pop(0))
    return data




[docs]
@document_load_many("XYZ", ["atcoords", "atnums", "title"], [], {"atom_columns": ATOM_COLUMNS_DOC})
def load_many(lit: LineIterator, atom_columns=None) -> Iterator[dict]:
    """Do not edit this docstring. It will be overwritten."""
    # XYZ Trajectory files are a simple concatenation of individual XYZ files,'
    # making it trivial to load many frames.
    try:
        while True:
            # Check for and skip empty lines at the end of file
            line = next(lit)
            if line.strip() == "":
                return
            lit.back(line)
            yield load_one(lit, atom_columns)
    except StopIteration:
        return




[docs]
@document_dump_one("XYZ", ["atcoords", "atnums"], ["title"], {"atom_columns": ATOM_COLUMNS_DOC})
def dump_one(f: TextIO, data: IOData, atom_columns=None):
    """Do not edit this docstring. It will be overwritten."""
    if atom_columns is None:
        atom_columns = DEFAULT_ATOM_COLUMNS
    # Write the header
    print(data.natom, file=f)
    print(data.title or "Created with IOData", file=f)
    # Write the atom lines
    for iatom in range(data.natom):
        words = []
        for attrname, keyname, _shapesuffix, _dtype, _loadword, dumpword in atom_columns:
            values = getattr(data, attrname)
            if keyname is not None:
                # The data to be written is a value of a dictionary attribute.
                values = values[keyname]
            words.extend(dumpword(value) for value in values[iatom].flat)
        print(" ".join(words), file=f)




[docs]
@document_dump_many("XYZ", ["atcoords", "atnums"], ["title"], {"atom_columns": ATOM_COLUMNS_DOC})
def dump_many(f: TextIO, datas: Iterator[IOData], atom_columns=None):
    """Do not edit this docstring. It will be overwritten."""
    # Similar to load_many, this is relatively easy.
    for data in datas:
        dump_one(f, data, atom_columns)