# Source code for iodata.test.test_json_qcschema

# IODATA is an input and output module for quantum chemistry.
# Copyright (C) 2011-2019 The IODATA Development Team
#
# This file is part of IODATA.
#
# IODATA is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# IODATA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
# --
"""Test iodata.formats.json module."""

import json
import os
from importlib.resources import as_file, files

import numpy as np
import pytest

from ..api import dump_one, load_one
from ..utils import LoadError, LoadWarning

# Tests for qcschema_molecule
# GEOMS: dict of str: NDArray(N, 3)
# Reference coordinates keyed by a short molecule label, one row of three values per atom.
# The tables below pass these arrays as the expected ``geometry``, which the tests compare
# with ``mol.atcoords`` using ``np.testing.assert_allclose``.
# NOTE(review): the length unit is not stated in this file -- presumably whatever unit
# load_one uses for atcoords; confirm before reusing these numbers elsewhere.
GEOMS = {
    "LiCl": np.array([[0.000000, 0.000000, -1.631761], [0.000000, 0.000000, 0.287958]]),
    "OHr": np.array([[0.0, 0.0, -0.12947694], [0.0, -1.49418734, 1.02744651]]),
    "CuSCN": np.array(
        [
            [1.469987, -0.328195, 0.052136],
            [3.593873, -0.020962, 0.010402],
            [3.968446, -1.653292, 0.232148],
            [4.253724, -2.762010, 0.382764],
        ]
    ),
    "H2O": np.array([[0.0, 0.0, -0.1295], [0.0, -1.4942, 1.0274], [0.0, 1.4942, 1.0274]]),
    "H2O_MP2": np.array([[0.0, 0.0, -0.1294], [0.0, -1.4941, 1.0274], [0.0, 1.4941, 1.0274]]),
}
# These molecule examples were manually generated for testing
# MOL_FILES: (filename, atnums, charge, spinpol, geometry, nwarn)
# nwarn is the number of LoadWarning records expected while loading the file;
# 0 means the file is loaded without a pytest.warns wrapper.
MOL_FILES = [
    ("LiCl_molecule.json", [3, 17], 0, 0, GEOMS["LiCl"], 0),
    # Manual validation of molpro_uks_hydroxyl_radical_gradient_output.json
    ("Hydroxyl_radical_molecule.json", [8, 1], 0, 1, GEOMS["OHr"], 0),
    # Warnings:
    #   has both masses and mass numbers
    ("CuSCN_molecule.json", [29, 16, 6, 7], 0, 0, GEOMS["CuSCN"], 1),
]


@pytest.mark.parametrize(
    ("filename", "atnums", "charge", "spinpol", "geometry", "nwarn"), MOL_FILES
)
def test_qcschema_molecule(filename, atnums, charge, spinpol, geometry, nwarn):
    """Load a manually generated qcschema_molecule file and compare it with reference data.

    When ``nwarn`` is non-zero, exactly that many ``LoadWarning`` records must be emitted
    while loading; otherwise the file is loaded without a warning check.
    """
    resource = files("iodata.test.data").joinpath(filename)
    with as_file(resource) as path:
        if nwarn != 0:
            with pytest.warns(LoadWarning) as record:
                mol = load_one(str(path), fmt="json_qcschema")
            assert len(record) == nwarn
        else:
            mol = load_one(str(path), fmt="json_qcschema")

    np.testing.assert_equal(mol.atnums, atnums)
    assert mol.charge == charge
    assert mol.spinpol == spinpol
    np.testing.assert_allclose(mol.atcoords, geometry)
# Not a single valid example of qcschema_molecule is easily found for anything but water
# These molecule examples are sourced from the QCEngineRecords repo or from the QCSchema site
# MOLSSI_MOL_FILES: (filename, atnums, charge, spinpol, nwarn)
# nwarn is the exact number of LoadWarning records expected while loading the file.
MOLSSI_MOL_FILES = [
    # Extracted from qchem_logonly_rimp2_watercluster_gradient_output.json
    # Warnings:
    #   has both masses and mass numbers
    ("water_cluster.json", np.array([8, 1, 1, 8, 1, 1, 8, 1, 1]), 0, 0, 1),
    # Extracted from qchem_hf_water_energy_input.json
    # Warnings:
    #   has both masses and mass numbers
    ("water_full.json", np.array([8, 1, 1]), 0, 0, 1),
    # Copied from QCSchema RTD site
    # Warnings:
    #   no schema_name (warned in load_one and parsing molecule keys)
    #   no schema_version (warned in load_one & parsing molecule keys & unknown version)
    #   missing molecular_charge key
    #   missing molecular_multiplicity key
    ("incomplete_water.json", np.array([8, 1, 1]), 0, 0, 7),
    # Copied from QCSchema RTD site
    # Warnings:
    #   missing molecular_charge key
    #   missing molecular_multiplicity key
    ("old_water.json", np.array([8, 1, 1]), 0, 0, 2),
]
@pytest.mark.parametrize(("filename", "atnums", "charge", "spinpol", "nwarn"), MOLSSI_MOL_FILES)
def test_molssi_qcschema_molecule(filename, atnums, charge, spinpol, nwarn):
    """Check qcschema_molecule parsing of the MolSSI-sourced files.

    Loading must emit ``LoadWarning``; ``nwarn`` is the exact number of records expected.
    """
    resource = files("iodata.test.data").joinpath(filename)
    with as_file(resource) as path:
        with pytest.warns(LoadWarning) as record:
            mol = load_one(str(path), fmt="json_qcschema")

    np.testing.assert_equal(mol.atnums, atnums)
    assert mol.charge == charge
    assert mol.spinpol == spinpol
    assert len(record) == nwarn
# Unparsed dicts for test files
# Each value is the dictionary a test expects to find under an "unparsed" key after loading.
UNPARSED = {
    "extra": {"another_field": True},
    "nested_extra": {
        "related_projects": {"HSAB": {"id": "HSAB_2019_LALB"}, "4PB3": {"id": "4PB3_2020_Group1"}}
    },
}
# Test passthrough for molecule files using modified versions of CuSCN_molecule.json
# PASSTHROUGH_MOL_FILES: (filename, unparsed_dict)
PASSTHROUGH_MOL_FILES = [
    ("CuSCN_molecule_extra.json", UNPARSED["extra"]),
    ("CuSCN_molecule_nested_extra.json", UNPARSED["nested_extra"]),
]
@pytest.mark.parametrize(("filename", "unparsed_dict"), PASSTHROUGH_MOL_FILES)
def test_passthrough_qcschema_molecule(filename, unparsed_dict):
    """Check that unrecognized qcschema_molecule keys are passed through on loading.

    The unknown keys must show up in ``mol.extra["molecule"]["unparsed"]`` and loading
    must emit exactly one ``LoadWarning``.
    """
    resource = files("iodata.test.data").joinpath(filename)
    with as_file(resource) as path:
        with pytest.warns(LoadWarning) as record:
            mol = load_one(str(path), fmt="json_qcschema")

    assert mol.extra["molecule"]["unparsed"] == unparsed_dict
    assert len(record) == 1
def _check_provenance(mol1, mol2): """Test the provenance information, if available, to avoid updating version on test files.""" if "provenance" not in mol1: return isinstance(mol2["provenance"], dict) if isinstance(mol1["provenance"], dict): return mol1["provenance"] in mol2["provenance"] if isinstance(mol1["provenance"], list): for entry in mol1["provenance"]: assert entry in mol2["provenance"] return True return False INOUT_MOL_FILES = [ ("LiCl_molecule.json", 0), ("Hydroxyl_radical_molecule.json", 0), ("CuSCN_molecule.json", 1), ("CuSCN_molecule_extra.json", 1), ("CuSCN_molecule_nested_extra.json", 1), ]
@pytest.mark.parametrize(("filename", "nwarn"), INOUT_MOL_FILES)
def test_inout_qcschema_molecule(tmpdir, filename, nwarn):
    """Check that a load/dump round trip of a qcschema_molecule file retains all data.

    Provenance is verified separately with ``_check_provenance`` and then removed from
    both documents before they are compared for equality.
    """
    resource = files("iodata.test.data").joinpath(filename)
    with as_file(resource) as path:
        if nwarn != 0:
            with pytest.warns(LoadWarning) as record:
                mol = load_one(str(path), fmt="json_qcschema")
            assert len(record) == nwarn
        else:
            mol = load_one(str(path), fmt="json_qcschema")
        # Read the reference document while the resource is still available on disk.
        mol1 = json.loads(path.read_bytes())

    fn_out = os.path.join(tmpdir, "test_qcschema_mol.json")
    dump_one(mol, fn_out, fmt="json_qcschema")
    with open(fn_out) as handle:
        mol2 = json.load(handle)

    # Check that prior provenance info is kept
    assert _check_provenance(mol1, mol2)
    for document in (mol1, mol2):
        document.pop("provenance", None)
    assert mol1 == mol2
# MolSSI-sourced molecule files used for the load/dump round-trip test.
INOUT_MOLSSI_MOL_FILES = [
    "water_cluster.json",
    "water_full.json",
]
@pytest.mark.parametrize("filename", INOUT_MOLSSI_MOL_FILES)
def test_inout_molssi_qcschema_molecule(tmpdir, filename):
    """Check that a load/dump round trip of a MolSSI molecule file retains all relevant data.

    Null entries and empty dictionaries are stripped from the original document before
    the comparison, and provenance is verified separately.
    """
    resource = files("iodata.test.data").joinpath(filename)
    with as_file(resource) as path:
        with pytest.warns(LoadWarning) as record:
            mol = load_one(str(path), fmt="json_qcschema")
        original = json.loads(path.read_bytes())
    assert len(record) == 1

    fn_out = os.path.join(tmpdir, "test_qcschema_mol.json")
    dump_one(mol, fn_out, fmt="json_qcschema")
    with open(fn_out) as handle:
        mol2 = json.load(handle)

    # Extra processing for testing:
    # Remove all null entries and empty dicts in json
    # QCEngine seems to add null entries and empty dicts even for optional and empty keys
    mol1 = {}
    for key, value in original.items():
        if value is None:
            continue
        if isinstance(value, dict):
            # Nulls are only pruned one level deep; deeper nesting is left untouched.
            value = {k: v for k, v in value.items() if v is not None}
            if not value:
                continue
        mol1[key] = value

    # Check that prior provenance info is kept
    assert _check_provenance(mol1, mol2)
    for document in (mol1, mol2):
        document.pop("provenance", None)
    assert mol1 == mol2
def test_ghost(tmpdir):
    """Check the load/dump round trip of water_cluster_ghost.json.

    After loading, ``atcorenums`` must be zero for the last six atoms, and the dumped
    file must mark exactly those atoms as not real.
    """
    resource = files("iodata.test.data").joinpath("water_cluster_ghost.json")
    with as_file(resource) as path:
        mol = load_one(str(path), fmt="json_qcschema")
    expected_atcorenums = [8, 1, 1, 0, 0, 0, 0, 0, 0]
    np.testing.assert_allclose(mol.atcorenums, expected_atcorenums)

    fn_out = os.path.join(tmpdir, "test_ghost.json")
    dump_one(mol, fn_out, fmt="json_qcschema")
    with open(fn_out) as handle:
        dumped = json.load(handle)
    expected_real = [True, True, True, False, False, False, False, False, False]
    assert dumped["real"] == expected_real
# INPUT_FILES: (filename, explicit_basis, lot, obasis_name, run_type, geometry)
# A value of None for obasis_name or run_type means that attribute is not checked.
INPUT_FILES = [
    ("H2O_HF_STO3G_Gaussian_input.json", False, "HF", "STO-3G", "energy", GEOMS["H2O"]),
    ("LiCl_string_STO4G_input.json", False, "B3LYP", "Def2TZVP", None, GEOMS["LiCl"]),
    ("LiCl_explicit_STO4G_input.json", True, "HF", None, None, GEOMS["LiCl"]),
    ("LiCl_STO4G_Gaussian_input.json", False, "HF", "STO-4G", "freq", GEOMS["LiCl"]),
    ("water_mp2_input.json", False, "MP2", "cc-pVDZ", None, GEOMS["H2O_MP2"]),
]
@pytest.mark.parametrize(
    ("filename", "explicit_basis", "lot", "obasis_name", "run_type", "geometry"), INPUT_FILES
)
def test_qcschema_input(filename, explicit_basis, lot, obasis_name, run_type, geometry):
    """Load a qcschema_input file and compare the parsed settings with reference data.

    A ``LoadError`` is tolerated only for files flagged with ``explicit_basis``.
    ``obasis_name`` and ``run_type`` are only compared when a truthy reference is given.
    """
    resource = files("iodata.test.data").joinpath(filename)
    with as_file(resource) as path:
        try:
            with pytest.warns(LoadWarning):
                mol = load_one(str(path), fmt="json_qcschema")
        # This will change if QCSchema Basis gets supported
        except LoadError:
            assert explicit_basis
        else:
            assert mol.lot == lot
            if obasis_name:
                assert mol.obasis_name == obasis_name
            if run_type:
                assert mol.run_type == run_type
            np.testing.assert_allclose(mol.atcoords, geometry)
# Test passthrough for input files using modified versions of LiCl_STO4G_Gaussian_input.json
# PASSTHROUGH_INPUT_FILES: (filename, unparsed_dict, location)
# location is the key of mol.extra under which the "unparsed" dictionary is expected.
PASSTHROUGH_INPUT_FILES = [
    ("LiCl_STO4G_Gaussian_input_extra.json", UNPARSED["extra"], "input"),
    ("LiCl_STO4G_Gaussian_input_nested_extra.json", UNPARSED["nested_extra"], "input"),
    ("LiCl_STO4G_Gaussian_input_extra_molecule.json", UNPARSED["extra"], "molecule"),
]
@pytest.mark.parametrize(("filename", "unparsed_dict", "location"), PASSTHROUGH_INPUT_FILES)
def test_passthrough_qcschema_input(filename, unparsed_dict, location):
    """Check that unrecognized keys in qcschema_input files are passed through on loading.

    ``location`` selects the section of ``mol.extra`` whose ``"unparsed"`` entry must
    equal ``unparsed_dict``.
    """
    resource = files("iodata.test.data").joinpath(filename)
    with as_file(resource) as path:
        with pytest.warns(LoadWarning):
            mol = load_one(str(path), fmt="json_qcschema")

    assert mol.extra[location]["unparsed"] == unparsed_dict
# INOUT_INPUT_FILES: (filename, nwarn)
# An nwarn of 0 means the number of LoadWarning records is not checked.
INOUT_INPUT_FILES = [
    ("H2O_HF_STO3G_Gaussian_input.json", 0),
    ("LiCl_string_STO4G_input.json", 0),
    ("LiCl_STO4G_Gaussian_input.json", 0),
    ("LiCl_STO4G_Gaussian_input_extra.json", 0),
    ("LiCl_STO4G_Gaussian_input_nested_extra.json", 0),
    ("LiCl_STO4G_Gaussian_input_extra_molecule.json", 0),
]
@pytest.mark.parametrize(("filename", "nwarn"), INOUT_INPUT_FILES)
def test_inout_qcschema_input(tmpdir, filename, nwarn):
    """Check that a load/dump round trip of a qcschema_input file retains all data.

    Loading must always emit ``LoadWarning``; the number of records is only checked
    when ``nwarn`` is non-zero. Provenance is verified separately and then removed from
    the top level and from the ``"molecule"`` section of both documents.
    """
    resource = files("iodata.test.data").joinpath(filename)
    with as_file(resource) as path:
        with pytest.warns(LoadWarning) as record:
            mol = load_one(str(path), fmt="json_qcschema")
        if nwarn != 0:
            assert len(record) == nwarn
        mol1 = json.loads(path.read_bytes())

    fn_out = os.path.join(tmpdir, "test_input_mol.json")
    dump_one(mol, fn_out, fmt="json_qcschema")
    with open(fn_out) as handle:
        mol2 = json.load(handle)

    # Check that prior provenance info is kept
    assert _check_provenance(mol1, mol2)
    for document in (mol1, mol2):
        document.pop("provenance", None)
        document["molecule"].pop("provenance", None)
    assert mol1 == mol2
# OUTPUT_FILES: (filename, lot, obasis_name, run_type, nwarn)
# An nwarn of 0 means the number of LoadWarning records is not checked.
OUTPUT_FILES = [
    ("H2O_CCSDprTpr_STO3G_output.json", "CCSD(T)", "sto-3g", None, 0),
    ("LiCl_STO4G_Gaussian_output.json", "HF", "STO-4G", "Freq", 0),
    ("xtb_water_no_basis.json", "XTB", None, None, 3),
]
@pytest.mark.parametrize(("filename", "lot", "obasis_name", "run_type", "nwarn"), OUTPUT_FILES)
def test_qcschema_output(filename, lot, obasis_name, run_type, nwarn):
    """Load a qcschema_output file and compare the parsed settings with reference data.

    Loading must always emit ``LoadWarning``; the number of records is only checked
    when ``nwarn`` is non-zero.
    """
    resource = files("iodata.test.data").joinpath(filename)
    with as_file(resource) as path:
        with pytest.warns(LoadWarning) as record:
            mol = load_one(str(path), fmt="json_qcschema")
        if nwarn != 0:
            assert len(record) == nwarn

    assert mol.lot == lot
    assert mol.obasis_name == obasis_name
    assert mol.run_type == run_type
# Not a single valid example of qcschema_molecule is easily found for anything but water
# Some of these files have been manually validated, as reflected in the provenance
# BAD_OUTPUT_FILES: (filename, error)
# error is the exception type that loading the file is expected to raise.
BAD_OUTPUT_FILES = [
    ("turbomole_water_energy_hf_output.json", LoadError),
    ("turbomole_water_gradient_rimp2_output.json", LoadError),
]
@pytest.mark.parametrize(("filename", "error"), BAD_OUTPUT_FILES)
def test_bad_qcschema_files(filename, error):
    """Check that loading a known-bad QCSchema file raises the expected error."""
    # FIXME: these will move
    resource = files("iodata.test.data").joinpath(filename)
    with as_file(resource) as path:
        with pytest.raises(error):
            load_one(str(path), fmt="json_qcschema")
# Output files used for the load/dump round-trip test.
INOUT_OUTPUT_FILES = [
    "H2O_CCSDprTpr_STO3G_output.json",
    "LiCl_STO4G_Gaussian_output.json",
]
@pytest.mark.parametrize("filename", INOUT_OUTPUT_FILES)
def test_inout_qcschema_output(tmpdir, filename):
    """Check that a load/dump round trip of a qcschema_output file retains all data.

    Loading must emit ``LoadWarning``. Provenance is verified separately and then removed
    from the top level and from the ``"molecule"`` section of both documents.
    """
    resource = files("iodata.test.data").joinpath(filename)
    with as_file(resource) as path:
        with pytest.warns(LoadWarning):
            mol = load_one(str(path), fmt="json_qcschema")
        mol1 = json.loads(path.read_bytes())

    fn_out = os.path.join(tmpdir, "test_input_mol.json")
    dump_one(mol, fn_out, fmt="json_qcschema")
    with open(fn_out) as handle:
        mol2 = json.load(handle)

    # Check that prior provenance info is kept
    assert _check_provenance(mol1, mol2)
    for document in (mol1, mol2):
        document.pop("provenance", None)
        document["molecule"].pop("provenance", None)
    assert mol1 == mol2