###############################################################################
# Copyright (c), Forschungszentrum Jülich GmbH, IAS-1/PGI-1, Germany.         #
#                All rights reserved.                                         #
# This file is part of the Masci-tools package.                               #
# (Material science tools)                                                    #
#                                                                             #
# The code is hosted on GitHub at https://github.com/judftteam/masci-tools.   #
# For further information on the license, see the LICENSE.txt file.           #
# For further information please visit http://judft.de/.                      #
#                                                                             #
###############################################################################
"""
Small utility functions for inspecting hdf files and converting the
complete file structure into a python dictionary
"""
from __future__ import annotations

from typing import Any
import h5py
import numpy as np

from masci_tools.util.typing import FileLike


def hdfList(name: str, obj: h5py.HLObject) -> None:
    """
    Print the name of the current object (indented to create a nice tree structure)

    Also prints attribute values and dataset shapes and datatypes

    :param name: path of the object inside the hdf file
    :param obj: h5py object (group or dataset) to print
    """
    print(f"{name.split('/')[-1]:>{len(name)-1}}: {type(obj)}")

    ref_length = len(name) - len(name.split('/')[-1]) + 4
    if isinstance(obj, h5py.Dataset):
        print(f"{'Datatype:':>{ref_length+9}} {obj.dtype}")
        print(f"{'Shape:':>{ref_length+6}} {obj.shape}\n")

    if obj.attrs:
        print(f"{'Attributes:':>{ref_length+9}}")
        for attr_name, attr_val in obj.attrs.items():
            if isinstance(attr_val, list) and len(attr_val) == 1:
                attr_val = attr_val[0]
            print(f'{attr_name:>{ref_length+len(attr_name)}}: {attr_val}')
        print('')
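

# Illustrative sketch, not part of the original module: ``hdfList`` has the
# (name, object) signature expected by :py:meth:`h5py.Group.visititems`, which
# calls it once for every group and dataset below the starting point. The file
# layout and all names used here are invented for the demonstration.
def _example_hdflist() -> None:
    import io

    buffer = io.BytesIO()
    with h5py.File(buffer, 'w') as file_hdf:
        grp = file_hdf.create_group('atoms')  # hypothetical group name
        grp.attrs['nTypes'] = np.array([1])
        grp.create_dataset('positions', data=np.zeros((3, 3)))
        file_hdf.visititems(hdfList)  # prints the tree while the file is open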


def h5dump(file: FileLike, group: str = '/') -> None:
    """
    Shows the overall file structure of an hdf file

    Goes through all groups and subgroups and prints the attributes
    or the shape and datatype of the datasets

    :param file: path or filehandle of the hdf file
    :param group: path of the group from which to start the dump
    """
    with h5py.File(file, 'r') as file_hdf:
        if group != '/':
            print(f'Starting from path {group}')
            hdfList(group, file_hdf[group])
            print('This path contains:\n')
        file_hdf[group].visititems(hdfList)
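

# Usage sketch, illustrative and not part of the original module: dumping from the
# root prints the complete tree, while the ``group`` argument restricts the output
# to a subtree. The file content is invented and written to an in-memory buffer.
def _example_h5dump() -> None:
    import io

    buffer = io.BytesIO()
    with h5py.File(buffer, 'w') as file_hdf:
        grp = file_hdf.create_group('results')  # hypothetical group name
        grp.create_dataset('energies', data=np.linspace(0.0, 1.0, 5))

    h5dump(buffer)                    # complete file structure
    h5dump(buffer, group='/results')  # only the 'results' subtree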


def read_hdf_simple(file: FileLike, flatten: bool = False) -> tuple[dict[str, Any], dict[str, Any]]:
    """
    Reads in an hdf file and returns its contents in a nested dictionary

    :param file: path or filehandle of the hdf file
    :param flatten: bool, if True the dictionary will be flattened (does not check for lost information)

    :returns: two dictionaries, one with the datasets the other with the attributes in the file

    **Non-unique group attribute or dataset names will be overwritten in the return dict**
    """
    with h5py.File(file, 'r') as file_hdf:
        datasets, group_attrs = read_groups(file_hdf, flatten=flatten)

    return datasets, group_attrs
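

# Usage sketch, illustrative and not part of the original module, showing the nested
# return values and the effect of ``flatten=True``. The group, dataset and attribute
# names are invented for the demonstration.
def _example_read_hdf_simple() -> None:
    import io

    buffer = io.BytesIO()
    with h5py.File(buffer, 'w') as file_hdf:
        grp = file_hdf.create_group('cell')  # hypothetical group name
        grp.attrs['scale'] = np.array([1.0])
        grp.create_dataset('bravaisMatrix', data=np.eye(3))

    datasets, attrs = read_hdf_simple(buffer)
    # datasets == {'cell': {'bravaisMatrix': <3x3 array>}}; attrs == {'cell': {'scale': 1.0}}

    datasets_flat, attrs_flat = read_hdf_simple(buffer, flatten=True)
    # datasets_flat == {'bravaisMatrix': <3x3 array>}; attrs_flat == {'scale': 1.0}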


def read_groups(hdfdata: h5py.Group, flatten: bool = False) -> tuple[dict[str, Any], dict[str, Any]]:
    """
    Recursive function to read an hdf datastructure and extract the datasets
    and attributes

    :param hdfdata: current hdf group to process
    :param flatten: bool, if True the dictionary will be flattened (does not check for lost information)

    :returns: two dictionaries, one with the datasets the other with the attributes in the file
    """
    datasets: dict[str, Any] = {}
    attrs: dict[str, Any] = {}

    for name, attr_val in hdfdata.attrs.items():
        if len(attr_val) == 1:
            attr_val = attr_val[0]
        attrs[name] = attr_val

    for key, val in hdfdata.items():
        if isinstance(val, h5py.Dataset):
            datasets[key] = np.array(val)
        else:
            new_datasets, new_attrs = read_groups(val, flatten=flatten)
            if not flatten:
                if new_datasets:
                    datasets[key] = new_datasets
                if new_attrs:
                    attrs[key] = new_attrs
            else:
                datasets.update(new_datasets)
                attrs.update(new_attrs)

    return datasets, attrs
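

# Illustrative sketch, not part of the original module: ``read_groups`` can also be
# called directly on an already opened subgroup to convert only that part of the
# file. The names used here are invented for the demonstration.
def _example_read_groups() -> None:
    import io

    buffer = io.BytesIO()
    with h5py.File(buffer, 'w') as file_hdf:
        grp = file_hdf.create_group('kpoints')  # hypothetical group name
        grp.attrs['nkpt'] = np.array([4])
        grp.create_dataset('weights', data=np.full(4, 0.25))

    with h5py.File(buffer, 'r') as file_hdf:
        datasets, attrs = read_groups(file_hdf['kpoints'])
    # datasets == {'weights': array([0.25, 0.25, 0.25, 0.25])}; attrs == {'nkpt': 4}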