Source code for scilpy.io.hdf5

# -*- coding: utf-8 -*-

import logging

from dipy.io.stateful_tractogram import StatefulTractogram, Space, Origin
from dipy.io.utils import create_nifti_header
import numpy as np

from scilpy.io.streamlines import reconstruct_streamlines



[docs]
def reconstruct_sft_from_hdf5(hdf5_handle, group_keys, space=Space.VOX,
                              origin=Origin.TRACKVIS, load_dps=False,
                              load_dpp=False, ref_img=None,
                              merge_groups=False, allow_empty=False):
    """
    Reconstructs one or more SFT from the HDF5 data.

    Parameters
    ----------
    hdf5_handle: hdf5.file
        Opened hdf5 file.
    group_keys: str or list or None
        Name of the streamlines group(s) in the HDF5. If None, loads all
        groups. If more than one group, by default, will return one sft per
        group.
    space: Space
        The space in which the HDF5 was saved. Default: Space.Vox
    origin: Origin
        The origin in which the HDF5 was saved. Default: Origin.TRACKVIS
    load_dps: bool
        If true, loads the data_per_streamline if present in the hdf5.
    load_dpp: bool
        If true, loads the data_per_point if present in the hdf5.
    ref_img: nib.Nifti1Image
        If set, will verify that the HDF5's header is compatible with this
        reference.
    merge_groups: bool
        If true, and if groups_keys refer to more than one group, will merge
        all bundles into one SFT. Else, returns one SFT per group.
    allow_empty: bool
        If true, if no streamlines are found, an empty tractogram will be
        returned. If one or more group_keys do not exist, no error will be
        raised.

    Returns
    -------
    sft: StatefulTractogram or list[StatefulTractogram]
        The tractogram(s)
    groups_len: list[int]
        The number of streamlines per loaded group.
    """
    if ref_img is not None:
        assert_header_compatible_hdf5(hdf5_handle, ref_img)

    # Prepare header
    affine = hdf5_handle.attrs['affine']
    dimensions = hdf5_handle.attrs['dimensions']
    voxel_sizes = hdf5_handle.attrs['voxel_sizes']
    header = create_nifti_header(affine, dimensions, voxel_sizes)

    # Find the groups to load
    if isinstance(group_keys, str):
        group_keys = [group_keys]
    elif group_keys is None:
        group_keys = list(hdf5_handle.keys())  # Get all groups

    if not isinstance(group_keys, list):
        raise ValueError("Expecting key to be either a str, a list or None. "
                         "(our fault. Code needs revision).")
    if len(group_keys) == 1:
        merge_groups = True

    # Get streamlines. They have been stored inside a group (or one per
    # bundle). Load all groups and either merge together or return many sft.
    groups_len = []
    streamlines = []
    dps = []
    for i, group_key in enumerate(group_keys):
        # Get streamlines
        if group_key not in hdf5_handle:
            if allow_empty:
                tmp_streamlines = []
            else:
                raise ValueError("Group key {} not found in the hdf5. "
                                 "Possible choices: {}"
                                 .format(group_key, list(hdf5_handle.keys())))
        else:
            # If key exists, tmp_streamlines should not be empty.
            tmp_streamlines = reconstruct_streamlines_from_hdf5(
                hdf5_handle[group_key])

        if merge_groups:
            streamlines.extend(tmp_streamlines)
            groups_len.append(len(tmp_streamlines))
        else:
            streamlines.append(tmp_streamlines)

        # Load dps / dpp
        if i == 0 or not merge_groups:
            dps.append({})
        if len(tmp_streamlines) > 0:
            discarded_keys = []

            for sub_key in hdf5_handle[group_key].keys():
                if sub_key not in ['data', 'offsets', 'lengths']:
                    data = np.asarray(hdf5_handle[group_key][sub_key])
                    if data.shape == hdf5_handle[group_key]['offsets'].shape or np.isreal(data).all():
                        if data.shape == ():  # If data is a scalar (data_per_group coming from afd_fixel)
                            data = np.asarray(data).astype(float) * np.ones(hdf5_handle[group_key]['offsets'].shape)
                        # Discovered dps (the array is the same length as
                        # offsets, so it is per streamline)
                        if load_dps:
                            if i == 0 or not merge_groups:
                                dps[i][sub_key] = data
                            else:
                                dps[i][sub_key] = np.concatenate(
                                    (dps[i][sub_key], data))
                    elif data.shape == hdf5_handle[group_key]['data'].shape \
                            and load_dpp and sub_key not in discarded_keys:
                        # Discovered dpp (the array is not the same length as
                        # offsets, so it is per point)
                        logging.warning("DPP discoverted in the HDF5, but "
                                        "not implemented yet. Skipping "
                                        "loading of {}.".format(sub_key))
                        discarded_keys.append(sub_key)

    # 3) Format as SFT
    if merge_groups:
        if len(streamlines) == 0 and not allow_empty:
            raise ValueError("Cannot load an empty tractogram from HDF5. Set "
                             "`allow_empty` to True if you want to force it.")
        sft = StatefulTractogram(streamlines, header, space=space,
                                 origin=origin, data_per_streamline=dps[0])
        return sft, groups_len
    else:
        sfts = []
        for (sub_streamlines, sub_dps) in zip(streamlines, dps):
            if len(streamlines) == 0 and not allow_empty:
                raise ValueError("Cannot load an empty tractogram from HDF5. "
                                 "Set `allow_empty` to True if you want to "
                                 "force it.")
            else:
                sfts.append(
                    StatefulTractogram(sub_streamlines, header, space=space,
                                       origin=origin,
                                       data_per_streamline=sub_dps))
        return sfts, groups_len




[docs]
def assert_header_compatible_hdf5(hdf5_handle, ref):
    """
    Parameters
    ----------
    hdf5_handle: hdf5.file
        Opened hdf5 handle.
    ref: Either a tuple (affine, dimension) or a nib.Nifti1Image reference.
    """
    if isinstance(ref, tuple) or isinstance(ref, list):
        ref_affine, ref_dimension = ref
        name = 'reference'
    else:
        # Expecting nibabel reference
        ref_affine = ref.affine
        ref_dimension = ref.shape
        name = ref.get_filename()

    affine = hdf5_handle.attrs['affine']
    dimensions = hdf5_handle.attrs['dimensions']
    if not (np.allclose(affine, ref_affine, atol=1e-03)
            and np.array_equal(dimensions, ref_dimension[0:3])):
        raise IOError('HDF5 file does not have a compatible header with'
                      ' {}'.format(name))




[docs]
def reconstruct_streamlines_from_hdf5(hdf5_group):
    """
    Function to reconstruct streamlines from hdf5, mainly to facilitate
    decomposition into thousands of connections and decrease I/O usage.

    Parameters
    ----------
    hdf5_group: h5py.group
        Handle to the hdf5 group. Ex: hdf5_file[bundle_key].

    Returns
    -------
    streamlines : list of np.ndarray
        List of streamlines.
    """
    if 'data' not in hdf5_group:
        raise ValueError("Expecting data in bundle's group.")

    data = np.array(hdf5_group['data']).flatten()
    offsets = np.array(hdf5_group['offsets'])
    lengths = np.array(hdf5_group['lengths'])

    return reconstruct_streamlines(data, offsets, lengths)




[docs]
def construct_hdf5_from_sft(hdf5_handle, sfts, groups_keys='streamlines',
                            save_dps=False, save_dpp=False):
    """
    Create a hdf5 from a SFT.

    Parameters
    ----------
    hdf5_handle: h5py.file
        Opened handle to the hdf5 group.
    sfts: StatefulTractogram or list[StatefulTractogram]
    groups_keys: str or list[str]
        The streamlines' hdf5 group name.
    save_dps: bool
        If True, save the DPS keys to hdf5.
    save_dpp: bool
        If True, save the DPP keys to hdf5.
    """
    if isinstance(sfts, StatefulTractogram):
        sfts = [sfts]
    if isinstance(groups_keys, str):
        groups_keys = [groups_keys]

    assert len(sfts) == len(groups_keys)

    # Prepare header
    construct_hdf5_header(hdf5_handle, sfts[0])

    # Prepare streamline groups
    for sft, group_key in zip(sfts, groups_keys):
        sft.to_vox()
        sft.to_corner()
        group = hdf5_handle.create_group(group_key)
        construct_hdf5_group_from_streamlines(
            group, sft.streamlines,
            sft.data_per_streamline if save_dps else None,
            sft.data_per_point if save_dpp else None)




[docs]
def construct_hdf5_header(hdf5_handle, ref_sft):
    ref_sft.to_vox()
    ref_sft.to_corner()
    hdf5_handle.attrs['affine'] = ref_sft.affine
    hdf5_handle.attrs['dimensions'] = ref_sft.dimensions
    hdf5_handle.attrs['voxel_sizes'] = ref_sft.voxel_sizes
    hdf5_handle.attrs['voxel_order'] = ref_sft.voxel_order




[docs]
def construct_hdf5_group_from_streamlines(hdf5_group, streamlines,
                                          dps=None, dpp=None):
    """
    Create a hdf5 group from streamlines.

    Parameters
    ----------
    hdf5_group: h5py.group
        Handle to the hdf5 group. Ex: hdf5_file[bundle_key].
    streamlines: ArraySequence
        The streamlines. Expecting streamlines in voxel space, corner origin.
    dps: dict or None
        The data_per_streamline
    dpp: dict or None
        The data_per_point
    """
    hdf5_group.create_dataset('data', data=streamlines.get_data(),
                              dtype=np.float32)
    hdf5_group.create_dataset('offsets', data=streamlines.copy()._offsets,
                              dtype=np.int64)
    hdf5_group.create_dataset('lengths', data=streamlines.copy()._lengths,
                              dtype=np.int32)
    if dps is not None:
        for dps_key, dps_value in dps.items():
            if dps_key not in ['data', 'offsets', 'lengths']:
                hdf5_group.create_dataset(dps_key, data=dps_value,
                                          dtype=np.float32)
            else:
                raise ValueError("Please do not use data_per_streamline keys "
                                 "'data', 'offsets' or 'lengths', this "
                                 "causes unclear management in the hdf5.")

    if dpp is not None:
        raise NotImplementedError(
            "NOT IMPLEMENTED: Cannot save data_per_point in the hdf5 yet.")