Source code for scilpy.io.fetcher

# -*- coding: utf-8 -*-

import hashlib
import inspect
import logging
import os
import pathlib
import requests
import zipfile

from scilpy import SCILPY_HOME

DVC_URL = "https://scil.usherbrooke.ca/scil_test_data/dvc-store/files/md5"



[docs]
def download_file_from_google_drive(url, destination):
    """
    Download a (possibly large) file by streaming it to disk.

    Nothing in this function is specific to Google Drive: it issues a plain
    GET request on `url` and writes the response body to `destination`, chunk
    by chunk, so the whole file is never held in memory.

    The HTTP status code is not checked here: whatever body the server
    returns is written to disk. Callers are expected to validate the
    downloaded file themselves (fetch_data does so with an MD5 checksum).

    Parameters
    ----------
    url: str
        Url of the file to be downloaded.
    destination: str
        Path to destination file with its name and extension.
    """
    CHUNK_SIZE = 32768

    # Context managers guarantee that the streamed connection and the session
    # are released, even if writing to disk fails midway.
    with requests.Session() as session:
        with session.get(url, stream=True) as response:
            # The file is only opened once the request went through, so a
            # failed request does not leave an empty file behind.
            with open(destination, "wb") as f:
                for chunk in response.iter_content(CHUNK_SIZE):
                    f.write(chunk)




[docs]
def get_testing_files_dict():
    """
    Get the dictionary linking each testing zip file to its MD5 checksum.

    Returns
    -------
    files_dict: dict
        Keys are the archive names (str, ending with '.zip'), values are the
        associated MD5 hex digests (str). A new dictionary is built at each
        call, so the caller is free to modify it.
    """
    # (archive name, md5) pairs. Order is kept in the returned dictionary.
    archives_and_md5 = (
        ("commit_amico.zip", "c190e6b9d22350b51e222c60febe13b4"),
        ("bundles.zip", "54b6e2bf2dda579886efe4e2a8989486"),
        ("stats.zip", "2aeac4da5ab054b3a460fc5fdc5e4243"),
        ("bst.zip", "eed227fd246255e7417f92d49eb1066a"),
        ("filtering.zip", "aa35388a791d803e3051a3236577ae19"),
        ("ihMT.zip", "08fcf44848ba2649aad5a5a470b3cb06"),
        ("tractometry.zip", "890bfa70e44b15c0d044085de54e00c6"),
        ("bids_json.zip", "97fd9a414849567fbfdfdb0ef400488b"),
        ("MT.zip", "1f4345485248683b3652c97f2630950e"),
        ("btensor_testdata.zip", "7ada72201a767292d56634e0a7bbd9ad"),
        ("tracking.zip", "4793a470812318ce15f1624e24750e4d"),
        ("atlas.zip", "dc34e073fc582476504b3caf127e53ef"),
        ("anatomical_filtering.zip", "5282020575bd485e15d3251257b97e01"),
        ("connectivity.zip", "ad5428b7b131f964382dfbe60a89f8a8"),
        ("plot.zip", "a1dc54cad7e1d17e55228c2518a1b34e"),
        ("others.zip", "82248b4888a63b0aeffc8070cc206995"),
        ("fodf_filtering.zip", "5985c0644321ecf81fd694fb91e2c898"),
        ("processing.zip", "0417df00d97272f5887c31acb8948604"),
        ("surface_vtk_fib.zip", "9e44050007618e7f9cf785cf59100877"),
        ("tractograms.zip", "964113f307213523d784b3dbf3a5117a"),
        ("mrds.zip", "5abe6092400e11e9bb2423e2c387e774"),
        ("lesions.zip", "dbd160b4c0c5d2db9cada875c0bbb00c"),
    )
    return dict(archives_and_md5)




[docs]
def fetch_data(files_dict, keys=None, verbose=True):
    """
    Download zip archives and extract them in SCILPY_HOME.

    For each requested archive, the download url is built from DVC_URL and
    the archive's MD5 (DVC_URL/<2 first characters>/<remaining characters>).
    The archive is saved as SCILPY_HOME/<name>.zip, checked against its MD5
    and extracted in the folder SCILPY_HOME/<name>. If all the members of the
    archive live under a single root directory, that level is skipped at
    extraction. Archives whose folder already exists are not fetched again.

    Parameters
    ----------
    files_dict: dict
        Dictionary linking each archive name (ex, 'bundles.zip') to its MD5
        hex digest.
    keys: str or iterable of str, optional
        Name(s) of the archive(s) to fetch. Must be keys of files_dict.
        If None, all the archives in files_dict are fetched.
    verbose: bool
        If True, a message is logged before each download.

    Raises
    ------
    KeyError
        If a key is not in files_dict.
    NotImplementedError
        If a requested file is not a .zip file.
    RuntimeError
        If the MD5 does not match and the downloaded file is not even a valid
        zip archive.
    ValueError
        If the MD5 does not match but the downloaded file is a valid zip
        archive.
    """
    # exist_ok avoids the race between checking for the folder and creating
    # it.
    os.makedirs(SCILPY_HOME, exist_ok=True)

    if keys is None:
        keys = files_dict.keys()
    elif isinstance(keys, str):
        keys = [keys]

    for f in keys:
        url_md5 = files_dict[f]
        full_path = os.path.join(SCILPY_HOME, f)
        full_path_no_ext, ext = os.path.splitext(full_path)

        CURR_URL = DVC_URL + "/" + url_md5[:2] + "/" + url_md5[2:]

        if os.path.isdir(full_path_no_ext):
            # toDo. Verify that data on disk is the right one.
            logging.warning("Not fetching data; already on disk.")
            continue

        if ext != '.zip':
            raise NotImplementedError("Data fetcher was expecting to deal "
                                      "with a zip file.")

        if verbose:
            logging.warning('Downloading and extracting {} from url {} to '
                            '{}'.format(f, CURR_URL, SCILPY_HOME))

        # The HTTP status is not checked by the download; the MD5 check
        # below is what validates the downloaded file.
        download_file_from_google_drive(CURR_URL, full_path)

        # Hashing by chunks: the archive never has to fit in memory.
        md5_hasher = hashlib.md5()
        with open(full_path, 'rb') as file_to_check:
            for chunk in iter(lambda: file_to_check.read(1024 * 1024), b''):
                md5_hasher.update(chunk)
        md5_returned = md5_hasher.hexdigest()

        if md5_returned != url_md5:
            # Distinguishing "not an archive at all" (ex, an error page was
            # downloaded) from "a valid archive, but not the expected one".
            try:
                with zipfile.ZipFile(full_path):
                    pass
            except zipfile.BadZipFile as err:
                raise RuntimeError("Could not fetch valid archive for "
                                   "file {}".format(f)) from err
            raise ValueError('MD5 mismatch for file {}.'.format(f))

        with zipfile.ZipFile(full_path) as z:
            names = z.namelist()

            # Candidate root dir: first component of the first member.
            # Stays None for an empty archive.
            first_parts = pathlib.Path(names[0]).parts if names else ()
            root_dir = first_parts[0] + '/' if first_parts else None

            # Explicit test rather than an assert, which would be stripped
            # when running python with -O.
            has_root_dir = root_dir is not None and all(
                s.startswith(root_dir) for s in names)

            if has_root_dir:
                # If there is a root dir, we want to skip one level.
                nb_root = len(root_dir)
                for zipinfo in z.infolist():
                    zipinfo.filename = zipinfo.filename[nb_root:]
                    if zipinfo.filename != '':
                        z.extract(zipinfo, path=full_path_no_ext)
            else:
                # Not root dir. Extracting directly in the data folder (and
                # not in full_path, which is the zip file itself).
                z.extractall(full_path_no_ext)




[docs]
def get_synb0_template_path():
    """
    Get the path of the masked MNI template (2.5mm) stored in the 'data'
    folder of the scilpy repository.

    The path is built from the location of the imported scilpy module: the
    'data' folder is expected one level above the package folder. The
    existence of the file is not verified.

    Returns
    -------
    path: str
        Template path
    """
    import scilpy  # ToDo. Is this the only way?

    # File defining the scilpy module -> package folder -> parent folder.
    package_file = inspect.getfile(scilpy)
    package_dir = os.path.dirname(package_file)
    parent_dir = os.path.dirname(package_dir)

    template_name = 'mni_icbm152_t1_tal_nlin_asym_09c_masked_2_5.nii.gz'
    return os.path.join(parent_dir, 'data/', template_name)