Source code for scilpy.io.fetcher

# -*- coding: utf-8 -*-

import hashlib
import inspect
import logging
import os
import pathlib
import zipfile

import requests

from scilpy import SCILPY_HOME

DVC_URL = "https://scil.usherbrooke.ca/scil_test_data/dvc-store/files/md5"


def download_file_from_google_drive(url, destination):
    """
    Download a (potentially large) file by streaming it to disk.

    Despite its name, this function only issues a GET request on ``url`` and
    writes the response body to ``destination``.

    Parameters
    ----------
    url: str
        URL of the file to be downloaded.
    destination: str
        Path to destination file with its name and extension.
    """
    CHUNK_SIZE = 32768

    # Context managers guarantee that the session and the streamed response
    # release their connection, even if writing to disk fails midway.
    with requests.Session() as session:
        with session.get(url, stream=True) as response:
            # NOTE: the HTTP status is not checked here, so an error page
            # would be written to `destination` as-is. fetch_data() in this
            # module validates the downloaded file through its MD5 sum.
            with open(destination, "wb") as f:
                # Stream by chunks: the file is never fully held in memory.
                for chunk in response.iter_content(CHUNK_SIZE):
                    f.write(chunk)
def get_testing_files_dict():
    """
    Get dictionary linking each testing zip file to its MD5 sum.

    Returns
    -------
    files_dict: dict
        Keys are the archive names; values are the associated MD5 sums
        (hexadecimal strings).
    """
    archives = (
        ("commit_amico.zip", "c190e6b9d22350b51e222c60febe13b4"),
        ("bundles.zip", "54b6e2bf2dda579886efe4e2a8989486"),
        ("stats.zip", "2aeac4da5ab054b3a460fc5fdc5e4243"),
        ("bst.zip", "eed227fd246255e7417f92d49eb1066a"),
        ("filtering.zip", "19116ff4244d057c8214ee3fe8e05f71"),
        ("ihMT.zip", "08fcf44848ba2649aad5a5a470b3cb06"),
        ("tractometry.zip", "890bfa70e44b15c0d044085de54e00c6"),
        ("bids_json.zip", "97fd9a414849567fbfdfdb0ef400488b"),
        ("MT.zip", "1f4345485248683b3652c97f2630950e"),
        ("btensor_testdata.zip", "7ada72201a767292d56634e0a7bbd9ad"),
        ("tracking.zip", "4793a470812318ce15f1624e24750e4d"),
        ("atlas.zip", "dc34e073fc582476504b3caf127e53ef"),
        ("anatomical_filtering.zip", "5282020575bd485e15d3251257b97e01"),
        ("connectivity.zip", "fe8c47f444d33067f292508d7050acc4"),
        ("plot.zip", "a1dc54cad7e1d17e55228c2518a1b34e"),
        ("others.zip", "82248b4888a63b0aeffc8070cc206995"),
        ("fodf_filtering.zip", "5985c0644321ecf81fd694fb91e2c898"),
        ("processing.zip", "eece5cdbf437b8e4b5cb89c797872e28"),
        ("surface_vtk_fib.zip", "241f3afd6344c967d7176b43e4a99a41"),
        ("tractograms.zip", "5497d0bf3ccc35f8f4f117829d790267"),
    )
    # A new dict is built on every call, so callers may freely modify it.
    return dict(archives)
def _compute_md5(path, chunk_size=1 << 20):
    """Return the hexadecimal MD5 digest of the file at ``path``."""
    md5 = hashlib.md5()
    with open(path, 'rb') as file_to_check:
        # Hash by chunks so the archive is never fully loaded in memory.
        for chunk in iter(lambda: file_to_check.read(chunk_size), b''):
            md5.update(chunk)
    return md5.hexdigest()


def _extract_zip(zip_path, out_dir):
    """Extract ``zip_path`` into ``out_dir``, skipping a common root folder."""
    with zipfile.ZipFile(zip_path) as z:
        zipinfos = z.infolist()

        # Candidate root dir: first path component of the first entry.
        root_dir = None
        if zipinfos:
            root_dir = pathlib.Path(zipinfos[0].filename).parts[0] + '/'

        if root_dir is not None and all(s.startswith(root_dir)
                                        for s in z.namelist()):
            # If there is a root dir, we want to skip one level.
            nb_root = len(root_dir)
            for zipinfo in zipinfos:
                zipinfo.filename = zipinfo.filename[nb_root:]
                # The root dir entry itself becomes '' and is skipped.
                if zipinfo.filename != '':
                    z.extract(zipinfo, path=out_dir)
        else:
            # Not root dir. Extracting directly.
            z.extractall(out_dir)


def fetch_data(files_dict, keys=None):
    """
    Fetch data: download zip archives into SCILPY_HOME and extract them.

    Each archive is downloaded from DVC_URL, using its MD5 sum to build the
    URL, then validated against that MD5 sum and extracted into a folder
    named after the archive (without the '.zip' extension). Archives whose
    folder already exists are not downloaded again.

    Parameters
    ----------
    files_dict: dict
        Dictionary linking the name of each zip file to its MD5 sum.
    keys: str or iterable of str, optional
        Name(s) of the files to fetch. If None, all files in files_dict are
        fetched.

    Raises
    ------
    NotImplementedError
        If a file to fetch is not a '.zip' file.
    RuntimeError
        If the MD5 sum mismatches and the downloaded file is not a valid zip
        archive.
    ValueError
        If the MD5 sum mismatches but the downloaded file is a valid zip
        archive.
    """
    # exist_ok avoids a race between the existence check and the creation.
    os.makedirs(SCILPY_HOME, exist_ok=True)

    if keys is None:
        keys = files_dict.keys()
    elif isinstance(keys, str):
        keys = [keys]

    for f in keys:
        url_md5 = files_dict[f]
        full_path = os.path.join(SCILPY_HOME, f)
        full_path_no_ext, ext = os.path.splitext(full_path)

        if os.path.isdir(full_path_no_ext):
            # toDo. Verify that data on disk is the right one.
            logging.warning("Not fetching data; already on disk.")
            continue

        if ext != '.zip':
            raise NotImplementedError("Data fetcher was expecting to deal "
                                      "with a zip file.")

        # Files are stored under <first 2 chars of md5>/<rest of md5>.
        CURR_URL = DVC_URL + "/" + url_md5[:2] + "/" + url_md5[2:]
        logging.warning('Downloading and extracting {} from url {} to '
                        '{}'.format(f, CURR_URL, SCILPY_HOME))

        download_file_from_google_drive(CURR_URL, full_path)

        if _compute_md5(full_path) != url_md5:
            # Distinguish a corrupted / non-zip download from a valid
            # archive whose content is simply not the expected one.
            try:
                with zipfile.ZipFile(full_path):
                    pass
            except zipfile.BadZipFile as err:
                raise RuntimeError("Could not fetch valid archive for "
                                   "file {}".format(f)) from err
            raise ValueError('MD5 mismatch for file {}.'.format(f))

        _extract_zip(full_path, full_path_no_ext)
def get_synb0_template_path():
    """
    Build the path to the MNI 2.5mm template in the scilpy repository.

    Returns
    -------
    path: str
        Template path
    """
    import scilpy  # ToDo. Is this the only way?

    scilpy_file = inspect.getfile(scilpy)
    # Go two levels up from the file defining the scilpy module.
    base_dir = os.path.dirname(os.path.dirname(scilpy_file))
    return os.path.join(
        base_dir, 'data/',
        'mni_icbm152_t1_tal_nlin_asym_09c_masked_2_5.nii.gz')