# -*- coding: utf-8 -*-
# Author: Ben Cipollini, Howard Zhang
# License: simplified BSD
import os
import os.path as op
from ...core.datasets import Dataset
from ...core.fetchers import AmazonS3Fetcher, HttpFetcher
class HcpHttpFetcher(HttpFetcher):
"""TODO: HcpHttpFetcher docstring"""
dependencies = ['requests'] + HttpFetcher.dependencies
def __init__(self, data_dir=None, username=None, passwd=None):
username = username or os.environ.get("NIDATA_HCP_USERNAME")
passwd = passwd or os.environ.get("NIDATA_HCP_PASSWD")
if username is None or passwd is None:
raise ValueError("Must define NIDATA_HCP_USERNAME and "
"NIDATA_HCP_PASSWD environment variables, or "
"pass username and passwd arguments.")
super(HcpHttpFetcher, self).__init__(data_dir=data_dir,
username=username,
passwd=passwd)
self.jsession_id = None
    def fetch(self, files, force=False, resume=True, check=False, verbose=1):
if self.jsession_id is None:
# Log in to the website.
import requests
res = requests.post('https://db.humanconnectome.org/data/JSESSION',
data={},
auth=(self.username, self.passwd))
res.raise_for_status()
# Get the login information.
self.jsession_id = res.cookies.get('JSESSIONID')
if self.jsession_id is None:
raise Exception('Failed to create HCP session.')
self.username = self.passwd = None # use session
files = self.reformat_files(files) # allows flexibility
# Add the session header
for tgt, src, opts in files:
opts['cookies'] = opts.get('cookies', dict())
opts['cookies'].update({'JSESSIONID': self.jsession_id})
return super(HcpHttpFetcher, self).fetch(files=files, force=force,
resume=resume, check=check,
verbose=verbose)
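

# A hedged usage sketch for HcpHttpFetcher, kept in comments so that nothing
# runs at import time. It assumes the NIDATA_HCP_USERNAME / NIDATA_HCP_PASSWD
# environment variables are set; the subject ID is purely illustrative, and
# real (target, url) pairs are normally built by HcpDataset.prepend() below.
#
#     fetcher = HcpHttpFetcher()
#     fetcher.fetch([('100307/release-notes/Structural_unproc.txt',
#                     'https://db.humanconnectome.org/data/archive/projects/'
#                     'HCP_900/subjects/100307/release-notes/Structural_unproc.txt')])
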
class HcpDataset(Dataset):
"""TODO: HcpDataset docstring"""
def __init__(self, data_dir=None, fetcher_type='http', profile_name='hcp',
access_key=None, secret_access_key=None,
username=None, passwd=None):
"""fetcher_type: aws or XNAT"""
super(HcpDataset, self).__init__(data_dir=data_dir)
self.fetcher_type = fetcher_type
if fetcher_type == 'aws':
self.fetcher = AmazonS3Fetcher(data_dir=self.data_dir,
profile_name=profile_name,
access_key=access_key,
secret_access_key=secret_access_key)
elif fetcher_type in ['http', 'xnat']:
self.fetcher = HcpHttpFetcher(data_dir=self.data_dir,
username=username,
passwd=passwd)
else:
raise NotImplementedError(fetcher_type)
    def prepend(self, src_files):
"""
Prepends the proper absolute url to a list of files, based on fetcher type.
Parameters
----------
src_files: list of str
uncompleted urls without the prepended fetcher type
Returns
-------
list of fully qualified urls
"""
files = []
for src_file in src_files:
if isinstance(self.fetcher, HttpFetcher):
files.append((src_file, 'https://db.humanconnectome.org/data/'
'archive/projects/HCP_900/subjects/' +
src_file))
elif isinstance(self.fetcher, AmazonS3Fetcher):
files.append((src_file, 'HCP/' + src_file))
return files
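
    # Hedged illustration of prepend() (assumes credentials are configured so
    # that the constructor succeeds; the subject ID and file are hypothetical):
    #
    #     dset = HcpDataset(fetcher_type='http')
    #     dset.prepend(['100307/T1w/Diffusion/bvals'])
    #     # -> [('100307/T1w/Diffusion/bvals',
    #     #      'https://db.humanconnectome.org/data/archive/projects/HCP_900/'
    #     #      'subjects/100307/T1w/Diffusion/bvals')]
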
    def get_subject_list(self, n_subjects=None):
"""
Get the list of subject IDs. Depends on the # of subjects,
which also corresponds to other things (license agreement,
type of data available, etc)
"""
subj_file_info = (('S900.txt', 900),
('S500.txt', 500),
('U100.txt', 100))
fil = None
errs = dict()
        # Loop until we retrieve a file that's good.
for fname, nsubj in subj_file_info:
try:
fil = self.fetcher.fetch(
files=self.prepend([fname]), verbose=0)[0]
with open(fil, 'r') as fp:
data = fp.read()
# Make sure it's a good file.
                subject_ids = [sid.strip() for sid in data.split('\n')
                               if sid.strip() != '']
                if len(subject_ids) < nsubj:
                    os.remove(fil)  # corrupt
                    raise ValueError("Removed corrupt file %s" % fil)
                else:
                    break
except Exception as e:
errs[fname] = e
continue
# Completed the loop. Make sure we succeeded,
# and that what's requested is possible.
if fil is None:
raise Exception("Failed to fetch subject list from any file. "
"Error details: %s" % errs)
        if n_subjects is not None and n_subjects > nsubj:
            raise IndexError("Requested number of subjects is too high. "
                             "Please enter a number <= %d." % nsubj)
return subject_ids[:n_subjects]
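
    # Hedged usage sketch; the exact IDs returned depend on which roster file
    # (S900/S500/U100) could be downloaded, so output is not shown:
    #
    #     subj_ids = dset.get_subject_list(n_subjects=2)  # list of 2 ID strings
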
    def get_diff_files(self, process, subj_id):
"""
Parameters
----------
process : boolean
whether or not the data is processed or not
can choose from True or False
subj_id : String
the id of the subject the files are on
"""
files = []
if not process:
diff_path = '%s/unprocessed/3T/Diffusion' % subj_id
files += ['%s/%s_3T_BIAS_32CH.nii.gz' % (diff_path, subj_id)]
files += ['%s/%s_3T_BIAS_BC.nii.gz' % (diff_path, subj_id)]
files += ['%s/%s_3T_DWI_dir95_LR_SBRef.nii.gz' % (diff_path, subj_id)]
files += ['%s/%s_3T_DWI_dir95_LR.bval' % (diff_path, subj_id)]
files += ['%s/%s_3T_DWI_dir95_LR.bvec' % (diff_path, subj_id)]
files += ['%s/%s_3T_DWI_dir95_LR.nii.gz' % (diff_path, subj_id)]
files += ['%s/%s_3T_DWI_dir95_RL_SBRef.nii.gz' % (diff_path, subj_id)]
files += ['%s/%s_3T_DWI_dir95_RL.bval' % (diff_path, subj_id)]
files += ['%s/%s_3T_DWI_dir95_RL.bvec' % (diff_path, subj_id)]
files += ['%s/%s_3T_DWI_dir95_RL.nii.gz' % (diff_path, subj_id)]
files += ['%s/release-notes/Diffusion_unproc.txt' % subj_id]
else:
diff_path = '%s/T1w/Diffusion' % subj_id
files += ['%s/bvals' % diff_path]
files += ['%s/bvecs' % diff_path]
files += ['%s/data.nii.gz' % diff_path]
files += ['%s/release-notes/Diffusion_preproc.txt' % subj_id]
return files
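
    # Hedged illustration (subject ID is hypothetical; paths follow the
    # templates above):
    #
    #     dset.get_diff_files(process=True, subj_id='100307')[:2]
    #     # -> ['100307/T1w/Diffusion/bvals', '100307/T1w/Diffusion/bvecs']
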
    def get_anat_files(self, process, subj_id, atlas, mni, property):
"""
Parameters
----------
atlas : String
scope of surface data,
can choose from native or fsaverage
mni : boolean
determines whether to use mninonlinear data or not,
can choose from true or false
property : String
the chosen properties displayed in structural data files
can choose from myelinmap, curvature, thickness
process : boolean
whether or not the data is processed or not
can choose from True or False
subj_id : String
the id of the subject the files are on
"""
files = []
if not process:
anat_path = '%s/unprocessed/3T' % subj_id
files += ['%s/T1w_MPR1/%s_3T_AFI.nii.gz' % (anat_path, subj_id)]
files += ['%s/T1w_MPR1/%s_3T_BIAS_32CH.nii.gz' % (anat_path, subj_id)]
files += ['%s/T1w_MPR1/%s_3T_BIAS_BC.nii.gz' % (anat_path, subj_id)]
files += ['%s/T1w_MPR1/%s_3T_FieldMap_Magnitude.nii.gz' % (anat_path, subj_id)]
files += ['%s/T1w_MPR1/%s_3T_FieldMap_Phase.nii.gz' % (anat_path, subj_id)]
files += ['%s/T1w_MPR1/%s_3T_T1w_MPR1.nii.gz' % (anat_path, subj_id)]
files += ['%s/release-notes/Structural_unproc.txt' % subj_id]
else:
if not mni:
if atlas == 'native':
anat_path = '%s/T1w/Native' % subj_id
if property == 'thickness':
files += ['%s/%s.L.midthickness.native.surf.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.midthickness.native.surf.gii' % (anat_path, subj_id)]
elif atlas == 'fsaverage':
anat_path = '%s/T1w/fsaverage_LR32k' % subj_id
if property == 'thickness':
files += ['%s/%s.L.midthickness.32k_fs_LR.surf.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.midthickness.32k_fs_LR.surf.gii' % (anat_path, subj_id)]
else:
if atlas == 'native':
anat_path = '%s/MNINonLinear/Native' % subj_id
if property == 'thickness':
files += ['%s/%s.corrThickness.native.dscalar.nii' % (anat_path, subj_id)]
files += ['%s/%s.L.corrThickness.native.shape.gii' % (anat_path, subj_id)]
files += ['%s/%s.L.midthickness.native.surf.gii' % (anat_path, subj_id)]
files += ['%s/%s.L.thickness.native.shape.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.corrThickness.native.shape.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.midthickness.native.surf.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.thickness.native.shape.gii' % (anat_path, subj_id)]
files += ['%s/%s.thickness.native.dscalar.nii' % (anat_path, subj_id)]
elif property == 'curvature':
files += ['%s/%s.curvature.native.dscalar.nii' % (anat_path, subj_id)]
files += ['%s/%s.L.curvature.native.shape.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.curvature.native.shape.gii' % (anat_path, subj_id)]
elif property == 'myelinmap':
files += ['%s/%s.L.MyelinMap.native.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.L.MyelinMap_BC.native.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.L.SmoothedMyelinMap.native.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.L.SmoothedMyelinMap_BC.native.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.MyelinMap.native.dscalar.nii' % (anat_path, subj_id)]
files += ['%s/%s.MyelinMap_BC.native.dscalar.nii' % (anat_path, subj_id)]
files += ['%s/%s.R.MyelinMap.native.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.MyelinMap_BC.native.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.SmoothedMyelinMap.native.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.SmoothedMyelinMap_BC.native.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.SmoothedMyelinMap.native.dscalar.nii' % (anat_path, subj_id)]
files += ['%s/%s.SmoothedMyelinMap_BC.native.dscalar.nii' % (anat_path, subj_id)]
elif atlas == 'fsaverage':
anat_path = '%s/MNINonLinear/fsaverage_LR32k' % subj_id
if property == 'thickness':
files += ['%s/%s.corrThickness.32k_fs_LR.dscalar.nii' % (anat_path, subj_id)]
files += ['%s/%s.L.corrThickness.32k_fs_LR.shape.gii' % (anat_path, subj_id)]
files += ['%s/%s.L.midthickness.32k_fs_LR.surf.gii' % (anat_path, subj_id)]
files += ['%s/%s.L.thickness.32k_fs_LR.shape.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.corrThickness.32k_fs_LR.shape.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.midthickness.32k_fs_LR.surf.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.thickness.32k_fs_LR.shape.gii' % (anat_path, subj_id)]
files += ['%s/%s.thickness.32k_fs_LR.dscalar.nii' % (anat_path, subj_id)]
elif property == 'curvature':
files += ['%s/%s.curvature.32k_fs_LR.dscalar.nii' % (anat_path, subj_id)]
files += ['%s/%s.L.curvature.32k_fs_LR.shape.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.curvature.32k_fs_LR.shape.gii' % (anat_path, subj_id)]
elif property == 'myelinmap':
files += ['%s/%s.L.MyelinMap.32k_fs_LR.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.L.MyelinMap_BC.32k_fs_LR.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.L.SmoothedMyelinMap.32k_fs_LR.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.L.SmoothedMyelinMap_BC.32k_fs_LR.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.MyelinMap.32k_fs_LR.dscalar.nii' % (anat_path, subj_id)]
files += ['%s/%s.MyelinMap_BC.32k_fs_LR.dscalar.nii' % (anat_path, subj_id)]
files += ['%s/%s.R.MyelinMap.32k_fs_LR.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.MyelinMap_BC.32k_fs_LR.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.SmoothedMyelinMap.32k_fs_LR.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.R.SmoothedMyelinMap_BC.32k_fs_LR.func.gii' % (anat_path, subj_id)]
files += ['%s/%s.SmoothedMyelinMap.32k_fs_LR.dscalar.nii' % (anat_path, subj_id)]
files += ['%s/%s.SmoothedMyelinMap_BC.32k_fs_LR.dscalar.nii' % (anat_path, subj_id)]
files += ['%s/release-notes/Structural_preproc.txt' % subj_id]
return files
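
    # Hedged illustration of how the atlas/mni/property switches select file
    # templates (subject ID is hypothetical):
    #
    #     dset.get_anat_files(process=True, subj_id='100307', atlas='fsaverage',
    #                         mni=True, property='curvature')[:1]
    #     # -> ['100307/MNINonLinear/fsaverage_LR32k/'
    #     #     '100307.curvature.32k_fs_LR.dscalar.nii']
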
    def get_rest_files(self, process, subj_id):
"""
Parameters
----------
process : boolean
whether or not the data is processed or not
can choose from True or False
subj_id : String
the id of the subject the files are on
"""
files = []
if not process:
rest_path = '%s/unprocessed/3T' % subj_id
files += ['%s/rfMRI_REST1_LR/%s_3T_BIAS_32CH.nii.gz' % (rest_path, subj_id)]
files += ['%s/rfMRI_REST1_LR/%s_3T_BIAS_BC.nii.gz' % (rest_path, subj_id)]
files += ['%s/rfMRI_REST1_LR/%s_3T_rfMRI_REST1_LR_SBRef.nii.gz' % (rest_path, subj_id)]
files += ['%s/rfMRI_REST1_LR/%s_3T_rfMRI_REST1_LR.nii.gz' % (rest_path, subj_id)]
files += ['%s/rfMRI_REST1_LR/%s_3T_SpinEchoFieldMap_LR.nii.gz' % (rest_path, subj_id)]
files += ['%s/rfMRI_REST1_LR/%s_3T_SpinEchoFieldMap_RL.nii.gz' % (rest_path, subj_id)]
files += ['%s/rfMRI_REST1_LR/LINKED_DATA/PHYSIO/%s_3T_rfMRI_REST1_LR_Physio_log.txt' % (rest_path, subj_id)]
files += ['%s/release-notes/rfMRI_REST1_unproc.txt' % subj_id]
else:
rest_path = '%s/MNINonLinear/Results' % subj_id
files += ['%s/rfMRI_REST1_LR/brainmask_fs.2.nii.gz' % rest_path]
files += ['%s/rfMRI_REST1_LR/Movement_Regressors_dt.txt' % rest_path]
files += ['%s/rfMRI_REST1_LR/Movement_Regressors.txt' % rest_path]
files += ['%s/rfMRI_REST1_LR/Movement_AbsoluteRMS.txt' % rest_path]
files += ['%s/rfMRI_REST1_LR/Movement_AbsoluteRMS_mean.txt' % rest_path]
files += ['%s/rfMRI_REST1_LR/Movement_RelativeRMS.txt' % rest_path]
files += ['%s/rfMRI_REST1_LR/Movement_RelativeRMS_mean.txt' % rest_path]
files += ['%s/rfMRI_REST1_LR/rfMRI_REST1_LR_Atlas.dtseries.nii' % rest_path]
files += ['%s/rfMRI_REST1_LR/rfMRI_REST1_LR_Jacobian.nii.gz' % rest_path]
files += ['%s/rfMRI_REST1_LR/rfMRI_REST1_LR_SBRef.nii.gz' % rest_path]
files += ['%s/rfMRI_REST1_LR/rfMRI_REST1_LR.nii.gz' % rest_path]
files += ['%s/rfMRI_REST1_LR/rfMRI_REST1_LR_Physio_log.txt' % rest_path]
files += ['%s/rfMRI_REST1_LR/RibbonVolumeToSurfaceMapping/goodvoxels.nii.gz' % rest_path]
files += ['%s/release-notes/rfMRI_REST1_preproc.txt' % subj_id]
return files
    def get_task_files(self, process, task, subj_id):
"""
Parameters
----------
task : String
the type of activity for functional data,
can choose from emotional, gambling, language, motor,
relational, social, and workingmemory
process : boolean
whether or not the data is processed or not
can choose from True or False
subj_id : String
the id of the subject the files are on
"""
files = []
task = task.upper()
if not process:
func_path = '%s/unprocessed/3T' % subj_id
task_path = '%s/tfMRI/%s_LR' % (func_path, task)
files += ['%s/%s_3T_BIAS_32CH.nii.gz' % (task_path, subj_id)]
files += ['%s/%s_3T_BIAS_BC.nii.gz' % (task_path, subj_id)]
files += ['%s/%s_3T_SpinEchoFieldMap_LR.nii.gz' % (task_path, subj_id)]
files += ['%s/%s_3T_SpinEchoFieldMap_RL.nii.gz' % (task_path, subj_id)]
files += ['%s/%s_3T_tfMRI_%s_LR.nii.gz' % (task_path, subj_id, task)]
files += ['%s/%s_3T_tfMRI_%s_LR_SBRef.nii.gz' % (task_path, subj_id, task)]
files += ['%s/LINKED_DATA/EPRIME/%s_3T_%s_run2_TAB.txt' % (task_path, subj_id, task)]
files += ['%s/LINKED_DATA/EPRIME/EVs/%s_Stats.csv' % (task_path, task)]
            files += ['%s/LINKED_DATA/EPRIME/EVs/fear.txt' % (task_path)]
files += ['%s/LINKED_DATA/EPRIME/EVs/neut.txt' % (task_path)]
files += ['%s/LINKED_DATA/EPRIME/EVs/Sync.txt' % (task_path)]
files += ['%s/release-notes/tfMRI_%s_unproc.txt' % (subj_id, task)]
else:
func_path = '%s/MNINonLinear/Results' % subj_id
task_path = '%s/tfMRI_%s_LR' % (func_path, task)
files += ['%s/brainmask_fs.2.nii.gz' % (task_path)]
files += ['%s/%s_run2_TAB.txt' % (task_path, task)]
files += ['%s/Movement_Regressors_dt.txt' % (task_path)]
files += ['%s/Movement_Regressors.txt' % (task_path)]
files += ['%s/Movement_AbsoluteRMS.txt' % (task_path)]
files += ['%s/Movement_AbsoluteRMS_mean.txt' % (task_path)]
files += ['%s/Movement_RelativeRMS.txt' % (task_path)]
files += ['%s/Movement_RelativeRMS_mean.txt' % (task_path)]
files += ['%s/tfMRI_%s_LR_Atlas.dtseries.nii' % (task_path, task)]
files += ['%s/tfMRI_%s_LR_Jacobian.nii.gz' % (task_path, task)]
files += ['%s/tfMRI_%s_LR_SBRef.nii.gz' % (task_path, task)]
files += ['%s/tfMRI_%s_LR.nii.gz' % (task_path, task)]
files += ['%s/tfMRI_%s_LR_Physio_log.txt' % (task_path, task)]
files += ['%s/tfMRI_%s_LR_hp200_s4_level1.fsf' % (task_path, task)]
files += ['%s/RibbonVolumeToSurfaceMapping/goodvoxels.nii.gz' % (task_path)]
files += ['%s/EVs/%s_Stats.csv' % (task_path, task)]
files += ['%s/tfMRI_%s/tfMRI_%s_hp200_s4_level2.fsf' % (func_path, task, task)]
files += ['%s/release-notes/tfMRI_%s_preproc.txt' % (subj_id, task)]
        # Task-specific files:
        if task == 'EMOTION':  # task was upper-cased above
            files += ['%s/EVs/fear.txt' % (task_path)]
            files += ['%s/EVs/neut.txt' % (task_path)]
            files += ['%s/EVs/Sync.txt' % (task_path)]
return files
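
    # Hedged illustration (subject ID is hypothetical; task names are
    # upper-cased internally, so 'wm' selects the tfMRI_WM_LR paths):
    #
    #     dset.get_task_files(process=True, task='wm', subj_id='100307')[:1]
    #     # -> ['100307/MNINonLinear/Results/tfMRI_WM_LR/brainmask_fs.2.nii.gz']
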
    def fetch(self, n_subjects=1, data_types=None,
tasks=None, atlases=None, mnis=None, force=False, check=True, verbose=1,
properties=None, process=None):
"""
Parameters
----------
n_subjects : int
the number of subjects to fetch files from
data_types : list
the type of data to fetch,
can choose from anat, diff, func, or rest
tasks : list
the type of activity for functional data,
can choose from emotional, gambling, language, motor,
relational, social, and workingmemory
atlases : list
scope of surface data,
can choose from native or fsaverage
mnis : list
determines whether to use mninonlinear data or not,
can choose from true or false
properties : list
the chosen properties displayed in structural data files
can choose from myelinmap, curvature, thickness
process : list
whether or not the data is processed or not
can choose from True or False
"""
if data_types is None:
data_types = ['anat', 'diff', 'task', 'rest']
if tasks is None:
tasks = ['emotion', 'gambling', 'language', 'motor',
'relational', 'social', 'wm']
if atlases is None:
atlases = ['native', 'fsaverage']
if mnis is None:
mnis = [True, False]
if properties is None:
properties = ['myelinmap', 'curvature', 'thickness']
if process is None:
process = [True]
subj_ids = self.get_subject_list(n_subjects=n_subjects)
# Build a list of files to fetch
src_files = []
for subj_id in subj_ids[:n_subjects]:
for data_type in data_types:
if data_type == 'diff':
for pro in process:
src_files += self.get_diff_files(process=pro,
subj_id=subj_id)
if data_type == 'anat':
for pro in process:
for atlas in atlases:
for mni in mnis:
for prop in properties:
src_files += self.get_anat_files(process=pro,
subj_id=subj_id,
atlas=atlas,
mni=mni,
property=prop)
if data_type == 'rest':
for pro in process:
src_files += self.get_rest_files(process=pro,
subj_id=subj_id)
if data_type == 'task':
for pro in process:
for task in tasks:
src_files += self.get_task_files(process=pro,
task=task,
subj_id=subj_id)
        # Massage paths based on the fetcher type, then download.
        out_files = self.fetcher.fetch(self.prepend(src_files), force=force,
                                       check=check, verbose=verbose)
return out_files
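

if __name__ == '__main__':
    # Hedged end-to-end sketch, not part of the library API. It assumes that
    # NIDATA_HCP_USERNAME / NIDATA_HCP_PASSWD (or AWS credentials, for
    # fetcher_type='aws') are configured, and it downloads the preprocessed
    # resting-state files for a single subject.
    dataset = HcpDataset(fetcher_type='http')
    files = dataset.fetch(n_subjects=1, data_types=['rest'], process=[True])
    print('Fetched %d file(s)' % len(files))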