# Source code for ramutils.tasks.misc

from __future__ import unicode_literals

import os
import pandas as pd

try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse

from ptsa.data.readers import JsonIndexReader
from ramutils.io import store_results
from ramutils.utils import get_session_str

from ._wrapper import task
from ramutils.reports.summary import *
from ramutils.hmm import save_foresplot, save_traceplot
from ramutils.utils import is_stim_experiment as is_stim_experiment_core
from ramutils.utils import get_completed_sessions
from ramutils.utils import encode_file
from ramutils.utils import extract_experiment_series
from ramutils.log import get_logger

logger = get_logger()


__all__ = [
    'read_index',
    'is_stim_experiment',
    'save_all_output',
    'load_existing_results'
]


@task()
def read_index(mount_point='/'):
    """Build a reader for the ``r1.json`` protocol index.

    :param str mount_point: Root directory; the index is expected at
        ``<mount_point>/protocols/r1.json``.
    :returns: JsonIndexReader constructed from that path

    """
    index_file = os.path.join(mount_point, 'protocols', 'r1.json')
    reader = JsonIndexReader(index_file)
    return reader
@task(cache=False)
def is_stim_experiment(experiment):
    """Task wrapper around :func:`ramutils.utils.is_stim_experiment`.

    :param experiment: Experiment name, forwarded unchanged to the helper.
    :returns: Whatever the underlying helper returns for ``experiment``
        (presumably a bool -- confirm against ``ramutils.utils``).

    """
    return is_stim_experiment_core(experiment)
@task(cache=False)
def save_all_output(subject, experiment, session_summaries, math_summaries,
                    classifier_evaluation_results, save_location,
                    retrained_classifier=None, target_selection_table=None,
                    behavioral_results=None, agg_report=False):
    """
    Persist everything that is needed to regenerate a report later

    Parameters:
    -----------
    subject: str
        Subject ID
    experiment: str
        Experiment name
    session_summaries: List
        List of SessionSummary derived objects; each must expose
        ``session_number``
    math_summaries: List
        List of MathSummary objects; each must expose ``session_number``
    classifier_evaluation_results: List
        List of ClassifierSummary objects; each must expose ``sessions`` and
        ``tag``
    save_location: str
        Destination directory for the saved data. Typically in
        /data10/RAM/report_database/ on RHINO
    retrained_classifier: ClassifierContainer
        Retrained classifier. When given, it is saved once per subject via
        its ``save(path, overwrite=True)`` method
    target_selection_table: pd.DataFrame
        DataFrame representation of the target selection table, formerly
        known as the subsequent memory effect table. Only written when it is
        not None and not empty
    behavioral_results: dict
        Keys are the behavioral effect model type (stim list, stim item,
        etc.) and values are the traces from estimating those models
    agg_report: bool
        When true, the joined session string is replaced by an empty string
        because aggregate reports could span hundreds of sessions

    Returns
    -------
    results_files: dict
        One entry per key of ``behavioral_results``: the encoded forest plot
        image for that model. Empty when ``behavioral_results`` is None.
        These are used to embed the static plots in the html reports during
        report generation

    Notes
    -----
    Per-session output files are named
    {subject}_{experiment}_{session}_{data_type}.{file_type}; the retrained
    classifier is named {subject}_{data_type}.{file_type}. ``data_type`` is a
    generic name for the type of data being saved. The following data types
    map to a summary object:

        * session_summary: :class:`ramutils.reports.summary.SessionSummary`
        * math_summary: :class:`ramutils.reports.summary.MathSummary`
        * classifier_[tag]: :class:`ramutils.reports.summary.ClassifierSummary`

    """
    session_template = os.path.join(
        save_location,
        "{subject}_{experiment}_{session}_{data_type}.{file_type}")
    subject_template = os.path.join(save_location,
                                    "{subject}_{data_type}.{file_type}")

    def _session_file(session, data_type, file_type):
        # Fill in the per-session template for this subject and experiment
        return session_template.format(subject=subject,
                                       experiment=experiment,
                                       session=session,
                                       data_type=data_type,
                                       file_type=file_type)

    session_numbers = [str(item.session_number) for item in session_summaries]
    session_str = '_'.join(session_numbers)

    # Agg reports could have hundreds of sessions, so do not save them
    # as part of the file name
    if agg_report:
        session_str = ""

    has_table = (target_selection_table is not None and
                 len(target_selection_table) > 0)
    if has_table:
        target_selection_table.to_csv(
            _session_file(session_str, 'target_selection_table', 'csv'))

    # Session and math summaries are stored one file per session
    per_session_groups = (
        (session_summaries, 'session_summary'),
        (math_summaries, 'math_summary'),
    )
    for summaries, data_type in per_session_groups:
        for item in summaries:
            store_results(item,
                          _session_file(item.session_number, data_type, 'h5'))

    for clf_summary in classifier_evaluation_results:
        # NOTE(review): this rebinds session_str, so the plot file names
        # further down use the value from the last classifier summary
        # (including when agg_report is set) -- confirm this is intended
        session_str = get_session_str(clf_summary.sessions)
        store_results(clf_summary,
                      _session_file(session_str,
                                    'classifier_' + clf_summary.tag,
                                    'h5'))

    if retrained_classifier is not None:
        # The retrained classifier is the same regardless of session/experiment
        # so just save one per subject
        classifier_path = subject_template.format(
            subject=subject, data_type="retrained_classifier",
            file_type="zip")
        retrained_classifier.save(classifier_path, overwrite=True)

    # Save plots from hmm models and return the encoded forest plots in a dict
    result_files = {}
    if behavioral_results is None:
        return result_files

    for model_name, trace in behavioral_results.items():
        forest_png = _session_file(session_str, model_name + '_foresplot',
                                   'png')
        save_foresplot(trace, forest_png)

        trace_png = _session_file(session_str, model_name + '_traceplot',
                                  'png')
        save_traceplot(trace, trace_png)

        # Only the forest plot is encoded and handed back to the caller
        with open(forest_png, 'rb') as image_file:
            result_files[model_name] = encode_file(image_file)

    return result_files
@task(cache=False)
def load_existing_results(subject, experiment, sessions, stim_report, db_loc,
                          joint_report, rootdir='/'):
    """
    Load data that was previously saved during report generation

    Parameters:
    -----------
    subject: str
        Subject ID
    experiment: str
        Experiment ID
    sessions: list or None
        If None, the completed sessions are looked up for the given subject
        and experiment.
    stim_report: bool
        Indicator for if the requested data is associated with a stim report
    db_loc: str
        Report database location relative to rootdir. db_loc will be appended
        to rootdir to find the full absolute path. If db_loc is itself an
        absolute path, ``os.path.join`` discards rootdir, i.e. db_loc is
        assumed to already contain the root directory.
    joint_report: bool
        Only consulted when ``sessions`` is None. If true and 'FR' is part of
        the experiment name, completed sessions of both the FR and the catFR
        experiment of the same series are looked up, with the catFR session
        numbers offset by 100.
    rootdir: str
        RHINO mount point or root directory

    Returns:
    --------
    saved_results: dict
        Mirrors the input to save_all_output. Keys are
        'target_selection_table', 'classifier_evaluation_results',
        'session_summaries', 'math_summaries' and 'hmm_results'. Entries that
        were not loaded are None. If loading fails part way through, a
        warning is logged and the entries loaded up to that point are
        returned as they are.

    """
    saved_results = {
        'target_selection_table': None,
        'classifier_evaluation_results': None,
        'session_summaries': None,
        'math_summaries': None,
        'hmm_results': None
    }

    # Repetition ratio dictionary optional
    # Cases: PS, stim, non-stim
    subject_experiment = "_".join([subject, experiment])
    base_output_format = os.path.join(rootdir, db_loc,
                                      subject_experiment +
                                      "_{session}_{data_type}.{file_type}")

    def _saved_path(session, data_type, file_type):
        # Path of one saved artifact for this subject and experiment
        return base_output_format.format(session=session,
                                         data_type=data_type,
                                         file_type=file_type)

    if sessions is None:
        if joint_report and 'FR' in experiment:
            series_num = extract_experiment_series(experiment)
            fr_sessions = get_completed_sessions(subject, 'FR' + series_num,
                                                 rootdir)
            catfr_sessions = get_completed_sessions(subject,
                                                    'catFR' + series_num,
                                                    rootdir)
            # catFR sessions are offset by 100 so they can be told apart from
            # FR sessions further down
            catfr_sessions = set(str(100 + int(s)) for s in catfr_sessions)
            # NOTE(review): the union requires get_completed_sessions to
            # return a set -- confirm against ramutils.utils
            sessions = fr_sessions | catfr_sessions
        else:
            sessions = get_completed_sessions(subject, experiment,
                                              rootdir=rootdir)

    session_str = get_session_str(sessions)
    hmm_results = {}

    try:
        if stim_report is False:
            saved_results['target_selection_table'] = pd.read_csv(
                _saved_path(session_str, 'target_selection_table', 'csv'))

            encoding_classifier_summary = ClassifierSummary.from_hdf(
                _saved_path(session_str, 'classifier_Encoding', 'h5'))
            joint_classifier_summary = ClassifierSummary.from_hdf(
                _saved_path(session_str, 'classifier_Joint', 'h5'))
            saved_results['classifier_evaluation_results'] = [
                encoding_classifier_summary, joint_classifier_summary]

            session_summaries, math_summaries = [], []
            for session in sessions:
                math_summaries.append(MathSummary.from_hdf(
                    _saved_path(str(session), 'math_summary', 'h5')))

                if (experiment == 'catFR1') or (int(session) >= 100):
                    summary_class = CatFRSessionSummary
                elif experiment == 'FR1':
                    summary_class = FRSessionSummary
                else:
                    # Previously the summary class was left unbound (or stale
                    # from the prior iteration) here. There is no known
                    # summary class to load with, so fall back to rebuilding
                    # like every other failed load.
                    logger.warning('No saved session summary type is known '
                                   'for experiment %s, building from scratch '
                                   'instead.', experiment)
                    return saved_results

                session_summaries.append(summary_class.from_hdf(
                    _saved_path(str(session), 'session_summary', 'h5')))

            saved_results['session_summaries'] = session_summaries
            saved_results['math_summaries'] = math_summaries

        elif stim_report and 'PS' not in experiment:
            classifier_evaluation_results = []
            math_summaries = []
            session_summaries = []

            if 'TICL' in experiment:
                klass = TICLFRSessionSummary
            else:
                klass = FRStimSessionSummary

            last_session = None
            for session in sessions:
                last_session = session

                classifier_evaluation_results.append(
                    ClassifierSummary.from_hdf(
                        _saved_path(str(session),
                                    'classifier_session_' + str(session),
                                    'h5')))

                math_summaries.append(MathSummary.from_hdf(
                    _saved_path(str(session), 'math_summary', 'h5')))

                session_summaries.append(klass.from_hdf(
                    _saved_path(str(session), 'session_summary', 'h5')))

            # Check if behavioral model results are saved
            # NOTE(review): the plots are looked up under the last session
            # iterated above, which is how save_all_output appears to name
            # them when classifier summaries are per session -- confirm
            if 'FR5' in experiment:
                for name in ['list', 'stim_item', 'post_stim_item']:
                    forestplot_path = _saved_path(str(last_session),
                                                  name + '_foresplot', 'png')
                    # Explicit check instead of assert so the validation is
                    # not stripped when running with -O
                    if not os.path.exists(forestplot_path):
                        raise IOError('Missing saved plot: {}'.format(
                            forestplot_path))

                    # Encode the image and pass along that data
                    with open(forestplot_path, 'rb') as f:
                        hmm_results[name] = encode_file(f)

            saved_results['hmm_results'] = hmm_results
            saved_results['session_summaries'] = session_summaries
            saved_results['math_summaries'] = math_summaries
            saved_results['classifier_evaluation_results'] = \
                classifier_evaluation_results

        # Any other combination (e.g. PS stim experiments) has nothing to
        # load, so saved_results is returned untouched

    # AssertionError stays in the list in case the readers assert internally
    except (IOError, OSError, AssertionError):
        logger.warning('Not all underlying data could be found for the '
                       'requested report, building from scratch instead.')

    return saved_results