# Source code for ramutils.classifier.utils

""" Utility functions used during classifier training """

import os
import numpy as np

from glob import glob
from sklearn.linear_model.logistic import LogisticRegression

from classiflib.container import ClassifierContainer
from ramutils.exc import UnableToReloadClassifierException


def reload_classifier(subject, task, session, mount_point='/', base_path=None):
    """Load the classifier container stored with a particular session.

    The session's ``host_pc`` folder is searched for
    ``<run>/config_files`` directories. If any of them contains a
    ``retrained_classifier`` entry, the last such entry (in sorted path
    order) is searched for a ``*classifier*.zip`` archive; otherwise the last
    ``config_files`` directory is searched. The first matching archive (in
    sorted order) is passed to ``ClassifierContainer.load``.

    Parameters
    ----------
    subject: str
        Subject ID
    task: str
        ex: FR5, FR6, PAL1, etc
    session: int
        Session number
    mount_point: str, default '/'
        Mount point for RHINO. Only used when ``base_path`` is None.
    base_path: str
        Location of where the classifier files can be found. If None, default
        is ``<mount_point>/data/eeg/<subject>/behavioral/<task>/
        session_<session>/host_pc``.

    Returns
    -------
    classifier_container: classiflib.container.ClassifierContainer or None
        The loaded container, or None when no ``config_files`` directory
        exists under ``base_path`` or when the selected directory holds no
        ``*classifier*.zip`` archive (likely because the classifier was saved
        in the old .pkl format).

    """
    if base_path is None:
        base_path = os.path.join(mount_point, 'data', 'eeg', subject,
                                 'behavioral', task,
                                 'session_{}'.format(session),
                                 'host_pc')

    # glob() gives no ordering guarantee, so sort before picking "the last".
    config_pattern = os.path.join(base_path, '*', 'config_files')
    config_paths = sorted(glob(config_pattern))
    if not config_paths:
        # expected host_pc folder does not exist
        return None

    # FIXME: this needs a data quality check to confirm that all classifiers in
    # a session are the same!
    # We take the final timestamped directory because in principle retrained
    # classifiers can be different depending on artifact detection. In
    # reality, stim sessions should never be restarted (apart from issues
    # getting things started in the first place).
    retrained_paths = sorted(
        glob(os.path.join(config_pattern, 'retrained_classifier')))
    # A retrained classifier takes precedence over the one in config_files.
    search_dir = retrained_paths[-1] if retrained_paths else config_paths[-1]

    archives = sorted(glob(os.path.join(search_dir, '*classifier*.zip')))
    if not archives:
        # No container was found, likely because it is the old .pkl version.
        # This also covers an empty retrained_classifier folder, which used
        # to raise IndexError when there was a single config_files directory.
        return None

    return ClassifierContainer.load(archives[0])


def train_classifier(pow_mat, events, sample_weights, penalty_param,
                     penalty_type, solver):
    """Fit a class-balanced logistic regression on recall outcomes.

    Parameters
    ----------
    pow_mat : np.ndarray
        Feature matrix passed to ``fit`` as the training data
    events : np.recarray
        Events whose ``recalled`` field supplies the training labels
    sample_weights : np.ndarray
        Weights passed to ``fit`` as its third positional argument
    penalty_param: Float
        Value used for the model's ``C`` parameter
    penalty_type: str
        Type of penalty to use for regularized model (ex: L2)
    solver: str
        Solver to use when fitting the model (ex: liblinear)

    Returns
    -------
    classifier : LogisticRegression
        The fitted model

    """
    hyperparameters = {
        'C': penalty_param,
        'penalty': penalty_type,
        'solver': solver,
        'class_weight': 'balanced',
    }
    model = LogisticRegression(**hyperparameters)
    model.fit(pow_mat, events.recalled, sample_weights)
    return model


# TODO: group and average classifier weights by brain region
def save_classifier_weights_plot(weights, frequencies, pairs, file_):
    """
    Visualize the classifier weights as a function of frequency and location.

    The weights are drawn as a heat map with one row per frequency and one
    column per pair, using a diverging colour map whose limits are symmetric
    about zero, and written out as a PNG.

    Parameters
    ----------
    weights : np.ndarray
        Classifier weights holding ``len(frequencies) * len(pairs)`` values.
        They are reshaped to ``(len(frequencies), len(pairs))``.
    frequencies : np.ndarray[float]
        One value per image row; each is rounded to the nearest integer to
        build the y-axis tick labels.
    pairs : sized iterable
        Only its length is used, to determine the number of image columns.
    file_ : str or file-like
        The destination of the classifier weight plot, handed to ``savefig``.

    Returns
    -------
    file_ : string or file-like
        The file_ parameter

    """
    # pyplot is imported here rather than at the top of the module to avoid any
    # problems setting the matplotlib backend elsewhere.
    from matplotlib import pyplot as plt

    n_frequencies = len(frequencies)
    weights_by_channel = np.asarray(weights).reshape(
        (n_frequencies, len(pairs)))

    # Symmetric limits keep zero at the midpoint of the diverging colour map.
    limit = np.abs(weights_by_channel).max()

    # Use an explicit figure so that nothing is drawn onto (or closed from)
    # whatever figure the caller may currently have active.
    fig, ax = plt.subplots()
    try:
        image = ax.imshow(weights_by_channel, aspect='auto', origin='lower',
                          cmap='RdBu', vmin=-limit, vmax=limit)
        fig.colorbar(image, ax=ax)

        # Row i of the image is centred at y == i, so place one tick per
        # frequency there. Slicing the automatically chosen tick locations
        # only matched the number of labels for some frequency counts.
        ax.set_yticks(np.arange(n_frequencies))
        ax.set_yticklabels(['%d' % int(np.rint(f)) for f in frequencies])

        fig.savefig(file_,
                    format="png",
                    dpi=300,
                    bbox_inches="tight",
                    pad_inches=0.1)
    finally:
        # Release the figure even when rendering or saving fails.
        plt.close(fig)
    return file_