{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Classification from scratch" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "# Point CML_ROOT at the root data mount point.\n", "# Environment variables are stored verbatim (no shell expansion), so expand\n", "# the leading `~` here instead of relying on every consumer to do it.\n", "os.environ[\"CML_ROOT\"] = os.path.expanduser(\"~/mnt/rhino\")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "from copy import deepcopy\n", "from functools import partial\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "from scipy.stats import zscore\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import auc, roc_auc_score, roc_curve\n", "from sklearn.model_selection import train_test_split\n", "\n", "from cmlreaders import CMLReader, get_data_index\n", "from ptsa.data.filters import ButterworthFilter, MorletWaveletFilter" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We start by loading the data index of all RAM subjects:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df = get_data_index(\"r1\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To train a classifier, we want to analyze record-only experiments. Here, we'll just consider FR1. Let's choose subject R1111M and see how many FR1 sessions they completed:" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | subject | \n", "experiment | \n", "session | \n", "
---|---|---|---|
616 | \n", "R1111M | \n", "FR1 | \n", "0 | \n", "
617 | \n", "R1111M | \n", "FR1 | \n", "1 | \n", "
618 | \n", "R1111M | \n", "FR1 | \n", "2 | \n", "
619 | \n", "R1111M | \n", "FR1 | \n", "3 | \n", "