{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Classification from scratch" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "# Set CML_ROOT to the root data mount point\n", "os.environ[\"CML_ROOT\"] = \"~/mnt/rhino\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "from copy import deepcopy\n", "from functools import partial\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "from scipy.stats import zscore\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import auc, roc_auc_score, roc_curve\n", "from sklearn.model_selection import train_test_split\n", "\n", "from cmlreaders import CMLReader, get_data_index\n", "from ptsa.data.filters import ButterworthFilter, MorletWaveletFilter" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We start by loading the data index of all RAM subjects:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df = get_data_index(\"r1\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To train a classifier, we want to analyze record-only experiments. Here, we'll just consider FR1. Let's choose subject R1111M and see how many FR1 sessions they completed:" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | subject\n", " | experiment\n", " | session\n", " | 
|---|---|---|---|
| 616\n", " | R1111M\n", " | FR1\n", " | 0\n", " | 
| 617\n", " | R1111M\n", " | FR1\n", " | 1\n", " | 
| 618\n", " | R1111M\n", " | FR1\n", " | 2\n", " | 
| 619\n", " | R1111M\n", " | FR1\n", " | 3\n", " |