{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7f08508-3893-4c6b-80bd-348250ac0762",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select matplotlib's Qt GUI backend for this session.\n",
    "%matplotlib qt\n",
    "# Dataset folder, given relative to the working directory\n",
    "# (the imports cell below moves the working directory one level up).\n",
    "path_to_data = 'data/dorschky2024'\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "786292c0-3cb2-4201-bdfb-8158299207a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imports\n",
    "import os\n",
    "import subprocess\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import torch\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Relative paths used in this notebook (e.g. path_to_data) are resolved from\n",
    "# the parent directory. Guard the chdir so that re-running this cell in the\n",
    "# same kernel does not climb one directory higher each time.\n",
    "if '_CWD_MOVED_TO_PARENT' not in globals():\n",
    "    os.chdir('..')\n",
    "    _CWD_MOVED_TO_PARENT = True\n",
    "\n",
    "from matplotlib.animation import FuncAnimation\n",
    "from IPython.display import HTML\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "# Device name string; this notebook only does CPU-side preprocessing.\n",
    "device = 'cpu'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0c91bca3",
   "metadata": {},
   "source": [
    "# Functions to find the validation data from Dorschky's dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e2b98c76",
   "metadata": {},
   "source": [
    "# Build the necessary dataframes"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "59f3d5da",
   "metadata": {},
   "source": [
    "## IMU Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8b7d655d-af81-4bc5-9316-4a074ec8ec7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import scipy.signal\n",
    "\n",
    "# IMU index labels are used as raw sample numbers and OMC seconds are converted\n",
    "# with *1000 before dividing by the decimation factor below.\n",
    "# (Presumably 1000 Hz IMU decimated to 100 Hz -- TODO confirm.)\n",
    "DECIMATION = 10\n",
    "PAD = 100  # padding in decimated samples; the original note calls this 1 s\n",
    "\n",
    "# One row per (subject, trial); `data` holds a (n_signals, n_samples) float64 tensor.\n",
    "imu_dataframe = pd.DataFrame(columns = ['subject','trial','data','triggertime','trigger_no','trigger_idx','omc_start' ,'omc_end']) \n",
    "signal_keys = ['PELVIS','FEMUR_R','TIBIA_R','FOOT_R','FEMUR_L','TIBIA_L','FOOT_L']\n",
    "signal_keys = [f'{key}_{signal}' for key in signal_keys for signal in ['ACC_X','ACC_Y','GYRO_Z']]\n",
    "\n",
    "for sub in range(1,11):\n",
    "    imu_raw = pd.read_parquet(f'{path_to_data}/P{str(sub).zfill(2)}_IMU.parquet')\n",
    "    # Kept under the name `df`: a later cell inspects the last OMC frame.\n",
    "    df = pd.read_parquet(f'{path_to_data}/P{str(sub).zfill(2)}_OMC.parquet')\n",
    "\n",
    "    # Every row with TRIGGER >= 7 is treated as one trial. Filter once per\n",
    "    # subject instead of re-scanning the full recordings on every iteration.\n",
    "    imu_triggers = imu_raw[imu_raw.TRIGGER >= 7]\n",
    "    omc_triggers = df[df.TRIGGER >= 7]\n",
    "    assert len(imu_triggers) == len(omc_triggers)\n",
    "    n_trials = len(omc_triggers)\n",
    "\n",
    "    # Label of the row preceding each TIME == 0 row, i.e. the last OMC row of\n",
    "    # the previous trial. NOTE(review): these labels are used positionally\n",
    "    # (df.iloc) below, which assumes a default 0..n-1 index -- confirm.\n",
    "    rows_before_restart = df[df.TIME==0].index - 1\n",
    "\n",
    "    # Decimate every signal once per subject: shape (n_signals, n_decimated).\n",
    "    decimated = np.stack([scipy.signal.decimate(imu_raw[signal], DECIMATION) for signal in signal_keys])\n",
    "    n_decimated = decimated.shape[1]\n",
    "\n",
    "    for i in range(n_trials):\n",
    "        row_imu = imu_triggers.iloc[i]\n",
    "        row_omc = omc_triggers.iloc[i]\n",
    "        assert row_imu.TRIGGER == row_omc.TRIGGER\n",
    "\n",
    "        t_trigger = row_omc.TIME        # trigger time inside the OMC trial\n",
    "        t_trigger_total = row_imu.name  # IMU index label of the trigger row\n",
    "        # End of this trial = row before the next trial's TIME == 0 row. The\n",
    "        # modulo makes the last trial use the first entry (presumably -1,\n",
    "        # i.e. the final OMC row, when the recording starts at TIME == 0).\n",
    "        idx_t_end = rows_before_restart[(i+1) % n_trials]\n",
    "        t_end = df.iloc[idx_t_end].TIME\n",
    "\n",
    "        # Round instead of truncating: int() would drop a whole millisecond\n",
    "        # whenever the float product lands a hair below the exact value.\n",
    "        t_0_total = t_trigger_total - int(round(t_trigger*1000))\n",
    "        t_end_total = t_0_total + int(round(t_end*1000))\n",
    "\n",
    "        t_0_10 = int(t_0_total/DECIMATION) - PAD  # window starts PAD samples before the trial start\n",
    "        t_end_10 = int(t_end_total/DECIMATION)\n",
    "        t_trigger_10 = int(t_trigger_total/DECIMATION) + PAD  # trigger position shifted by PAD samples\n",
    "\n",
    "        # A negative start would silently wrap around and an end past the\n",
    "        # recording would yield too few samples, so fail with a clear message.\n",
    "        if not 0 <= t_0_10 <= t_end_10 <= n_decimated:\n",
    "            raise ValueError(f'Subject {sub}, trial {i}: window [{t_0_10}, {t_end_10}) is outside the decimated IMU recording (length {n_decimated})')\n",
    "        trial_data = np.array(decimated[:, t_0_10:t_end_10], dtype=np.float64)\n",
    "\n",
    "        triggertime = t_trigger_10 - t_0_10\n",
    "        # All IMU rows up to the end of this trial that carry a trigger value.\n",
    "        history = imu_raw.iloc[:t_end_10*DECIMATION]\n",
    "        trigger_hist = history[history.TRIGGER > 0]\n",
    "        hastrigger = trigger_hist.TRIGGER.max()\n",
    "        # Number of rows that carry that maximum trigger value.\n",
    "        trigger_no = len(trigger_hist[trigger_hist.TRIGGER == hastrigger])\n",
    "        # NOTE(review): the window is padded only at its start (t_end_10 has\n",
    "        # no padding), yet omc_end is stored as -PAD -- confirm this is intended.\n",
    "        imu_dataframe.loc[len(imu_dataframe)] = [sub, i, torch.from_numpy(trial_data), triggertime, trigger_no, hastrigger, PAD, -PAD]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0b91d6f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of (subject, trial) rows collected by the loop above.\n",
    "len(imu_dataframe)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ed55d4e3",
   "metadata": {},
   "source": [
    "## Save the IMU dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "61320165-e93a-45fe-8950-c18af4dc3de7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pickle the imu dataframe into the dataset folder.\n",
    "# The path is derived from path_to_data so that input and output locations\n",
    "# cannot drift apart when the dataset folder is changed.\n",
    "import pickle\n",
    "output_path = f'{path_to_data}/dorschky_val_sequences.pkl'\n",
    "with open(output_path,'wb') as output:\n",
    "    pickle.dump(imu_dataframe, output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "252c26e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column names of `df`, i.e. the OMC frame of the last subject read in the\n",
    "# IMU Data loop (the variable is left over from that loop).\n",
    "list(df.keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6c4cd8ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the first rows of the collected trial table.\n",
    "imu_dataframe.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b9449452",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pinn",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
