{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "76483d81",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "\n",
    "parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))\n",
    "if parent_dir not in sys.path:\n",
    "    sys.path.append(parent_dir)\n",
    "\n",
    "import os \n",
    "import torch\n",
    "from torch.utils.data import Dataset, DataLoader\n",
    "from torch.nn.utils.rnn import pad_sequence\n",
    "import pickle\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import tqdm\n",
    "import torch.nn as nn\n",
    "import pytorch_lightning as pl\n",
    "import torch.nn.functional as F\n",
    "from transformers import BertTokenizer, BertModel\n",
    "from os.path import join as opj\n",
    "from himalaya.ridge import RidgeCV\n",
    "from himalaya.backend import set_backend\n",
    "from config import DATASET_FULL_TRIALS_ZSCORE\n",
    "from dataset import getDatasetLoaders_V3\n",
    "from encoding_utils import plot_channels_grid_fdr\n",
    "from transformers import WhisperForConditionalGeneration, WhisperTokenizer, AutoProcessor\n",
    "import torchaudio\n",
    "set_backend(\"torch_cuda\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1a9274e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "device  = \"cuda:1\"\n",
    "ROI = \"sm\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d53c5e7f",
   "metadata": {},
   "source": [
    "## Load the data\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bbc17fd1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# #load the data\n",
    "\n",
    "train_loader, test_loader, _, loadedData = getDatasetLoaders_V3(DATASET_FULL_TRIALS_ZSCORE, 128, include_prego=True, roi=ROI)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d5fabd32",
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = train_loader.dataset\n",
    "test_dataset = test_loader.dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7cba56ed",
   "metadata": {},
   "source": [
    "## Create time-windows of data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db636a4f",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "def create_fixed_time_windows(\n",
    "    neural_feats,\n",
    "    go_onsets,\n",
    "    n_before=50,\n",
    "    n_after=150,\n",
    "    window_size=4\n",
    "):\n",
    "    \"\"\"\n",
    "    For each trial, create 50 windows before go_onset and 150 windows after.\n",
    "    Each window is an average of 4 consecutive samples in the time dimension.\n",
    "    \n",
    "    Out-of-bounds samples are effectively zero-padded.\n",
    "    \n",
    "    Arguments:\n",
    "    ----------\n",
    "    neural_feats : list (or array) of length N\n",
    "        Each entry is a 2D array of shape (T_i, D).\n",
    "        T_i can vary, D is number of features/channels.\n",
    "    go_onsets : array-like of length N\n",
    "        The go_onset time index for each trial i.\n",
    "    n_before : int\n",
    "        Number of windows before the go onset.\n",
    "    n_after : int\n",
    "        Number of windows after the go onset.\n",
    "    window_size : int\n",
    "        Number of samples in each window to average.\n",
    "        \n",
    "    Returns:\n",
    "    --------\n",
    "    windowed_array : torch.Tensor of shape (N, n_before + n_after, D)\n",
    "        For each trial i, a (200, D) array (50 + 150 = 200 windows),\n",
    "        where each row is the average of 4 samples in that window.\n",
    "    \"\"\"\n",
    "    # Number of total windows\n",
    "    n_windows = n_before + n_after\n",
    "    N = len(neural_feats)\n",
    "    \n",
    "    # Determine feature dimensionality from the first trial\n",
    "    # (assuming they all have the same #channels, D)\n",
    "    example_feat = neural_feats[0]\n",
    "    _, D = example_feat.shape\n",
    "    \n",
    "    # Prepare output: (N, n_windows, D)\n",
    "    windowed_array = np.zeros((N, n_windows, D), dtype=np.float32)\n",
    "    \n",
    "    for i in tqdm.trange(N):\n",
    "        feat = neural_feats[i]        # shape (T_i, D)\n",
    "        T_i = feat.shape[0]\n",
    "        onset = go_onsets[i]         # an integer time index\n",
    "        \n",
    "        for w in range(n_windows):\n",
    "            # Where does this window start and end (in the time dimension)?\n",
    "            window_start = onset - (n_before * window_size) + (w * window_size)\n",
    "            window_end   = window_start + window_size  # exclusive\n",
    "            \n",
    "            # Clip to valid bounds [0, T_i]\n",
    "            # We'll gather the portion of data that is within the trial\n",
    "            valid_start = max(0, window_start)\n",
    "            valid_end   = min(T_i, window_end)\n",
    "\n",
    "            # If valid_start < valid_end, there's at least one valid timepoint\n",
    "            if valid_end > valid_start:\n",
    "                chunk = feat[valid_start:valid_end]  # shape (some_count, D)\n",
    "                # Average along the time axis\n",
    "                mean_chunk = chunk.mean(axis=0)  # shape (D,)\n",
    "            else:\n",
    "                # Entire window is out-of-bounds\n",
    "                # So we just keep zeros\n",
    "                mean_chunk = np.zeros(D, dtype=np.float32)\n",
    "            \n",
    "            windowed_array[i, w, :] = mean_chunk\n",
    "\n",
    "    return torch.from_numpy(windowed_array)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "811ecb7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# For training set:\n",
    "padded_neural_train = create_fixed_time_windows(\n",
    "    dataset.neural_feats,\n",
    "    dataset.go_onset,\n",
    "    n_before=50,\n",
    "    n_after=150,\n",
    "    window_size=4\n",
    ")\n",
    "print(\"padded_neural_train shape:\", padded_neural_train.shape)\n",
    "# -> (N_train, 200, D)\n",
    "\n",
    "# For testing set:\n",
    "padded_neural_test = create_fixed_time_windows(\n",
    "    test_dataset.neural_feats,\n",
    "    test_dataset.go_onset,\n",
    "    n_before=50,\n",
    "    n_after=150,\n",
    "    window_size=4\n",
    ")\n",
    "print(\"padded_neural_test shape:\", padded_neural_test.shape)\n",
    "# -> (N_test, 200, D)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9851a73c",
   "metadata": {},
   "source": [
    "## Extract speech embeddings\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "57dc4659",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "train_audio = dataset.audio_files[:8800]\n",
    "test_audio = test_dataset.audio_files[:880]\n",
    "\n",
    "train_sentences = dataset.sentences\n",
    "\n",
    "len(train_audio), len(test_audio)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3bdc3fae",
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import Audio\n",
    "import ipywidgets as widgets\n",
    "\n",
    "idx = -1\n",
    "print(train_sentences[idx])\n",
    "Audio(train_audio[idx])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f88d7b8d",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "whisper_model = WhisperForConditionalGeneration.from_pretrained(\"openai/whisper-small\")\n",
    "tokenizer = WhisperTokenizer.from_pretrained(\"openai/whisper-small\")\n",
    "processor = AutoProcessor.from_pretrained(\"openai/whisper-small\")\n",
    "\n",
    "whisper_model.to(device)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dc4abbb6",
   "metadata": {},
   "outputs": [],
   "source": [
    "## load all the audio files and proces\n",
    "whisper_encoder = whisper_model.get_encoder()\n",
    "resample = torchaudio.transforms.Resample(24000, 16000)\n",
    "train_hidden_states = []\n",
    "test_hidden_states = []\n",
    "\n",
    "BATCH_SIZE = 64\n",
    "\n",
    "\n",
    "# Batch processing\n",
    "for i in tqdm.tqdm(range(0, len(train_audio), BATCH_SIZE)):\n",
    "    batch_audio_files = train_audio[i:i + BATCH_SIZE]\n",
    "    audio_tensors = []\n",
    "\n",
    "    # Load and preprocess each audio\n",
    "    for audio_file in batch_audio_files:\n",
    "        audio_waveform, _ = torchaudio.load(audio_file)\n",
    "        audio_resampled = resample(audio_waveform[0])  # mono\n",
    "        audio_tensors.append(audio_resampled)\n",
    "        \n",
    "    audio_np = [x.cpu().numpy() for x in audio_tensors]  # Convert each tensor to 1D NumPy array\n",
    "\n",
    "\n",
    "    # Process with Whisper\n",
    "    input_features = processor(audio_np, sampling_rate=16000, return_tensors=\"pt\").input_features.to(device)\n",
    "    with torch.no_grad():\n",
    "        encoder_outputs = whisper_encoder(input_features)\n",
    "\n",
    "    # Store hidden states (move to CPU and numpy)\n",
    "    batch_hidden = encoder_outputs.last_hidden_state.detach().cpu().numpy()\n",
    "    train_hidden_states.extend(batch_hidden)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "41cd6051",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Batch processing\n",
    "for i in tqdm.tqdm(range(0, len(test_audio), BATCH_SIZE)):\n",
    "    batch_audio_files = test_audio[i:i + BATCH_SIZE]\n",
    "    audio_tensors = []\n",
    "\n",
    "    # Load and preprocess each audio\n",
    "    for audio_file in batch_audio_files:\n",
    "        audio_waveform, _ = torchaudio.load(audio_file)\n",
    "        audio_resampled = resample(audio_waveform[0])  # mono\n",
    "        audio_tensors.append(audio_resampled)\n",
    "        \n",
    "    audio_np = [x.cpu().numpy() for x in audio_tensors]  # Convert each tensor to 1D NumPy array\n",
    "\n",
    "\n",
    "    # Process with Whisper\n",
    "    input_features = processor(audio_np, sampling_rate=16000, return_tensors=\"pt\").input_features.to(device)\n",
    "    with torch.no_grad():\n",
    "        encoder_outputs = whisper_encoder(input_features)\n",
    "\n",
    "    # Store hidden states (move to CPU and numpy)\n",
    "    batch_hidden = encoder_outputs.last_hidden_state.detach().cpu().numpy()\n",
    "    test_hidden_states.extend(batch_hidden)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02de6779",
   "metadata": {},
   "outputs": [],
   "source": [
    "torch_train_hidden_states = [torch.tensor(hidden_states) for hidden_states in train_hidden_states]\n",
    "torch_test_hidden_states = [torch.tensor(hidden_states) for hidden_states in test_hidden_states]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d3063c42",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_embeddings = torch.stack(torch_train_hidden_states)[:,::10].mean(1)\n",
    "test_embeddings = torch.stack(torch_test_hidden_states)[:,::10].mean(1)\n",
    "\n",
    "train_embeddings.shape, test_embeddings.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bcbc80b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "base_output_dir = \"encoding_speech\"\n",
    "os.makedirs(base_output_dir,exist_ok=True)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b13c3312",
   "metadata": {},
   "source": [
    "## Train the encoding model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c57cbbd",
   "metadata": {},
   "outputs": [],
   "source": [
    "TRAIN_SM_ENCODING = True\n",
    "\n",
    "\n",
    "if TRAIN_SM_ENCODING:\n",
    "\n",
    "    time_windows_sm_models=[]\n",
    "    time_windows_sm_corrs = []\n",
    "\n",
    "    for time_window in tqdm.trange(200):\n",
    "        encoding = RidgeCV(alphas = [1,10,1e2,1e3], ).fit(train_embeddings, padded_neural_train[:, time_window, :])\n",
    "        time_windows_sm_models.append(encoding)\n",
    "        pred = encoding.predict(test_embeddings)\n",
    "\n",
    "            ## measure channel-wise correlation\n",
    "        corrs = np.zeros(256)\n",
    "        for i in range(256):\n",
    "            corrs[i] = np.corrcoef(pred[:, i], padded_neural_test[:, time_window,i])[0, 1]\n",
    "        time_windows_sm_corrs.append(corrs)\n",
    "\n",
    "    time_windows_SM_corrs_array = np.array(time_windows_sm_corrs)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7f1d4b77",
   "metadata": {},
   "outputs": [],
   "source": [
    "## for each time-window compute a null distribution\n",
    "if TRAIN_SM_ENCODING:\n",
    "    time_windows_SM_null_dist=[]\n",
    "    N = 100\n",
    "\n",
    "    for time_window in tqdm.trange(200):\n",
    "        null_dist = []\n",
    "\n",
    "        for i in range(N):\n",
    "            null_encoding = RidgeCV(alphas = [1,10,1e2,1e3], ).fit(train_embeddings, np.random.permutation(padded_neural_train[:, time_window, :]))\n",
    "            null_pred = null_encoding.predict(test_embeddings)\n",
    "\n",
    "            ## measure channel-wise correlation\n",
    "            null_corrs = np.zeros(256)\n",
    "            for i in range(256):\n",
    "                null_corrs[i] = np.corrcoef(null_pred[:, i], padded_neural_test[:, time_window,i])[0, 1]\n",
    "\n",
    "            null_dist.append(null_corrs)\n",
    "        null_dist = np.array(null_dist)\n",
    "        time_windows_SM_null_dist.append(null_dist)\n",
    "    time_windows_SM_null_dist_array = np.array(time_windows_SM_null_dist)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "add6e6c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "## TO DO WORK HERE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4af5e3c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "## save all the models, the time_windows_null_dist and the time_windows_corrs\n",
    "import pickle\n",
    "\n",
    "if TRAIN_SM_ENCODING:\n",
    "\n",
    "    with open(opj(base_output_dir,\"time_windows_SM_models.pkl\"), \"wb\") as f:\n",
    "        pickle.dump(time_windows_sm_models, f)\n",
    "    with open(opj(base_output_dir,\"time_windows_SM_corrs.pkl\"), \"wb\") as f:\n",
    "        pickle.dump(time_windows_SM_corrs_array, f)\n",
    "\n",
    "    with open(opj(base_output_dir,\"time_windows_SM_null_dist.pkl\"), \"wb\") as f:\n",
    "        pickle.dump(time_windows_SM_null_dist, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2b3b0b17",
   "metadata": {},
   "outputs": [],
   "source": [
    "if not TRAIN_SM_ENCODING:\n",
    "\n",
    "    time_windows_sm_models= pickle.load(open(opj(base_output_dir,\"time_windows_SM_models.pkl\"), \"rb\"))\n",
    "    time_windows_SM_corrs_array = pickle.load(open(opj(base_output_dir,\"time_windows_SM_corrs.pkl\"), \"rb\"))\n",
    "    time_windows_SM_null_dist = pickle.load(open(opj(base_output_dir,\"time_windows_SM_null_dist.pkl\"), \"rb\"))\n",
    "    time_windows_SM_null_dist_array = np.array(time_windows_SM_null_dist)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7c505c06",
   "metadata": {},
   "source": [
    "### Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4447b185",
   "metadata": {},
   "outputs": [],
   "source": [
    "channel_idx = 130\n",
    "\n",
    "plt.plot(time_windows_SM_corrs_array[:, channel_idx],  color=\"tab:orange\", label=\"encoding\")\n",
    "plt.ylim(-0.3,0.3)\n",
    "plt.plot(time_windows_SM_null_dist_array[:,:,channel_idx].mean(-1), color=\"tab:blue\",label=\"null distribution\")\n",
    "\n",
    "#fill between the null distribution with std\n",
    "plt.fill_between(range(200),\n",
    "                  time_windows_SM_null_dist_array[:,:,channel_idx].mean(-1) - time_windows_SM_null_dist_array[:,:,240].std(-1), \n",
    "                  time_windows_SM_null_dist_array[:,:,channel_idx].mean(-1) + time_windows_SM_null_dist_array[:,:,240].std(-1),\n",
    "                  alpha=0.2, color=\"tab:blue\")\n",
    "\n",
    "plt.axvline(x=50, color='r', linestyle='--')\n",
    "plt.title(f\"Channel {channel_idx} correlation with the target\")\n",
    "plt.xlabel(\"Time window\")\n",
    "plt.ylabel(\"Correlation\")\n",
    "plt.legend()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "077153a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"check\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "14065ffa",
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in range(16):\n",
    "    plot_channels_grid_fdr(channels_range=range(i*16,(i+1)*16), figure_title=f\"Encoding - SM Channels {i*16} to {(i+1)*16} with FDR\", alpha_level=0.05,\n",
    "                           time_windows_corrs_array=time_windows_SM_corrs_array,\n",
    "                           time_windows_null_dist_array=time_windows_SM_null_dist_array, prefix_title = \"Speech_SM\",\n",
    "                           out_folder=base_output_dir)\n",
    "\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "evo",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
