{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# IEMOCAP\n",
    "\n",
    "#### * Identifier:\n",
    "USC_IEMOCAP\n",
    "\t\n",
    "#### * Modalities:\n",
    "Audio, video, Motion Capture\n",
    "\t\n",
    "#### * Emotional content:\n",
    "angry, happy, sad, neutral, frustrated, excited, fearful, disgusted, excited, other\t\n",
    "\n",
    "#### * Emotion elicitation methods:\n",
    "scripts and improvisation\t\n",
    "\n",
    "#### * Size:\n",
    "10 subjects, ~12 hours of audiovisual data\n",
    "\t\n",
    "#### * Nature of material:\n",
    "dyadic interactions\t\n",
    "\n",
    "#### * Language:\n",
    "English\n",
    "\n",
    "#### * Reference:\n",
    "Busso, Carlos et al.  \"IEMOCAP: Interactive emotional dyadic motion capture database.\"  Journal of Language Resources and Evaluation.  Volume 42, Number 4.  pp. 335-359.  2008"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Label  \n",
    "% [START_TIME - END_TIME] TURN_NAME EMOTION [V, A, D]\n",
    "\n",
    "[6.2901 - 8.2357]\tSes01F_impro01_F000\tneu\t[2.5000, 2.5000, 2.5000]  \n",
    "C-E2:\tNeutral;\t()  \n",
    "C-E3:\tNeutral;\t()  \n",
    "C-E4:\tNeutral;\t()  \n",
    "C-F1:\tNeutral;\t(curious)  \n",
    "A-E3:\tval 3; act 2; dom  2;\t()  \n",
    "A-E4:\tval 2; act 3; dom  3;\t(mildly aggravated but staying polite, attitude)  \n",
    "A-F1:\tval 3; act 2; dom  1;\t()  \n",
    "  \n",
    "[10.0100 - 11.3925]\tSes01F_impro01_F001\tneu\t[2.5000, 2.5000, 2.5000]  \n",
    "C-E2:\tNeutral;\t()  \n",
    "C-E3:\tNeutral;\t()  \n",
    "C-E4:\tNeutral;\t()  \n",
    "C-F1:\tNeutral; Anger;\t()  \n",
    "A-E3:\tval 3; act 2; dom  2;\t()  \n",
    "A-E4:\tval 2; act 3; dom  3;\t(guarded, tense, ready)  \n",
    "A-F1:\tval 2; act 3; dom  2;\t()  \n",
    "  \n",
    "[14.8872 - 18.0175]\tSes01F_impro01_F002\tneu\t[2.5000, 2.5000, 2.5000]  \n",
    "C-E2:\tNeutral;\t()  \n",
    "C-E3:\tSurprise;\t()  \n",
    "C-E4:\tNeutral;\t()  \n",
    "C-F1:\tNeutral; Anger;\t()  \n",
    "A-E3:\tval 3; act 2; dom  2;\t()  \n",
    "A-E4:\tval 2; act 3; dom  3;\t(superior, indifferent, menacing)  \n",
    "A-F1:\tval 3; act 3; dom  3;\t()  \n",
    "  \n",
    "[19.2900 - 20.7875]\tSes01F_impro01_F003\txxx\t[2.5000, 3.0000, 3.0000]  \n",
    "C-E2:\tNeutral;\t()  \n",
    "C-E3:\tFrustration;\t()  \n",
    "C-E4:\tAnger;\t()  \n",
    "C-F1:\tFrustration; Anger;\t()  \n",
    "A-E3:\tval 3; act 3; dom  3;\t()  \n",
    "A-E4:\tval 2; act 3; dom  3;\t(accusing)  \n",
    "A-F1:\tval 2; act 3; dom  3;\t()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import os, re\n",
    "import librosa\n",
    "from tqdm.notebook import tqdm\n",
    "import numpy as np\n",
    "import torch\n",
    "from utils import *\n",
    "import random\n",
    "random.seed(0)\n",
    "\n",
    "data_path = \"./Dataset/IEMOCAP\"\n",
    "\n",
    "if not os.path.exists(f\"{data_path}/melspectrogram\"):\n",
    "    os.mkdir(f\"{data_path}/melspectrogram\")\n",
    "    os.mkdir(f\"{data_path}/melspectrogram/train\")\n",
    "    os.mkdir(f\"{data_path}/melspectrogram/validation\")\n",
    "    os.mkdir(f\"{data_path}/melspectrogram/test\")\n",
    "\n",
    "Sessions = [1,2,3,4,5]\n",
    "labels = ['ang', 'hap', 'sad', 'neu', 'fru', 'exc', 'sur', 'fea', 'dis', 'oth', 'xxx']\n",
    "label2id = dict([ (l,i) for i, l in enumerate(labels)])\n",
    "label_count = [0, 0, 0, 0, 0, 0, 0]\n",
    "data_list=[]\n",
    "for s in Sessions:\n",
    "    root_dir = f'./Dataset/IEMOCAP/Session{s}/dialog/EmoEvaluation'\n",
    "    dialogues = sorted([f for f in os.listdir(root_dir) if (f.endswith('.txt')) and not f.startswith('.')])\n",
    "    for dialogue in dialogues:\n",
    "        with open(os.path.join(root_dir, dialogue), 'r') as f:\n",
    "            lines = f.read().splitlines()\n",
    "            for line in lines:\n",
    "                if len(line)>0 and line[0]=='[' and line[-1]==']':\n",
    "                    basename, label = line.split()[3], line.split()[4]\n",
    "                    session, part, speaker = basename[4], basename[:-5], basename[:6]\n",
    "                    assert label in labels\n",
    "                    assert speaker in ['Ses01M', 'Ses01F', 'Ses02M', 'Ses02F', 'Ses03M', 'Ses03F', 'Ses04M', 'Ses04F', 'Ses05M', 'Ses05F']\n",
    "                    \n",
    "                    if label in ['ang', 'hap', 'sad', 'neu', 'fru', 'exc', 'sur']:\n",
    "                        label_count[ label2id[label] ] += 1\n",
    "                        data_list.append((f'{data_path}/Session{session}/sentences/wav/{part}/{basename}.wav', label))\n",
    "                        \n",
    "random.shuffle(data_list)\n",
    "\n",
    "##### Train Set #####\n",
    "for wav_path, label in tqdm(data_list[2000:]):\n",
    "    y, sr = librosa.load(wav_path, sr=16000)\n",
    "    wav_name = f\"{label}_{wav_path.split('/')[-1][:-4]}\"\n",
    "    mel = mel_spectrogram_torch(torch.from_numpy(y).unsqueeze(0), 1024, 80, 16000, 256, 1024, 0.0, 8000.0, center=False)\n",
    "    np.save(f\"{data_path}/melspectrogram/train/{wav_name}.npy\", mel[0].numpy())\n",
    "    \n",
    "##### Validation Set #####\n",
    "for wav_path, label in tqdm(data_list[1000:2000]):\n",
    "    y, sr = librosa.load(wav_path, sr=16000)\n",
    "    wav_name = f\"{label}_{wav_path.split('/')[-1][:-4]}\"\n",
    "    mel = mel_spectrogram_torch(torch.from_numpy(y).unsqueeze(0), 1024, 80, 16000, 256, 1024, 0.0, 8000.0, center=False)\n",
    "    np.save(f\"{data_path}/melspectrogram/validation/{wav_name}.npy\", mel[0].numpy())\n",
    "    \n",
    "##### Test Set #####\n",
    "for wav_path, label in tqdm(data_list[:1000]):\n",
    "    y, sr = librosa.load(wav_path, sr=16000)\n",
    "    wav_name = f\"{label}_{wav_path.split('/')[-1][:-4]}\"\n",
    "    mel = mel_spectrogram_torch(torch.from_numpy(y).unsqueeze(0), 1024, 80, 16000, 256, 1024, 0.0, 8000.0, center=False)\n",
    "    np.save(f\"{data_path}/melspectrogram/test/{wav_name}.npy\", mel[0].numpy())"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "f00343bf9e86865dcb3c541c495e4343d6119318f9fa8a8963a3632bc79f9f1f"
  },
  "kernelspec": {
   "display_name": "Environment (conda_fs2)",
   "language": "python",
   "name": "conda_fs2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
