{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Crucial References\n",
    "\n",
    "### TFRecord使用方法 -ファイル保存、読込み、ニューラルネットワークへの投入-\n",
    "https://qiita.com/rivival/items/6613d80a4427e6be8799\n",
    "- context and feature_list\n",
    "- dataset.padded_batch\n",
    "\n",
    "### [TensorFlow 2] TFRecordからの特徴量読み込みはバッチ単位でやるのがオススメ\n",
    "https://qiita.com/everylittle/items/a7c31b08d2f76c886a92\n",
    "- Ragged Tensor vs. tf.io.VarLenFeature\n",
    "- Acceleration Tips:\n",
    "    - TFRecordからの読み込みはバッチ単位でやりましょう。\n",
    "    - バッチ化した後に parse_example_dataset() を使って変換しましょう。この関数の戻り値を、Datasetの apply() の引数に指定します。\n",
    "    - 可変長の特徴量は、TensorFlow 2.1以降なら RaggedFeature を指定して読み込みましょう。\n",
    "\n",
    "### TensorFlow & Keras で TFRecord & DataSetを使って大量のデータを学習させる方法\n",
    "https://qiita.com/everylittle/items/1d8a6267b0967346767a\n",
    "- Acceleration Tips:\n",
    "    - dataset.prefetch(1)\n",
    "    - dataset.parallel_interleave(...)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Should be written in our paper\n",
    "- repeat the audio if duration < 10\n",
    "- divide by 127.5 and subtract 1\n",
    "- v1\n",
    "- used the balanced train and eval subsets because their labels have already been re-rated.\n",
    "- 128-dimensional features, duration <= 10. Preprocessing can be found in the AudioSet GitHub repository []."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import absolute_import, division, print_function\n",
    "import os, sys, glob\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from utils.misc import set_gpu_devices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): set_gpu_devices is imported from utils.misc, which is not shown in this\n",
    "# notebook; presumably this restricts TensorFlow to GPU index 0 -- confirm against the helper.\n",
    "set_gpu_devices(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root directory of the AudioSet v1 embedding TFRecords. The AUDIOSET_DIR environment\n",
    "# variable overrides the default, so the notebook is not tied to one machine's layout.\n",
    "path_audioset = os.environ.get(\n",
    "    \"AUDIOSET_DIR\", \"/data/t-miyagawa/audioset/audioset_v1_embeddings\")\n",
    "path_bal_train = os.path.join(path_audioset, \"bal_train\")\n",
    "path_eval = os.path.join(path_audioset, \"eval\")\n",
    "\n",
    "# Directory whose *.tfrecord files are read by the reader cell.\n",
    "path_tfrdir = path_bal_train"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Reader Function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of leading examples used for training; the remainder is the validation subset.\n",
    "num_trainsubset = 19782\n",
    "# Only clips with exactly this many embedding frames are kept.\n",
    "num_frames = 10\n",
    "\n",
    "# NOTE: ds.padded_batch(100, padded_shapes=([10, 128], [None])) is not used here because\n",
    "# batching of padded sparse tensors is not currently supported.\n",
    "\n",
    "\n",
    "def _parse_func(sequence_example):\n",
    "    \"\"\"Parse one serialized SequenceExample into (labels, audio_embedding).\n",
    "\n",
    "    Args:\n",
    "        sequence_example: scalar string tensor holding one serialized SequenceExample.\n",
    "\n",
    "    Returns:\n",
    "        A tuple (labels, audio_embedding). labels is the sparse tensor of int64 values\n",
    "        parsed from the \"labels\" context feature; audio_embedding holds one raw byte\n",
    "        string per step of the \"audio_embedding\" feature list.\n",
    "    \"\"\"\n",
    "    context, feature_lists = tf.io.parse_single_sequence_example(\n",
    "        sequence_example,\n",
    "        context_features={\n",
    "            'video_id': tf.io.FixedLenFeature([], tf.string),\n",
    "            'start_time_seconds': tf.io.FixedLenFeature([], tf.float32),\n",
    "            'end_time_seconds': tf.io.FixedLenFeature([], tf.float32),\n",
    "            'labels': tf.io.VarLenFeature(tf.int64)\n",
    "        },\n",
    "        sequence_features={\n",
    "            'audio_embedding': tf.io.FixedLenSequenceFeature([], dtype=tf.string),\n",
    "        })\n",
    "\n",
    "    return context[\"labels\"], feature_lists[\"audio_embedding\"]\n",
    "\n",
    "\n",
    "# Get all paths to tfrecords. Sorted, because glob returns files in filesystem order and\n",
    "# the training/validation split below depends on the order in which examples are read.\n",
    "record_file = sorted(glob.glob(os.path.join(path_tfrdir, \"*.tfrecord\")))\n",
    "if not record_file:\n",
    "    raise FileNotFoundError(\"No *.tfrecord files found in {}\".format(path_tfrdir))\n",
    "\n",
    "# Decoding\n",
    "ds = tf.data.TFRecordDataset(record_file)\n",
    "ds = ds.map(_parse_func)\n",
    "\n",
    "# Load all data into memory: labels as NumPy arrays, audio as float32 TF tensors.\n",
    "ls_audio = []\n",
    "ls_labels = []\n",
    "for sparse_labels, raw_audio in ds:\n",
    "    # Labels arrive as a sparse tensor; only its values are kept.\n",
    "    ls_labels.append(tf.cast(sparse_labels.values, tf.int32).numpy())\n",
    "    # Every step is a byte string; decode it to uint8 and cast to float32.\n",
    "    ls_audio.append(tf.cast(tf.io.decode_raw(raw_audio, tf.uint8), tf.float32))\n",
    "\n",
    "# Keep only audio and labels whose duration (size of the first axis) equals num_frames.\n",
    "# Counts per duration observed on bal_train:\n",
    "# {1: 4, 2: 15, 3: 22, 4: 39, 5: 38, 6: 47, 7: 69, 8: 57, 9: 87, 10: 21782}\n",
    "ls_idx = [idx for idx, audio in enumerate(ls_audio) if audio.shape[0] == num_frames]\n",
    "ls_audio_10frame = [ls_audio[idx] for idx in ls_idx]\n",
    "ls_labels_10frame = [ls_labels[idx] for idx in ls_idx]\n",
    "if not ls_audio_10frame:\n",
    "    raise ValueError(\"No example with {} frames found in {}\".format(num_frames, path_tfrdir))\n",
    "\n",
    "# Training/validation separation.\n",
    "# Stacking derives the leading dimension from the data instead of hard-coding the sample\n",
    "# count; on bal_train the result has shape (21782, 10, 128).\n",
    "audio_all = tf.stack(ls_audio_10frame, axis=0)\n",
    "audio_tr = audio_all[:num_trainsubset]\n",
    "audio_vl = audio_all[num_trainsubset:]\n",
    "labels_tr = ls_labels_10frame[:num_trainsubset]\n",
    "labels_vl = ls_labels_10frame[num_trainsubset:]\n",
    "    # GPU consumption = 681 MB so far (measured with the original bal_train run)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Statistics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Audio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "duration: num of audio data\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{1: 4, 2: 15, 3: 22, 4: 39, 5: 38, 6: 47, 7: 69, 8: 57, 9: 87, 10: 21782}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count how many audio tensors share each duration (size of the first axis).\n",
    "dc = {}\n",
    "for itr_audio in ls_audio:\n",
    "    duration = itr_audio.shape[0]\n",
    "    # A duration seen for the first time starts from zero.\n",
    "    dc[duration] = dc.get(duration, 0) + 1\n",
    "print(\"duration: num of audio data\")\n",
    "dc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 16,  17, 478, ..., 137, 447, 478], dtype=int32)"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Flatten the per-example label arrays into a single 1-D array.\n",
    "# One np.concatenate call avoids re-copying the growing array on every iteration.\n",
    "if ls_labels:\n",
    "    labels_concat = np.concatenate(ls_labels)\n",
    "else:\n",
    "    # Nothing was loaded: fall back to an empty array with the dtype the labels are cast to.\n",
    "    labels_concat = np.empty((0,), dtype=np.int32)\n",
    "labels_concat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "22160"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of loaded examples: ls_labels holds one label array per parsed example.\n",
    "len(ls_labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((52882,), array([ 16,  17, 478, ..., 137, 447, 478], dtype=int32), 526)"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Shape, contents and largest value of the flattened label array.\n",
    "labels_concat.shape, labels_concat, np.max(labels_concat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Histogram of label ids over all loaded examples (uses labels_concat and path_tfrdir).\n",
    "title = \"Label histogram, data=AudioSet ({})\".format(os.path.basename(path_tfrdir))\n",
    "xlabel = \"Label id\"\n",
    "ylabel = \"Count\"\n",
    "\n",
    "plt.rcParams[\"font.size\"] = 25\n",
    "plt.figure(figsize=(13,8))\n",
    "\n",
    "data = np.asarray(labels_concat).reshape(-1)\n",
    "print(np.max(data))\n",
    "print(np.min(data))\n",
    "# One bin per integer label id, centred on the id.\n",
    "plt.hist(data, bins=np.arange(np.min(data), np.max(data) + 2) - 0.5)\n",
    "\n",
    "plt.xlabel(xlabel)\n",
    "plt.ylabel(ylabel)\n",
    "plt.grid(True)\n",
    "plt.title(title)\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Feature PCA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
