{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fed2776c-a51f-4659-aab6-1e1850c7a8f0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import jsonlines\n",
    "from collections import defaultdict\n",
    "import json\n",
    "from pathlib import Path\n",
    "import random\n",
    "import sys\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.font_manager\n",
    "import matplotlib.image as mpimg\n",
    "sys.path.append(\"..\")\n",
    "import src.utils.datatool as dtool  # noqa: E402"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "157239bf-a536-4cb4-b6d7-d8d13241fee3",
   "metadata": {},
   "outputs": [],
   "source": [
    "with jsonlines.open(\"/data/vtt/meta/vtt.jsonl\") as reader:\n",
    "    data = list(reader)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "921e40dc-5caa-401d-82f5-cfb1ce7e0b21",
   "metadata": {},
   "outputs": [],
   "source": [
    "STATES_ROOT = Path(\"/data/vtt/states\")\n",
    "def show_sample(sample):\n",
    "    n_states = len(sample['annotation']) + 1\n",
    "    t_states = [sample['annotation'][0][\"segment\"][0]] + [\n",
    "        step[\"segment\"][1] for step in sample['annotation']\n",
    "    ]\n",
    "    state_path_list = [STATES_ROOT / f\"{sample['id']}_{n_states}_{i}.jpg\" for i in range(n_states)]\n",
    "    show_figures(\n",
    "        state_path_list,\n",
    "        title=f\"{sample['ori']} - {sample['id']} - {sample['youtube_id']}\",\n",
    "        labels = [\n",
    "            f\"[ {t_states[0]} ]\"] + [f\"{s['label']} [ {t_states[i+1]} ]\"\n",
    "            for i, s in enumerate(sample[\"annotation\"])\n",
    "        ],\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af434887-ffde-41ec-ba88-a2bc2d9afd5d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def show_figures(path_list, title=None, labels=None, show_indices=True):\n",
    "    from textwrap import wrap\n",
    "    n_img = len(path_list)\n",
    "    width, height = plt.figaspect(1)\n",
    "    \n",
    "    plt.rcParams[\"savefig.bbox\"] = \"tight\"\n",
    "    plt.rcParams['axes.linewidth'] = 0\n",
    "    plt.rcParams['axes.titlepad'] = 6\n",
    "    plt.rcParams['axes.titlesize'] = 12\n",
    "    plt.rcParams['font.family'] = 'Helvetica'\n",
    "    plt.rcParams['axes.labelweight'] = 'normal'\n",
    "    plt.rcParams['font.size'] = 12\n",
    "    plt.rcParams[\"figure.dpi\"] = 100\n",
    "    plt.rcParams[\"savefig.dpi\"] = 100\n",
    "    plt.rcParams['figure.titlesize'] = 18\n",
    "\n",
    "    #subplot(r,c) provide the no. of rows and columns\n",
    "    fig, axarr = plt.subplots(1, n_img,figsize=(width*n_img, height))\n",
    "    # use the created array to output your multiple images. In this case I have stacked 4 images vertically\n",
    "    for i in range(n_img):\n",
    "        # axarr[i].axis(\"off\")\n",
    "        if path_list[i].exists():\n",
    "            axarr[i].imshow(mpimg.imread(path_list[i]))\n",
    "            axarr[i].set_xticks([])\n",
    "            axarr[i].set_yticks([])\n",
    "            if show_indices:\n",
    "                axarr[i].set_title(f\"{i}\")\n",
    "            if labels is not None:\n",
    "                axarr[i].set_xlabel(\"\\n\".join(wrap(f\"{labels[i]}\", width=width*10)))\n",
    "\n",
    "    # plt.subplots_adjust(hspace=0, wspace=0.05)\n",
    "    if title:\n",
    "        fig.suptitle(title)\n",
    "    plt.tight_layout()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "84dc4e45-fa3d-431c-9e07-6a76e08d81aa",
   "metadata": {},
   "outputs": [],
   "source": [
    "history = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5da5a333-c407-4b80-91cc-daf134b70f17",
   "metadata": {},
   "outputs": [],
   "source": [
    "sample = random.choice(data)\n",
    "history.append(sample)\n",
    "show_sample(sample)\n",
    "plt.savefig(f\"sample/sample_case_current.png\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2cc46fc8-4adf-4699-8a94-da169c04f601",
   "metadata": {},
   "outputs": [],
   "source": [
    "show_sample(data[1357])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "adca6844-6eb8-46a2-ac93-2957c25d0659",
   "metadata": {},
   "outputs": [],
   "source": [
    "data[1357]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6aa9a019-3457-437c-8e74-a6edad35db55",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "536861af-f253-4b9a-9ef2-45c1b70a3268",
   "metadata": {},
   "outputs": [],
   "source": [
    "META_FILE = Path(\"/data/vtt/meta/vtt.jsonl\")\n",
    "test_samples = dtool.JSONLList(META_FILE, lambda x: x[\"split\"] == \"test\").samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65a69b5a-32df-4c70-996d-b130490ae443",
   "metadata": {},
   "outputs": [],
   "source": [
    "show_sample(test_samples[581])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "79f06db3-debd-4baf-b32a-93b595f87e71",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4740e22a-6b40-47bf-91be-62203ab48c83",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "b394a95c-1f37-4d2d-8112-9e9bcfa6221f",
   "metadata": {
    "tags": []
   },
   "source": [
    "## VAR samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abe8c645-3c7a-4eb7-a505-8959f2c37662",
   "metadata": {},
   "outputs": [],
   "source": [
    "var_data = [sample for sample in data if sample['ori'] == 'var']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a6df3cd3-5029-47fb-8e5b-41d2ad4043a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "sample = random.choice(var_data)\n",
    "show_sample(sample)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
