{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "8f8a00a6-ee2d-4b1a-93e6-e3c8c24acca6",
   "metadata": {},
   "source": [
    "# Summary of this notebook\n",
    "- Three helpers for reading scene graphs: get a scene's objects, get the object names, and get the object count\n",
    "- Display statistics on the number of objects per scene"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "918a634a-339b-4441-9c43-be9c01bf41a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import matplotlib.pyplot as plt\n",
    "from collections import Counter\n",
    "from ipywidgets import interact, IntSlider\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "e4e9204c-209b-4dcf-94d7-740b1ac021af",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_json(split, data_dir=\"../data/sceneGraphs\"):\n",
    "    \"\"\"Load the scene-graph JSON file of one dataset split.\n",
    "\n",
    "    Args:\n",
    "        split: Split name used as the file-name prefix, e.g. \"train\".\n",
    "        data_dir: Directory containing ``{split}_sceneGraphs.json``. The\n",
    "            default is the path this notebook previously hard-coded.\n",
    "\n",
    "    Returns:\n",
    "        The parsed JSON document.\n",
    "    \"\"\"\n",
    "    path = f\"{data_dir}/{split}_sceneGraphs.json\"\n",
    "    # JSON text is UTF-8; do not depend on the platform's default encoding.\n",
    "    with open(path, encoding=\"utf-8\") as f:\n",
    "        return json.load(f)\n",
    "\n",
    "# Scene ids are looked up as str (JSON object keys are always strings)\n",
    "def get_scene_graph(scene_id):\n",
    "    \"\"\"Return the ``'objects'`` entry of one scene from the global ``d``.\n",
    "\n",
    "    Args:\n",
    "        scene_id: Scene identifier. It is converted with ``str()`` before the\n",
    "            lookup, so an integer id works as well as its string form.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: If the id is not in ``d`` or the scene has no ``'objects'``.\n",
    "    \"\"\"\n",
    "    return d[str(scene_id)]['objects']\n",
    "def get_object_names(scenegraph):\n",
    "    \"\"\"List the ``'name'`` of every object in a scene graph, in iteration order.\"\"\"\n",
    "    return [obj['name'] for obj in scenegraph.values()]\n",
    "def get_object_count(scenegraph):\n",
    "    \"\"\"Return how many entries the scene graph holds.\"\"\"\n",
    "    n_objects = len(scenegraph)\n",
    "    return n_objects\n",
    "\n",
    "# Slider callback: redraws the object-count histogram for the current settings\n",
    "def update_plot(min_value, max_value, max_duplicates):\n",
    "    \"\"\"Plot how many scenes have each object count, after filtering.\n",
    "\n",
    "    A scene is kept when its object count lies in ``[min_value, max_value]``\n",
    "    and ``get_max_duplicates`` for it is at most ``max_duplicates``. Reads the\n",
    "    global ``counts`` mapping (scene id -> object count).\n",
    "\n",
    "    Args:\n",
    "        min_value: Smallest object count to keep (inclusive).\n",
    "        max_value: Largest object count to keep (inclusive).\n",
    "        max_duplicates: Upper bound on ``get_max_duplicates`` for a kept scene.\n",
    "    \"\"\"\n",
    "    # The range test comes first so get_max_duplicates only runs for scenes\n",
    "    # that already pass the cheaper comparison.\n",
    "    filtered_quantities = [\n",
    "        val\n",
    "        for key, val in counts.items()\n",
    "        if min_value <= val <= max_value and get_max_duplicates(key) <= max_duplicates\n",
    "    ]\n",
    "\n",
    "    # Count the frequency of each value\n",
    "    value_counts = Counter(filtered_quantities)\n",
    "    x_values = list(value_counts.keys())\n",
    "    y_values = list(value_counts.values())\n",
    "\n",
    "    # Share of scenes kept, measured against the same mapping that was\n",
    "    # filtered; the guards keep the callback from raising on empty data.\n",
    "    total_scenes = len(counts)\n",
    "    filtered_total = len(filtered_quantities)\n",
    "    remaining_percentage = (filtered_total / total_scenes) * 100 if total_scenes else 0.0\n",
    "    mean_value = np.mean(filtered_quantities) if filtered_quantities else 0\n",
    "    median_value = np.median(filtered_quantities) if filtered_quantities else 0\n",
    "\n",
    "    # One fresh figure per call. Calling plt.clf() before plt.figure() would\n",
    "    # create an extra, empty figure whenever no figure is currently open.\n",
    "    fig, ax = plt.subplots(figsize=(10, 6))\n",
    "    ax.bar(x_values, y_values, color='blue')\n",
    "\n",
    "    # Annotate with remaining percentage and summary statistics\n",
    "    ax.text(0.95, 0.95, f'Remaining: {remaining_percentage:.2f}%\\nTotal Scenes: {filtered_total}\\nMean Value: {mean_value:.2f}\\nMedian Value: {median_value:.2f}',\n",
    "            ha='right', va='top', transform=ax.transAxes, fontsize=12, color='red')\n",
    "\n",
    "    ax.set_xlabel('Number of objects')\n",
    "    ax.set_ylabel('Frequency')\n",
    "    ax.set_title(f'Frequency Plot of Object Quantities ({min_value} <= values <= {max_value})')\n",
    "    plt.show()\n",
    "    \n",
    "# Largest number of objects that share one name within a scene\n",
    "def get_max_duplicates(key):\n",
    "    \"\"\"Return the highest repeat count of any object name in scene ``key``.\n",
    "\n",
    "    Returns 0 when the scene has no objects.\n",
    "    \"\"\"\n",
    "    object_names = get_object_names(get_scene_graph(key))\n",
    "    name_freq = Counter(object_names)\n",
    "    if not name_freq:\n",
    "        return 0\n",
    "    return max(name_freq.values())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3b61f740-75ab-4ff3-820a-80798b89459a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset split names, as accepted by read_json\n",
    "train_split, val_split = \"train\", \"val\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e1e78ddf-59aa-4dd4-9875-759a697a2f5d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "74942\n"
     ]
    }
   ],
   "source": [
    "# Load the training split and record the number of objects in every scene.\n",
    "# The name `d` must stay: get_scene_graph reads it as a global.\n",
    "d = read_json(train_split)\n",
    "# Built in one expression so no loop temporaries linger in the notebook namespace\n",
    "counts = {key: get_object_count(get_scene_graph(key)) for key in d}\n",
    "print(len(d))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d03d36ee-f61f-4652-a44c-debd68e94fec",
   "metadata": {},
   "source": [
    "- Val scenes: 10696\n",
    "- Train scenes: 74942\n",
    "- Total scenes: 85638"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "80d95f44-19cd-465b-a7bf-08a41d0c1c00",
   "metadata": {},
   "source": [
    "Total number of images in the image set: 148854"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "ee84e721-aa2b-4b4b-a787-e9a5fee4a226",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scene ids (a live dict view) and the matching per-scene object counts (a list)\n",
    "keys = counts.keys()\n",
    "quantities = [counts[scene_key] for scene_key in keys]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "29ddaf68-abf3-4d56-b6f6-05fdd3d0a3de",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "15f31c70b3fc4eada2590aba3c6d2840",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "interactive(children=(IntSlider(value=0, description='min_value', max=126), IntSlider(value=126, description='…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Sliders for the object-count range and the duplicate-name cap\n",
    "max_objects = max(quantities, default=0)  # default avoids ValueError on empty data\n",
    "interact(update_plot,\n",
    "         min_value=IntSlider(min=0, max=max_objects, step=1, value=0),\n",
    "         max_value=IntSlider(min=0, max=max_objects, step=1, value=max_objects),\n",
    "         # A name cannot repeat more often than a scene has objects, so max_objects\n",
    "         # bounds this slider; it starts at the top so nothing is filtered out initially.\n",
    "         max_duplicates=IntSlider(min=0, max=max_objects, step=1, value=max_objects));"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "id": "0133ac08-f03a-49ef-b205-db89155490d7",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e371f7a0-0532-4a58-8f34-fb59fe11f4f6",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
