{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "w8P8MmqYpSUd",
        "outputId": "0ca7d05c-41e4-4ac1-8b55-e3e62dfff4b6"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "--2024-02-20 13:12:47--  http://images.cocodataset.org/zips/val2014.zip\n",
            "Resolving images.cocodataset.org (images.cocodataset.org)... 52.216.221.33, 3.5.11.147, 52.217.230.249, ...\n",
            "Connecting to images.cocodataset.org (images.cocodataset.org)|52.216.221.33|:80... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 6645013297 (6.2G) [application/zip]\n",
            "Saving to: ‘val2014.zip’\n",
            "\n",
            "val2014.zip         100%[===================>]   6.19G  34.5MB/s    in 1m 52s  \n",
            "\n",
            "2024-02-20 13:14:39 (56.5 MB/s) - ‘val2014.zip’ saved [6645013297/6645013297]\n",
            "\n"
          ]
        }
      ],
      "source": [
        "# -nc (no-clobber): skip any archive that is already present, so re-running this cell\n",
        "# does not download the 6.2 GB image archive again or leave duplicate '.1' copies behind.\n",
        "!wget -nc http://images.cocodataset.org/zips/val2014.zip\n",
        "!wget -nc https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Val_mscoco.zip\n",
        "!wget -nc https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Annotations_Val_mscoco.zip"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# -q: quiet. -n: never overwrite files that already exist, so a re-run skips them\n",
        "# instead of stopping at unzip's interactive replace prompt.\n",
        "!unzip -q -n val2014.zip"
      ],
      "metadata": {
        "id": "AssY-3Tzp_0f"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Relative paths: wget saves the archives into the working directory, which is also\n",
        "# where the image archive is unpacked from.\n",
        "# -n: never overwrite files that already exist, so a re-run does not stop at a prompt.\n",
        "!unzip -q -n v2_Annotations_Val_mscoco.zip\n",
        "!unzip -q -n v2_Questions_Val_mscoco.zip"
      ],
      "metadata": {
        "id": "8T_ca-7fqC37"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "from tqdm import tqdm\n",
        "import json\n",
        "\n",
        "# Sanity check: number of entries in the val2014 directory\n",
        "len(os.listdir(\"val2014\"))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "AqHi8nY2rCDo",
        "outputId": "04fe2609-2840-4dd0-9b24-eceb2334ec6b"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "40504"
            ]
          },
          "metadata": {},
          "execution_count": 7
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Load the VQA annotation and question files from the working directory\n",
        "# (presumably produced by the unzip cell above - verify the file names after extraction).\n",
        "# JSON is read as UTF-8 explicitly rather than relying on the platform default encoding.\n",
        "with open(\"v2_mscoco_val2014_annotations.json\", encoding=\"utf-8\") as f:\n",
        "  annotation_data = json.load(f)\n",
        "\n",
        "with open(\"v2_OpenEnded_mscoco_val2014_questions.json\", encoding=\"utf-8\") as f:\n",
        "  question_data = json.load(f)"
      ],
      "metadata": {
        "id": "_0JqARbQrNDA"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Index every question record by its question_id for direct lookup\n",
        "question_id_dict = {\n",
        "  entry[\"question_id\"]: entry\n",
        "  for entry in question_data[\"questions\"]\n",
        "}"
      ],
      "metadata": {
        "id": "HOFJXFTBsEZh"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Copy the question text onto each annotation record, matched via question_id\n",
        "# (this adds a \"question\" key to the annotation dicts in place)\n",
        "for annotation in annotation_data[\"annotations\"]:\n",
        "  matching_question = question_id_dict[annotation[\"question_id\"]]\n",
        "  annotation[\"question\"] = matching_question[\"question\"]"
      ],
      "metadata": {
        "id": "AIayJsOWrVII"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Show the first annotation record to inspect its fields\n",
        "annotation_data[\"annotations\"][0]"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Hk48adVKuZ3g",
        "outputId": "2a1b0ddc-a594-41fd-cccc-51359e730cf9"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'question_type': 'none of the above',\n",
              " 'multiple_choice_answer': 'down',\n",
              " 'answers': [{'answer': 'down', 'answer_confidence': 'yes', 'answer_id': 1},\n",
              "  {'answer': 'down', 'answer_confidence': 'yes', 'answer_id': 2},\n",
              "  {'answer': 'at table', 'answer_confidence': 'yes', 'answer_id': 3},\n",
              "  {'answer': 'skateboard', 'answer_confidence': 'yes', 'answer_id': 4},\n",
              "  {'answer': 'down', 'answer_confidence': 'yes', 'answer_id': 5},\n",
              "  {'answer': 'table', 'answer_confidence': 'yes', 'answer_id': 6},\n",
              "  {'answer': 'down', 'answer_confidence': 'yes', 'answer_id': 7},\n",
              "  {'answer': 'down', 'answer_confidence': 'yes', 'answer_id': 8},\n",
              "  {'answer': 'down', 'answer_confidence': 'yes', 'answer_id': 9},\n",
              "  {'answer': 'down', 'answer_confidence': 'yes', 'answer_id': 10}],\n",
              " 'image_id': 262148,\n",
              " 'answer_type': 'other',\n",
              " 'question_id': 262148000,\n",
              " 'question': 'Where is he looking?'}"
            ]
          },
          "metadata": {},
          "execution_count": 45
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Direction/position vocabulary; the filtering cell passes this list to\n",
        "# forbidden_word_as_answer to exclude annotations.\n",
        "directional_words = [\n",
        "  \"left\", \"right\",\n",
        "  \"up\", \"down\",\n",
        "  \"above\", \"below\",\n",
        "  \"over\", \"under\",\n",
        "  \"top\", \"bottom\",\n",
        "  \"port\", \"starboard\",\n",
        "  \"upward\", \"downward\",\n",
        "  \"overhead\", \"beneath\",\n",
        "  \"atop\", \"summit\",\n",
        "]"
      ],
      "metadata": {
        "id": "bcDD8oEntDiW"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import re\n",
        "\n",
        "def forbidden_word_as_answer(answer_list, forbidden_words):\n",
        "  \"\"\"Return True if any answer contains a forbidden word as a whole word.\n",
        "\n",
        "  Matching is case-insensitive and anchored at word boundaries, so \"up\" matches\n",
        "  \"look up\" but not \"cup\" (a plain substring test would reject both).\n",
        "\n",
        "  Args:\n",
        "    answer_list: iterable of dicts, each holding the answer text under \"answer\".\n",
        "    forbidden_words: iterable of words or phrases to look for.\n",
        "\n",
        "  Returns:\n",
        "    True if at least one answer contains a forbidden word, otherwise False.\n",
        "  \"\"\"\n",
        "  escaped = [re.escape(word) for word in forbidden_words if word]\n",
        "  if not escaped:\n",
        "    # An empty alternation would match everywhere; nothing is forbidden here.\n",
        "    return False\n",
        "  # Lookarounds keep a forbidden word from matching inside a longer word.\n",
        "  pattern = re.compile(r\"(?<!\\w)(?:\" + \"|\".join(escaped) + r\")(?!\\w)\", re.IGNORECASE)\n",
        "  return any(pattern.search(entry[\"answer\"]) for entry in answer_list)"
      ],
      "metadata": {
        "id": "Tg_IfK48sRlZ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from nltk.corpus import wordnet as wn\n",
        "import nltk\n",
        "nltk.download('wordnet')\n",
        "\n",
        "def has_antonyms_wordnet(words):\n",
        "  \"\"\"Return True if WordNet lists one of `words` as an antonym reachable from another.\n",
        "\n",
        "  For each word, every lemma of every synset returned by wn.synsets(word) is\n",
        "  inspected, and all antonyms of each lemma (not only the first) are compared\n",
        "  against `words`.\n",
        "\n",
        "  Args:\n",
        "    words: collection of answer strings.\n",
        "\n",
        "  Returns:\n",
        "    True as soon as an antonym that is itself in `words` is found, else False.\n",
        "  \"\"\"\n",
        "  candidates = set(words)  # constant-time membership checks\n",
        "  for word in words:\n",
        "    for synset in wn.synsets(word):\n",
        "      for lemma in synset.lemmas():\n",
        "        if any(antonym.name() in candidates for antonym in lemma.antonyms()):\n",
        "          return True\n",
        "  return False"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "m0YsaDY70y9Y",
        "outputId": "0e36d685-2cae-4f41-c42e-90c3f3775fa1"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package wordnet to /root/nltk_data...\n",
            "[nltk_data]   Package wordnet is already up-to-date!\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "MIN_ANSWERS = 4  # minimum number of distinct confident answers a question must have\n",
        "annotations_without_directional_words = []\n",
        "\n",
        "for data in tqdm(annotation_data[\"annotations\"]):\n",
        "  # Drop the question if any of its answers (confident or not) has a directional word.\n",
        "  if forbidden_word_as_answer(data[\"answers\"], directional_words):\n",
        "    continue\n",
        "\n",
        "  # Distinct answers given with confidence \"yes\". Sorted so the stored order is\n",
        "  # reproducible: iteration order of a set of strings can differ between interpreter runs.\n",
        "  confident_answers = sorted({x[\"answer\"] for x in data[\"answers\"] if x[\"answer_confidence\"] == \"yes\"})\n",
        "\n",
        "  if len(confident_answers) < MIN_ANSWERS:\n",
        "    continue\n",
        "  # Skip questions whose confident answers include a WordNet antonym pair.\n",
        "  if has_antonyms_wordnet(confident_answers):\n",
        "    continue\n",
        "\n",
        "  annotations_without_directional_words.append({\n",
        "      \"question\": data[\"question\"],\n",
        "      \"question_id\": data[\"question_id\"],\n",
        "      \"image_id\": data[\"image_id\"],\n",
        "      \"answers\": confident_answers,\n",
        "  })\n",
        "\n",
        "# (total annotations, annotations kept after filtering)\n",
        "len(annotation_data[\"annotations\"]), len(annotations_without_directional_words)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "TUAVvEX4uldL",
        "outputId": "37d788c1-ba6f-4373-8e02-f5e966be461e"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 214354/214354 [00:09<00:00, 21913.92it/s]\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(214354, 31958)"
            ]
          },
          "metadata": {},
          "execution_count": 89
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import random\n",
        "random.seed(53)  # fixed seed so the draw below is repeatable for the same input list\n",
        "\n",
        "SAMPLES = 200  # number of annotations to draw\n",
        "# random.sample draws without replacement and raises ValueError if the list has fewer than SAMPLES items\n",
        "dataset = random.sample(annotations_without_directional_words, SAMPLES)"
      ],
      "metadata": {
        "id": "2GBG_odb1JQm"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# -p: succeed silently if the directory already exists, so the cell can be re-run\n",
        "!mkdir -p dataset4"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "nu4WUoot1zey",
        "outputId": "9eb9aef8-efdb-4219-9cf9-236297613f40"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "mkdir: cannot create directory ‘dataset4’: File exists\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Show one file name from val2014 to see the naming pattern the copy step has to build\n",
        "os.listdir(\"val2014\")[0]"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 36
        },
        "id": "b4qF8wOe2RHM",
        "outputId": "7803764c-84e9-499b-ada4-28ec2370c8a2"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'COCO_val2014_000000512982.jpg'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 92
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import shutil\n",
        "\n",
        "# Copy the image of every sampled record into dataset4/, renamed to its bare image id.\n",
        "# Source files carry the image id zero-padded to 12 digits.\n",
        "for sample in tqdm(dataset):\n",
        "  image_id = sample[\"image_id\"]\n",
        "  source_path = f\"val2014/COCO_val2014_{str(image_id).zfill(12)}.jpg\"\n",
        "  target_path = f\"dataset4/{str(image_id)}.jpg\"\n",
        "  shutil.copy(source_path, target_path)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "EjTpONch18Uk",
        "outputId": "e160c102-33da-4d52-c0c2-33d87b674a4d"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 200/200 [00:00<00:00, 2075.66it/s]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Save the sampled records as JSON with 4-space indentation\n",
        "with open(\"dataset4_eval.json\", \"w\") as eval_file:\n",
        "  json.dump(dataset, eval_file, indent=4)"
      ],
      "metadata": {
        "id": "Qems2fkv2uIg"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Archive the dataset4 directory (-r: recurse into it, -q: quiet)\n",
        "!zip -rq dataset4.zip dataset4"
      ],
      "metadata": {
        "id": "foZXnks54O7D"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Upload the archive to transfer.sh; the service prints the resulting download URL.\n",
        "# NOTE(review): this is a third-party host - confirm the data may be shared this way.\n",
        "!curl --upload-file dataset4.zip https://transfer.sh/dataset4.zip"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "uW1V1b9w5DuB",
        "outputId": "83efbfa9-13b8-4043-ac29-a2ae99c201fb"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "https://transfer.sh/RwUCilpbgq/dataset4.zip"
          ]
        }
      ]
    }
  ]
}