{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "0d033483-43c1-4a31-b9ed-9a4342ae4053",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from pathlib import Path\n",
    "import re\n",
    "import json\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "89a050cb-3d00-4b4b-88a2-3b3f147be99c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Path to the GQA validation questions JSON, relative to the current working directory.\n",
    "path = Path(\"../../data/gqa/data/val_all_questions.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "916e296a-7294-42e8-83d4-5512fa7faff5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the full validation question file into `question` (a dict of question records).\n",
    "# Pin the encoding so the read does not depend on the platform's default locale.\n",
    "with open(path, encoding=\"utf-8\") as f:\n",
    "    question = json.load(f)\n",
    "\n",
    "# Draw a reproducible 10% sample of the question keys (fixed seed -> same subset every run).\n",
    "SEED = 42\n",
    "SUBSET_FRACTION = 0.1\n",
    "random.seed(SEED)\n",
    "keys = list(question.keys())\n",
    "subset_size = int(SUBSET_FRACTION * len(keys))\n",
    "subset_keys = random.sample(keys, subset_size)\n",
    "subset = {key: question[key] for key in subset_keys}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "1c1857aa-8b5f-4371-bf6f-87297af4c386",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2011853\n"
     ]
    }
   ],
   "source": [
    "# Number of question records loaded from the validation file.\n",
    "print(len(question))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "93ce73a0-90b3-417c-a447-bdc6af1dc607",
   "metadata": {},
   "outputs": [],
   "source": [
    "# print(subset_keys[:10])\n",
    "# for item in subset_keys[:10]:\n",
    "#     print(subset[item])\n",
    "#     print(\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c4c35692-b647-490f-8dcb-fd74636ffdfb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'semantic': [{'operation': 'select', 'dependencies': [], 'argument': 'table (1276780)'}, {'operation': 'exist', 'dependencies': [0], 'argument': '?'}, {'operation': 'select', 'dependencies': [], 'argument': 'plate (-) '}, {'operation': 'exist', 'dependencies': [2], 'argument': '?'}, {'operation': 'or', 'dependencies': [1, 3], 'argument': ''}], 'entailed': [], 'equivalent': ['03716212'], 'question': 'Are there any plates or tables in the scene?', 'imageId': '2387346', 'isBalanced': False, 'groups': {'global': None, 'local': '09existOr-plate_table'}, 'answer': 'yes', 'semanticStr': 'select: table (1276780)->exist: ? [0]->select: plate (-) ->exist: ? [2]->or:  [1, 3]', 'annotations': {'answer': {}, 'question': {'5': '1276780'}, 'fullAnswer': {'4': '1276780'}}, 'types': {'detailed': 'existOr', 'semantic': 'obj', 'structural': 'logical'}, 'fullAnswer': 'Yes, there is a table.'}\n",
      "obj\n"
     ]
    }
   ],
   "source": [
    "# Peek at the first sampled record and its semantic question type, then stop.\n",
    "for key, val in subset.items():\n",
    "    print(val)\n",
    "    print(val['types']['semantic'])\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "834b0e7f-f6b2-42f7-bd13-5504fe074380",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2011853\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: sampling builds a new dict, so the loaded question dict keeps its size.\n",
    "print(len(question))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9c5372a5-0880-45c2-9173-35fab61470cf",
   "metadata": {},
   "source": [
    "# First round of filtering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "0a62ecd0-5e78-40d4-bac0-2181ad5e781b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# First filter: sort every sampled question into exactly one bucket, depending on how\n",
    "# the 'select' operations of its semantic program refer to objects.\n",
    "filtered_single_noun_dic = {}  # all selects usable, at most one noun collected\n",
    "filtered_more_nouns_dic = {}  # all selects usable, more than one noun collected\n",
    "multiple_mention_dic = {}  # a select lists several object ids\n",
    "half_or_no_mention_dic = {}  # a select has no numeric object id, e.g. 'plate (-)'\n",
    "global_dic = {}  # semantic type is 'global'; selects are not inspected\n",
    "noun_not_in_q = {}  # a selected noun is not a substring of the question\n",
    "max_noun_num = 0  # largest number of nouns collected for one non-global question\n",
    "\n",
    "# A noun, optional whitespace, then one or more comma-separated numeric ids in parentheses.\n",
    "# NOTE(review): (\\w+) cannot span spaces, so a multi-word noun yields only its last word.\n",
    "pattern = r\"(\\w+)\\s*\\((\\d+(?:\\s*,\\s*\\d+)*)\\)\"\n",
    "for key, val in subset.items():\n",
    "    # Deliberately not named `question`: that name holds the full loaded question dict,\n",
    "    # and rebinding it to a string here made every later use of the dict wrong.\n",
    "    question_str = val['question']\n",
    "    question_type = val['types']['semantic']\n",
    "    nouns = []\n",
    "    if question_type != 'global':\n",
    "        overall_flag = True  # stays True only if every select is usable\n",
    "        multiple_mention_flag = False\n",
    "        half_or_no_mention_flag = False\n",
    "        noun_not_in_q_flag = False\n",
    "        semantics = val.get('semantic', [])\n",
    "        for operation in semantics:\n",
    "            if operation['operation'] == 'select':\n",
    "                text = operation['argument']\n",
    "                match = re.search(pattern, text)\n",
    "                if match:\n",
    "                    noun = match.group(1)\n",
    "                    # Substring test on the raw question text.\n",
    "                    if noun not in question_str:\n",
    "                        overall_flag = False\n",
    "                        noun_not_in_q_flag = True\n",
    "                    else:\n",
    "                        numbers = match.group(2)\n",
    "                        number_list = [n.strip() for n in numbers.split(',')]\n",
    "                        if len(number_list) > 1:\n",
    "                            # as long as there is a multiple mention, add this case\n",
    "                            multiple_mention_flag = True\n",
    "                            overall_flag = False\n",
    "                        nouns.append(noun)\n",
    "                else:\n",
    "                    # The argument carries no numeric object id.\n",
    "                    overall_flag = False\n",
    "                    half_or_no_mention_flag = True\n",
    "        if len(nouns) > max_noun_num:\n",
    "            max_noun_num = len(nouns)\n",
    "        if overall_flag:\n",
    "            if len(nouns) > 1:\n",
    "                filtered_more_nouns_dic[key] = val\n",
    "            else:\n",
    "                filtered_single_noun_dic[key] = val\n",
    "        # Rejected questions are filed under the first reason that applies, in this order.\n",
    "        elif multiple_mention_flag:\n",
    "            multiple_mention_dic[key] = val\n",
    "        elif half_or_no_mention_flag:\n",
    "            half_or_no_mention_dic[key] = val\n",
    "        elif noun_not_in_q_flag:\n",
    "            noun_not_in_q[key] = val\n",
    "    else:\n",
    "        global_dic[key] = val"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "33c0ecbb-42fe-4bdf-9684-1cd576b08bf1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "201185\n",
      "133986\n",
      "7377\n",
      "207\n",
      "51617\n",
      "3128\n",
      "4870\n"
     ]
    }
   ],
   "source": [
    "# Report the size of the sample followed by the size of each first-filter bucket, one per line.\n",
    "size_report = (\n",
    "    subset,\n",
    "    filtered_single_noun_dic,\n",
    "    filtered_more_nouns_dic,\n",
    "    multiple_mention_dic,\n",
    "    half_or_no_mention_dic,\n",
    "    global_dic,\n",
    "    noun_not_in_q,\n",
    ")\n",
    "for bucket in size_report:\n",
    "    print(len(bucket))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "3ed0e645-5e53-4e4d-a795-efaa7227127c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7377"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Scratch arithmetic: 141363 minus the single-noun count (133986) gives 7377.\n",
    "# NOTE(review): 141363 is presumably the number of questions that passed the first filter - confirm.\n",
    "141363 - 133986"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "17ecb77f-f41a-4054-abb9-35078c8ef13b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7377"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# What remains of the sample after removing every other bucket; because each question\n",
    "# lands in exactly one bucket, this should equal len(filtered_more_nouns_dic).\n",
    "final = len(subset) - (len(filtered_single_noun_dic) + len(multiple_mention_dic) + len(half_or_no_mention_dic) + len(global_dic) + len(noun_not_in_q))\n",
    "final"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "20de30c3-4d64-4411-b64f-8a421bc257ba",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2\n"
     ]
    }
   ],
   "source": [
    "# Largest number of nouns the first filter collected for any single non-global question.\n",
    "print(max_noun_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "650c8a9a-7645-473e-8ab7-f4e71f302560",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store every bucket of the first filter as pretty-printed JSON (files are opened in text mode).\n",
    "bucket_files = {\n",
    "    '../data/filtered_single_nouns.json': filtered_single_noun_dic,\n",
    "    '../data/filtered_more_nouns.json': filtered_more_nouns_dic,\n",
    "    '../data/multiple_references_dic.json': multiple_mention_dic,\n",
    "    '../data/half_mention.json': half_or_no_mention_dic,\n",
    "    '../data/global_questions.json': global_dic,\n",
    "    '../data/noun_not_in_q.json': noun_not_in_q,\n",
    "}\n",
    "for out_path, bucket in bucket_files.items():\n",
    "    with open(out_path, 'w') as out_file:\n",
    "        json.dump(bucket, out_file, indent=4)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "aaececf3-83ea-4e23-bd3d-7731b9723ab6",
   "metadata": {},
   "source": [
    "# Hypernym replacement"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8354931f-a49b-4ff9-bb52-18fc0484ba8d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load the filtered_dic\n",
    "# Re-read the single-noun bucket from disk, replacing any in-memory copy.\n",
    "# NOTE(review): presumably lets this section run in a fresh kernel without redoing the first filter - confirm.\n",
    "with open(\"../data/filtered_single_nouns.json\", 'r') as f:\n",
    "    filtered_single_noun_dic = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "657a2b6b-837c-41eb-8864-a4743e9fc477",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "933\n"
     ]
    }
   ],
   "source": [
    "# read the hypernym file first\n",
    "# The notebook uses `hyper` as a dict from a noun to its list of hypernyms, e.g. hyper.get('man').\n",
    "with open('../data/gqa_entities/noun-hypernyms.json') as f:\n",
    "    hyper = json.load(f)\n",
    "# Number of nouns that have a hypernym entry.\n",
    "print(len(hyper))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "0715ac3c-6527-4d38-8dcf-7e3a16190c83",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "133986"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of single-noun questions that enter the hypernym replacement step.\n",
    "len(filtered_single_noun_dic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "2fe111d4-7c3c-4086-9121-fcc50fd806a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build hypernym-substituted variants of every single-noun question.\n",
    "# The pattern captures the whole noun phrase in front of the object id(s),\n",
    "# e.g. 'palm trees (1144319)' -> 'palm trees' rather than just 'trees'.\n",
    "pattern = r\"(\\w+(?:\\s+\\w+)*)\\s*\\((\\d+(?:\\s*,\\s*\\d+)*)\\)\"\n",
    "replaced_dic = {}  # key -> original question, rewritten variants and answer metadata\n",
    "no_hypernym = {}  # key -> untouched record for questions without a usable hypernym\n",
    "for key, val in filtered_single_noun_dic.items():\n",
    "    question_str = val['question']\n",
    "    semantics = val.get('semantic', [])\n",
    "    question_dic = {}\n",
    "    # Reset per record: otherwise a record without a usable 'select' would silently reuse\n",
    "    # the noun of the previous record (or raise NameError on the very first one).\n",
    "    noun = None\n",
    "    for operation in semantics:\n",
    "        if operation['operation'] != 'select':\n",
    "            continue\n",
    "        match = re.search(pattern, operation['argument'])\n",
    "        if match is None:\n",
    "            # Defensive: an argument without a numeric object id has no noun to extract.\n",
    "            continue\n",
    "        phrase = match.group(1)\n",
    "        # Prefer the full phrase when it has hypernyms and occurs in the question;\n",
    "        # otherwise fall back to its last word, which is all that (\\w+) alone captured.\n",
    "        if phrase in hyper and phrase in question_str:\n",
    "            noun = phrase\n",
    "        else:\n",
    "            noun = phrase.split()[-1]\n",
    "    hypers = hyper.get(noun) if noun is not None else None  # e.g. [\"adult\", \"person\"]\n",
    "    if hypers is not None:\n",
    "        # Replace whole-word occurrences only, so that 'man' does not rewrite 'woman'.\n",
    "        # If the noun never occurs as a whole word (e.g. only inside a plural), keep the\n",
    "        # plain substring replacement so the question is still rewritten.\n",
    "        whole_word = re.compile(r\"(?<!\\w)\" + re.escape(noun) + r\"(?!\\w)\")\n",
    "        has_whole_word = whole_word.search(question_str) is not None\n",
    "        new_qs = []\n",
    "        for hyp in hypers:\n",
    "            if has_whole_word:\n",
    "                # A callable replacement inserts `hyp` literally (no backslash processing).\n",
    "                new_q = whole_word.sub(lambda _m: hyp, question_str)\n",
    "            else:\n",
    "                new_q = question_str.replace(noun, hyp)\n",
    "            new_qs.append(new_q)\n",
    "        question_dic['original'] = question_str\n",
    "        question_dic['new'] = new_qs\n",
    "        question_dic['arg'] = noun\n",
    "        question_dic['hypernym'] = hypers\n",
    "        question_dic['answer'] = val['answer']\n",
    "        question_dic['fullAnswer'] = val['fullAnswer']\n",
    "        question_dic['imageId'] = val['imageId']\n",
    "    if len(question_dic) > 0:\n",
    "        replaced_dic[key] = question_dic\n",
    "    else:\n",
    "        no_hypernym[key] = val"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "a9dc63ca-8deb-4fcc-bb77-a2a6ce86c0d8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['adult', 'person']"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hyper.get('man') # just save the information in the json. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "3afe8527-f179-45b3-819a-c103adea6379",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the first five keys of the questions without hypernyms for manual inspection.\n",
    "# NOTE(review): this rebinds `keys`, which earlier held every key of the loaded question file.\n",
    "keys = list(no_hypernym.keys())[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "9a7ee3dd-5655-45ac-a16d-2744c5119070",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'semantic': [{'operation': 'select', 'dependencies': [], 'argument': 'palm trees (1144319)'}, {'operation': 'relate', 'dependencies': [0], 'argument': 'traffic light,to the left of,s (1144320)'}, {'operation': 'choose color', 'dependencies': [1], 'argument': 'black|orange'}], 'entailed': [], 'equivalent': ['03959891'], 'question': 'Which color is the signal light to the left of the palm trees, black or orange?', 'imageId': '2401770', 'isBalanced': False, 'groups': {'global': 'color', 'local': '10c-traffic light_color'}, 'answer': 'black', 'semanticStr': 'select: palm trees (1144319)->relate: traffic light,to the left of,s (1144320) [0]->choose color: black|orange [1]', 'annotations': {'answer': {}, 'question': {'11:13': '1144319', '4:6': '1144320'}, 'fullAnswer': {'1:3': '1144320'}}, 'types': {'detailed': 'chooseAttr', 'semantic': 'attr', 'structural': 'choose'}, 'fullAnswer': 'The traffic signal is black.'}\n",
      "\n",
      "\n",
      "{'semantic': [{'operation': 'select', 'dependencies': [], 'argument': 'shelf (821902)'}, {'operation': 'relate', 'dependencies': [0], 'argument': 'cabinet,to the left of,s (-)'}, {'operation': 'exist', 'dependencies': [1], 'argument': '?'}], 'entailed': ['05580453', '05580456', '05580454'], 'equivalent': ['05580453', '05580456', '05580454', '05580455'], 'question': 'Do you see a cabinet to the left of the shelf?', 'imageId': '2356290', 'isBalanced': False, 'groups': {'global': None, 'local': '13-shelf_cabinet'}, 'answer': 'no', 'semanticStr': 'select: shelf (821902)->relate: cabinet,to the left of,s (-) [0]->exist: ? [1]', 'annotations': {'answer': {}, 'question': {'10': '821902'}, 'fullAnswer': {'10': '821902', '4': '821884'}}, 'types': {'detailed': 'existRelSC', 'semantic': 'rel', 'structural': 'verify'}, 'fullAnswer': 'No, there is a rug to the left of the shelf.'}\n",
      "\n",
      "\n",
      "{'semantic': [{'operation': 'select', 'dependencies': [], 'argument': 'shorts (3392817)'}, {'operation': 'verify color', 'dependencies': [0], 'argument': 'khaki'}], 'entailed': ['16885845', '16885846'], 'equivalent': ['16885847', '16885846'], 'question': 'Do the shorts look khaki?', 'imageId': '2316535', 'isBalanced': False, 'groups': {'global': None, 'local': '06-shorts_khaki'}, 'answer': 'no', 'semanticStr': 'select: shorts (3392817)->verify color: khaki [0]', 'annotations': {'answer': {}, 'question': {'2': '3392817'}, 'fullAnswer': {'2': '3392817'}}, 'types': {'detailed': 'verifyAttrKC', 'semantic': 'attr', 'structural': 'verify'}, 'fullAnswer': 'No, the shorts are white.'}\n",
      "\n",
      "\n",
      "{'semantic': [{'operation': 'select', 'dependencies': [], 'argument': 'traffic light (1194191)'}, {'operation': 'relate', 'dependencies': [0], 'argument': 'traffic light,to the right of,s (1194177)'}, {'operation': 'choose color', 'dependencies': [1], 'argument': 'black|orange'}], 'entailed': ['17583180'], 'equivalent': ['17583181', '17583180'], 'question': 'Which color is the traffic signal that is to the right of the signal light, black or orange?', 'imageId': '2396905', 'isBalanced': False, 'groups': {'global': 'color', 'local': '10c-traffic light_color'}, 'answer': 'black', 'semanticStr': 'select: traffic light (1194191)->relate: traffic light,to the right of,s (1194177) [0]->choose color: black|orange [1]', 'annotations': {'answer': {}, 'question': {'13:15': '1194191', '4:6': '1194177'}, 'fullAnswer': {'1:3': '1194177'}}, 'types': {'detailed': 'chooseAttr', 'semantic': 'attr', 'structural': 'choose'}, 'fullAnswer': 'The traffic signal is black.'}\n",
      "\n",
      "\n",
      "{'semantic': [{'operation': 'select', 'dependencies': [], 'argument': 'smoke (1038127)'}, {'operation': 'filter color', 'dependencies': [0], 'argument': 'not(white)'}, {'operation': 'exist', 'dependencies': [1], 'argument': '?'}], 'entailed': ['10836143'], 'equivalent': ['10836157'], 'question': 'Is there any smoke that is not white?', 'imageId': '150297', 'isBalanced': False, 'groups': {'global': None, 'local': '04-smoke_n#white'}, 'answer': 'yes', 'semanticStr': 'select: smoke (1038127)->filter color: not(white) [0]->exist: ? [1]', 'annotations': {'answer': {}, 'question': {'3': '1038127'}, 'fullAnswer': {'4': '1038127'}}, 'types': {'detailed': 'existAttrNot', 'semantic': 'obj', 'structural': 'verify'}, 'fullAnswer': 'Yes, there is black smoke.'}\n",
      "\n",
      "\n",
      "['tree', 'woody plant', 'plant']\n"
     ]
    }
   ],
   "source": [
    "# for key, val in no_hypernym.items():\n",
    "#     print(val)\n",
    "#     break\n",
    "# Print the selected records that received no hypernym replacement, separated by blank lines.\n",
    "for key in keys:\n",
    "    print(no_hypernym[key])\n",
    "    print(\"\\n\")\n",
    "# Look up a full multi-word noun directly in the hypernym table for comparison.\n",
    "res = hyper.get('palm trees')\n",
    "print(res)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "deb788f1-b8cb-4d3e-9a72-37489e87d507",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'original': 'Is there a plate to the left of the man that is on the right side?', 'new': ['Is there a plate to the left of the adult that is on the right side?', 'Is there a plate to the left of the person that is on the right side?'], 'arg': 'man', 'hypernym': ['adult', 'person'], 'answer': 'no', 'fullAnswer': 'No, there is a bottle to the left of the man.', 'imageId': '2393739'}\n"
     ]
    }
   ],
   "source": [
    "# Show the first hypernym-replaced record as a spot check, then stop.\n",
    "for key, val in replaced_dic.items():\n",
    "    print(val)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "4a0547f2-be6c-4e2e-bebc-34901597374b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "86390\n"
     ]
    }
   ],
   "source": [
    "# Number of questions for which hypernym variants were generated.\n",
    "print(len(replaced_dic))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "a717538c-38fe-4531-9764-c7a5d807f8b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the hypernym-replaced questions as pretty-printed JSON.\n",
    "with open('../data/replacement.json', 'w') as f7:  \n",
    "     json.dump(replaced_dic, f7, indent=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "fc194a54-7feb-45d6-bfa3-312bfc6471dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load the scenegraph\n",
    "def load_scenegraphs(split, root=\"../data/sceneGraphs\"):\n",
    "    \"\"\"Load the scene-graph JSON file of one dataset split.\n",
    "\n",
    "    Args:\n",
    "        split: Split name used as the file-name prefix, e.g. \"val\".\n",
    "        root: Directory containing the ``{split}_sceneGraphs.json`` files.\n",
    "            The default is the directory this notebook has always read from.\n",
    "\n",
    "    Returns:\n",
    "        The parsed JSON content of ``{root}/{split}_sceneGraphs.json``.\n",
    "\n",
    "    Raises:\n",
    "        FileNotFoundError: If no such file exists.\n",
    "        json.JSONDecodeError: If the file is not valid JSON.\n",
    "    \"\"\"\n",
    "    graph_path = f\"{root}/{split}_sceneGraphs.json\"\n",
    "    # Pin the encoding so the read does not depend on the platform default.\n",
    "    with open(graph_path, encoding=\"utf-8\") as f:\n",
    "        return json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "a17e0db2-cfd4-4f37-b4e8-d4e950ada748",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scene graphs of the validation split; the filtering step looks them up by imageId.\n",
    "val_scenegraphs = load_scenegraphs(\"val\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "e53adfaa-2244-426e-853a-97a4d45a9cf4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scene-graph filter: keep a question only when its image has a scene graph with\n",
    "# 2 to 20 objects and no two objects share a name.\n",
    "final_dic = {}  # passed every check\n",
    "num_not_match_dic = {}  # object count outside the 2..20 range\n",
    "dup_obj_dic = {}  # object count in range, but some object name repeats\n",
    "none_num = 0  # questions whose imageId has no scene graph\n",
    "for key, val in replaced_dic.items():\n",
    "    scene = val_scenegraphs.get(val.get('imageId'))\n",
    "    if scene is None:\n",
    "        none_num += 1\n",
    "        continue\n",
    "    scene_objects = scene['objects']\n",
    "    if not (2 <= len(scene_objects) <= 20):\n",
    "        num_not_match_dic[key] = val\n",
    "        continue\n",
    "    object_names = [obj['name'] for obj in scene_objects.values()]\n",
    "    # A set drops repeats, so equal lengths mean every name is unique.\n",
    "    if len(set(object_names)) == len(object_names):\n",
    "        final_dic[key] = val\n",
    "    else:\n",
    "        dup_obj_dic[key] = val"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "e5c3d009-9946-44f2-b8bf-449474e99caf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "15762\n"
     ]
    }
   ],
   "source": [
    "# Questions that passed the scene-graph filter (2-20 objects, no duplicated object names).\n",
    "print(len(final_dic))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "c647c2bd-3c55-4a5d-a087-309080d6c4e4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "33439\n"
     ]
    }
   ],
   "source": [
    "# Questions dropped because the scene has fewer than 2 or more than 20 objects.\n",
    "print(len(num_not_match_dic))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "c8e766a9-46a1-4d92-9efa-c3cc34926036",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "37189\n",
      "52951\n"
     ]
    }
   ],
   "source": [
    "# Questions dropped for duplicated object names, then the total whose object count was in range.\n",
    "print(len(dup_obj_dic))\n",
    "print(len(dup_obj_dic)+len(final_dic))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "57ae3d74-d58b-4ea4-aa2a-191e432ccaca",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n"
     ]
    }
   ],
   "source": [
    "# Questions whose imageId has no entry in the validation scene graphs.\n",
    "print(none_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "35e63c84-aa94-4700-9021-dc97f7d713a0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "86390\n"
     ]
    }
   ],
   "source": [
    "# Size of the scene-graph filter's input, for comparison with its bucket counts.\n",
    "print(len(replaced_dic))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "id": "226cfee7-e28d-4f11-84fa-e963970084d4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.39519800576179603"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fraction of the hypernym-replaced questions kept by the scene-graph filter.\n",
    "# Every question in replaced_dic falls into exactly one bucket, so its size is the full denominator.\n",
    "len(final_dic) / len(replaced_dic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "d0e3d33a-31e0-412c-a3c7-28368ccfc95c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.24949621602256952"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fraction dropped because the scene has fewer than 2 or more than 20 objects.\n",
    "len(num_not_match_dic) / len(replaced_dic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "c461fb0a-59f6-44b4-97f0-4bb9fd2ed698",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.35523114355231145"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fraction of questions whose imageId has no scene graph.\n",
    "none_num / len(replaced_dic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "id": "6400b167-b659-4631-8752-0b71871d36cd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'original': 'Is there any cup to the left of the napkin?', 'new': ['Is there any cup to the left of the table linen?', 'Is there any cup to the left of the linen?', 'Is there any cup to the left of the clothing?'], 'arg': 'napkin', 'hypernym': ['table linen', 'linen', 'clothing'], 'answer': 'yes', 'fullAnswer': 'Yes, there is a cup to the left of the napkin.', 'imageId': '2347577'}\n"
     ]
    }
   ],
   "source": [
    "# Abandoned draft of a duplicate-name check over the scene graphs, kept commented out;\n",
    "# the scene-graph filter does this check with len(set(all_names)) != len(all_names).\n",
    "# for key, val in val_scenegraphs.items():\n",
    "#     duplicated_flag = False\n",
    "#     name_set = ()\n",
    "#     for item in val['objects']:\n",
    "#         if item['name'] not in name_set:\n",
    "#             name_set.add(item['name'])\n",
    "#         else\n",
    "#             duplicated_flag = True\n",
    "#             break\n",
    "#     if not duplicated_flag:\n",
    "# Show the first fully filtered record as a spot check, then stop.\n",
    "for key, val in final_dic.items():\n",
    "    print(val)\n",
    "    break\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "id": "56b7235a-5c19-4b0f-9928-c2bc0fa910d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the questions that survived every filter as pretty-printed JSON.\n",
    "with open(\"../data/val_final_filtered_questions.json\", 'w') as f8:\n",
    "    json.dump(final_dic, f8, indent=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d50dc43c-e7c3-4ab3-82bf-2b61ed6f6613",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
