{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os \n",
    "# Point the Hugging Face cache at a custom directory (placeholder path - edit before running).\n",
    "# NOTE(review): the assignment sits before the `datasets` import, presumably so the\n",
    "# library picks the value up when it is imported - confirm before reordering these lines.\n",
    "os.environ[\"HF_HOME\"] = \"/your/path/hf_cache\"\n",
    "from datasets import load_dataset \n",
    "\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the Visual7W annotation file with the generic JSON loader, requesting the \"train\" split.\n",
    "eval_data = load_dataset(\n",
    "    \"json\",\n",
    "    data_files=\"/your/path/visual7w/dataset.json\",\n",
    "    split=\"train\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from PIL import Image\n",
    "\n",
    "# Record layout noted while exploring the data:\n",
    "#   eval_data[i]             -> keys 'images', 'version', 'dataset'\n",
    "#   eval_data[i][\"images\"]   -> keys 'filename', 'image_id', 'qa_pairs', 'split'\n",
    "idx = 0\n",
    "\n",
    "# Show every question/answer pair attached to the record at position idx.\n",
    "first_images = eval_data[idx][\"images\"]\n",
    "for pair in first_images['qa_pairs']:\n",
    "    print(pair['question'], pair['answer'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather the answer of every val-split question that mentions both \"Where\" and \"scene\".\n",
    "places = [\n",
    "    qa_pair[\"answer\"]\n",
    "    for item in eval_data\n",
    "    if item[\"images\"][\"split\"] == 'val'\n",
    "    for qa_pair in item[\"images\"][\"qa_pairs\"]\n",
    "    if \"Where\" in qa_pair[\"question\"] and \"scene\" in qa_pair[\"question\"]\n",
    "]\n",
    "# Exactly one answer is stored per matching question, so the tally equals the list length.\n",
    "count = len(places)\n",
    "print(places)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "\n",
    "# Hand-written grouping of scene answers into coarse location clusters.\n",
    "#   key   -> cluster name\n",
    "#   value -> list of answer strings assigned to that cluster\n",
    "# The lists overlap on purpose: generic answers such as \"Outside a building\" or\n",
    "# \"In the city\" appear under several clusters, so one answer can map to many clusters.\n",
    "# Lookups elsewhere in this notebook use exact, case-sensitive `in` tests against these\n",
    "# lists, so the spelling and casing of every entry matters.\n",
    "# NOTE(review): odd entries (\"Mountian\", \"Indoores\", \"IN a bedroom\", \"In side building\")\n",
    "# presumably mirror the dataset answers verbatim - check the data before \"fixing\" them.\n",
    "# Repeated strings inside one list are harmless for membership tests.\n",
    "location_clusters = {\n",
    "    \"Beach/Ocean Environments\": [\n",
    "        \"The beach\", \"At the beach\", \"Beach\", \"Beach shore\", \"On the beach\", \"On a beach\",\n",
    "        \"By the ocean\", \"Near the ocean\", \"Ocean shore\", \"Beach scene\", \"On sand dune\",\n",
    "        \"Oahu\", \"Inside a home very close to a marina and the sea\", \"Nature\", \"Outside a building\",\n",
    "        \"Outside building\", \"Outdoor eating area\", \"Outside a city\", \"On the boardwalk\"\n",
    "    ],\n",
    "    \n",
    "    \"Sports Facilities\": [\n",
    "        \"Baseball game\", \"At a baseball field\", \"Tennis court\", \"At a baseball park\",\n",
    "        \"Tennis Court\", \"Tennis court\", \"A tennis court\", \"At a baseball game\",\n",
    "        \"On a baseball field\", \"A baseball field\", \"Soccer field\", \"Soccer field\",\n",
    "        \"Sports arena\", \"Outside a building\",\n",
    "        \"Outside building\", \"In the city\", \"In a city\",\n",
    "        \"A town\"\n",
    "    ],\n",
    "    \n",
    "    \"Kitchen/Dining Spaces\": [\n",
    "        \"In a kitchen\", \"A restaurant\", \"At dinner\", \"A dining room\", \"In a kitchen\",\n",
    "        \"The kitchen\", \"In a kitchen\", \"In a kitchen\", \"In a kitchen\", \"In a kitchen\",\n",
    "        \"In a kitchen being remodeled in a home\", \"Kitchen\", \"A kitchen\", \"Kitchen\",\n",
    "        \"In kitchen\", \"On a counter\", \"In a kitchen\", \"In a kitchen\", \"Bakery\",\n",
    "        \"Near the food\", \"Inside a building\", \"In side building\", \"In the city\", \"In a city\",\n",
    "        \"A town\"\n",
    "    ],\n",
    "    \n",
    "    \"Bathroom Spaces\": [\n",
    "        \"In a bathroom\", \"A bathroom\", \"Bathroom\", \"The bathroom\", \"In a bathroom\",\n",
    "        \"Inside of a bathroom\", \"In a bathroom\", \"In a bathroom\", \"In a bathroom\", \n",
    "        \"Inside a building\", \"In side building\", \"In the city\", \"In a city\",\n",
    "        \"A town\"\n",
    "    ],\n",
    "    \n",
    "    \"Urban Street Settings\": [\n",
    "        \"On the street\", \"On the street\", \"City street\", \"At a street\", \"The street\",\n",
    "        \"City street\", \"City street\", \"A city street\", \"A street\", \"Street\", \"Street\",\n",
    "        \"Third street\", \"On street\", \"On the street\", \"Road\", \"Side of the road\",\n",
    "        \"Outside a city\", \"On the sidewalk\", \"Broadway\", \"Downtown Toronto\", \"At an intersection\", \"Outside a building\",\n",
    "        \"Outside building\", \"In the city\", \"In a city\",\n",
    "        \"A town\"\n",
    "    ],\n",
    "    \n",
    "    \"Natural Outdoor Settings\": [\n",
    "        \"Mountain\", \"On a grassland\", \"In an open field\", \"Nature\", \"A farm\",\n",
    "        \"Mountian\", \"Field\", \"Woods\", \"Yard\", \"Grassland\", \"Nature\", \"Outside a building\",\n",
    "        \"Outside building\", \"Outdoor eating area\", \"Outside a city\", \"On the boardwalk\"\n",
    "    ],\n",
    "    \n",
    "    \"Airport/Air Travel\": [\n",
    "        \"At an airport\", \"Airport\", \"On an air strip\", \"Airport\", \"Airport\",\n",
    "        \"Airport\", \"Airport runway\", \"In the sky\", \"Outside a building\",\n",
    "        \"Outside building\", \"Inside a building\", \"In side building\", \"In the city\", \"In a city\",\n",
    "        \"A town\"\n",
    "    ],\n",
    "    \n",
    "    \"Zoo/Safari Settings\": [\n",
    "        \"At the zoo\", \"In the zoo\", \"A zoo\", \"At a zoo\", \"In a zoo\", \"Zoo\",\n",
    "        \"On a safari\", \"At a safari\", \"At a waterhole\", \"Nature\", \"Outside a building\",\n",
    "        \"Outside building\", \"Outdoor eating area\", \"Outside a city\", \"In the city\", \"In a city\",\n",
    "        \"A town\"\n",
    "    ],\n",
    "    \n",
    "    \"Winter Sports Areas\": [\n",
    "        \"Ski slope\", \"Ski slopes\", \"A ski lodge\", \"Ski slope\", \"Lodge\", \"Outside a building\",\n",
    "        \"Outside building\"\n",
    "    ],\n",
    "    \n",
    "    \"Train/Transportation Infrastructure\": [\n",
    "        \"Train station\", \"Railroad tracks\", \"Station\", \"Train station\", \"Train station\",\n",
    "        \"On a dock by the water\", \"Harbor\", \"On an air strip\", \"Airport runway\", \"Outside a building\",\n",
    "        \"Outside building\", \"Inside a building\", \"In side building\", \"In the city\", \"In a city\",\n",
    "        \"A town\"\n",
    "    ],\n",
    "    \n",
    "    \"Indoor Home Spaces\": [\n",
    "        \"Inside a home\", \"In a room\", \"Living Room\", \"IN a bedroom\", \n",
    "        \"A man on a phone in a room\", \"On a desk\", \"Table\", \"In a living room\", \n",
    "        \"Bedroom\", \"In a house\", \"Inside of a home\", \"In a building\", \"In a parlor\",\n",
    "        \"Inside a refrigerator\", \"In a kitchen\", \"In a bathroom\", \"Indoores\", \n",
    "        \"Apple computers\", \"On a desk\", \"In front of a television\", \"Inside a building\", \"In side building\", \"In the city\", \"In a city\",\n",
    "        \"A town\"\n",
    "    ],\n",
    "    \n",
    "    \"Parking/Vehicle Areas\": [\n",
    "        \"A parking lot\", \"In a parking lot\", \"Parking area\", \"In a car\",\n",
    "        \"At the motorcycle Race\", \"Outside a building\",\n",
    "        \"Outside building\", \"In the city\", \"In a city\",\n",
    "        \"A town\"\n",
    "    ],\n",
    "    \n",
    "    \"Parks/Recreational Outdoor Spaces\": [\n",
    "        \"At a skate park\", \"Park\", \"At a park\", \"A Park\", \"Backyard patio\", \"A backyard\",\n",
    "        \"Outdoor eating area\", \"Nature\", \"Field\", \"Yard\", \"Outside a building\",\n",
    "        \"Outside building\", \"In the city\", \"In a city\",\n",
    "        \"A town\", \"On the boardwalk\"\n",
    "    ],\n",
    "    \n",
    "    \"Water Features\": [\n",
    "        \"Near a river\", \"A river\", \"At the river\", \"On a dock by the water\",\n",
    "        \"At a fountain\", \"Harbor\", \"By the ocean\", \"Near the ocean\", \"Ocean shore\", \"Nature\", \"Outside a building\",\n",
    "        \"Outside building\", \"Outdoor eating area\", \"Outside a city\", \"In the city\", \"In a city\",\n",
    "        \"A town\", \"On the boardwalk\"\n",
    "    ],\n",
    "    \n",
    "    \"Urban Centers/Buildings\": [\n",
    "        \"Outside a building\", \"Outside building\", \"Downtown Toronto\", \n",
    "        \"Inside a building\", \"In side building\", \"In the city\", \"In a city\",\n",
    "        \"A town\", \"London\", \"Tokyo\", \"In front of clock tower\",\n",
    "        \"This scene takes place on a road in front of a large building\",\n",
    "        \"On the street\", \"City street\", \"Broadway\"\n",
    "    ],\n",
    "    \n",
    "    \"Specialized Indoor Commercial Spaces\": [\n",
    "        \"In a coffee shop\", \"Museum\", \"A tourist trap\", \"Farmers market\", \"At a market\",\n",
    "        \"Bakery\", \"A restaurant\", \"In a coffee shop\", \"Outside a building\",\n",
    "        \"Outside building\", \"Inside a building\", \"In side building\", \"In the city\", \"In a city\",\n",
    "        \"A town\"\n",
    "    ],\n",
    "    \n",
    "    \"Special Events/Gatherings\": [\n",
    "        \"At a baby shower\", \"A press conference\", \"A birthday party\", \"An outdoor show\",\n",
    "        \"At the concert photography session\", \"Performance\", \"Baseball game\",\n",
    "        \"At a baseball game\", \"Outside a building\",\n",
    "        \"Outside building\", \"Inside a building\", \"In side building\", \"In the city\", \"In a city\",\n",
    "        \"A town\"\n",
    "    ],\n",
    "    \n",
    "    \"Educational/Institutional Settings\": [\n",
    "        \"In a classroom\", \"Museum\", \"A press conference\", \"Apple computers\", \"On a desk\"\n",
    "        , \"In front of a television\", \"Inside a building\", \"In side building\", \"In the city\", \"In a city\",\n",
    "        \"A town\"\n",
    "    ],\n",
    "    \n",
    "    \"Tourist/Travel Destinations\": [\n",
    "        \"Oahu\", \"A tourist trap\", \"London\", \"Tokyo\", \"Downtown Toronto\",\n",
    "        \"Outside a city\", \"Beach\", \"Mountain\", \"On a safari\", \"Outside a building\",\n",
    "        \"Outside building\"\n",
    "    ],\n",
    "    \n",
    "    \"Technology Environments\": [\n",
    "        \"Apple computers\", \"On a desk\", \"In front of a television\", \"Inside a building\", \"In side building\", \"In the city\", \"In a city\",\n",
    "        \"A town\"\n",
    "    ],\n",
    "    \n",
    "    \"Construction/Maintenance Areas\": [\n",
    "        \"At a construction site\", \"In a kitchen being remodeled in a home\", \"Outside a building\",\n",
    "        \"Outside building\", \"Inside a building\", \"In side building\", \"In the city\", \"In a city\",\n",
    "        \"A town\"\n",
    "    ]\n",
    "}\n",
    "\n",
    "def find_clusters_for_location(location, clusters_dict):\n",
    "    \"\"\"List the names of all clusters whose entries include ``location``.\n",
    "\n",
    "    Args:\n",
    "        location: Location string to look up (exact, case-sensitive match).\n",
    "        clusters_dict: Mapping of cluster name -> collection of location strings.\n",
    "\n",
    "    Returns:\n",
    "        Matching cluster names in the mapping's iteration order; an empty\n",
    "        list when no cluster contains ``location``.\n",
    "    \"\"\"\n",
    "    return [\n",
    "        name\n",
    "        for name, members in clusters_dict.items()\n",
    "        if location in members\n",
    "    ]\n",
    "\n",
    "# Demo lookup: which clusters list this exact location string?\n",
    "test_location = \"On the beach\"\n",
    "clusters = find_clusters_for_location(test_location, location_clusters)\n",
    "print(f\"'{test_location}' belongs to these clusters: {clusters}\")\n",
    "\n",
    "# De-duplicate the location strings across every cluster.\n",
    "all_locations = set().union(*location_clusters.values())\n",
    "\n",
    "print(f\"Total unique locations: {len(all_locations)}\")\n",
    "\n",
    "# Leading words that let an answer follow \"the scene\" directly, without an added \"of\".\n",
    "prepositions = [\"at\", \"in\", \"on\", \"by\", \"near\", \"outside\", \"inside\"]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Map every val-split image that has a \"Where ... scene\" question to the\n",
    "# location clusters its answer is listed under.\n",
    "paths_to_cluster = {}\n",
    "unmatched_answers = []  # answers not listed in any cluster, kept for later review\n",
    "for item in eval_data:\n",
    "    image_info = item[\"images\"]\n",
    "    if image_info[\"split\"] != 'val':\n",
    "        continue\n",
    "    for qa_pair in image_info[\"qa_pairs\"]:\n",
    "        question = qa_pair[\"question\"]\n",
    "        if \"Where\" not in question or \"scene\" not in question:\n",
    "            continue\n",
    "        # Compare only the text before the first period; matching is exact and case-sensitive.\n",
    "        ans = qa_pair[\"answer\"].split(\".\")[0]\n",
    "        # setdefault (not plain assignment) so a second matching question on the same\n",
    "        # image adds to its clusters instead of wiping the ones found earlier.\n",
    "        image_clusters = paths_to_cluster.setdefault(image_info[\"filename\"], [])\n",
    "        matched = [name for name, members in location_clusters.items() if ans in members]\n",
    "        for name in matched:\n",
    "            if name not in image_clusters:\n",
    "                image_clusters.append(name)\n",
    "        if not matched:\n",
    "            # Report instead of pausing on input(): a prompt here stalls Run All and\n",
    "            # fails when the notebook is executed without an interactive stdin.\n",
    "            unmatched_answers.append(ans)\n",
    "            print(ans)\n",
    "\n",
    "print(f\"{len(unmatched_answers)} answer(s) matched no cluster\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build one text-to-image retrieval query per val-split \"Where ... scene\" question:\n",
    "# the candidate list holds the true image first, followed by sampled distractors\n",
    "# whose clusters do not overlap with the true image's clusters.\n",
    "NUM_NEGATIVES = 99  # distractor images appended after the true target of each query\n",
    "SAMPLING_SEED = 0  # fixed so the exported file is identical across re-runs\n",
    "negative_rng = random.Random(SAMPLING_SEED)  # local RNG; leaves the global random state alone\n",
    "\n",
    "# Build the per-image cluster sets once instead of once per comparison.\n",
    "cluster_sets = {path: set(names) for path, names in paths_to_cluster.items()}\n",
    "\n",
    "new_data = []\n",
    "for item in eval_data:\n",
    "    image_info = item[\"images\"]\n",
    "    if image_info[\"split\"] != 'val':\n",
    "        continue\n",
    "    filename = image_info[\"filename\"]\n",
    "    for qa_pair in image_info[\"qa_pairs\"]:\n",
    "        question = qa_pair[\"question\"]\n",
    "        if \"Where\" not in question or \"scene\" not in question:\n",
    "            continue\n",
    "\n",
    "        ans = qa_pair[\"answer\"].split(\".\")[0].lower()\n",
    "        words = ans.split()\n",
    "        if not words:\n",
    "            # An empty answer has no first word to inspect and no query to phrase.\n",
    "            print(f\"Skipping empty answer for {filename}\")\n",
    "            continue\n",
    "\n",
    "        # Answers that already start with a preposition read naturally right after\n",
    "        # \"the scene\"; every other answer gets \"of\" inserted.\n",
    "        if words[0] in prepositions:\n",
    "            qry_text = f\"Find me an everyday image that shows the scene {ans}.\\n\"\n",
    "        else:\n",
    "            qry_text = f\"Find me an everyday image that shows the scene of {ans}.\\n\"\n",
    "\n",
    "        new_item = {\n",
    "            'qry_text': qry_text,\n",
    "            'qry_img_path': '',\n",
    "            'tgt_text': \"<|image_1|> Represent the given image.\",\n",
    "            'tgt_img_path': [filename],\n",
    "        }\n",
    "\n",
    "        current_clusters = cluster_sets[filename]\n",
    "        # `path != filename` matters when the target has no clusters at all: an empty\n",
    "        # set is disjoint from itself, so the target would otherwise be eligible as\n",
    "        # one of its own distractors.\n",
    "        negatives = [\n",
    "            path\n",
    "            for path, names in cluster_sets.items()\n",
    "            if path != filename and names.isdisjoint(current_clusters)\n",
    "        ]\n",
    "        if len(negatives) < NUM_NEGATIVES:\n",
    "            # Too few non-overlapping images to fill the candidate list; report and drop.\n",
    "            print(ans)\n",
    "            continue\n",
    "\n",
    "        new_item['tgt_img_path'].extend(negative_rng.sample(negatives, NUM_NEGATIVES))\n",
    "        new_data.append(new_item)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Number of retrieval queries that were kept (new_data holds one entry per query).\n",
    "print(len(new_data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# Serialise the generated queries to disk as an indented JSON array.\n",
    "with open('Visual7W_scene_retrieval.json', 'w') as out_file:\n",
    "    out_file.write(json.dumps(new_data, indent=4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Round-trip check: the exported file should be readable through load_dataset.\n",
    "new_eval_data = load_dataset(\n",
    "    \"json\",\n",
    "    data_files=\"Visual7W_scene_retrieval.json\",\n",
    "    split=\"train\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspect one exported query together with its ground-truth image.\n",
    "idx = 0\n",
    "sample = new_eval_data[idx]\n",
    "print(sample.keys())\n",
    "print(sample['qry_text'])\n",
    "\n",
    "# Entry 0 of tgt_img_path is the target image; the sampled distractors follow it.\n",
    "target_path = \"/your/path/visual7w/images/\" + sample['tgt_img_path'][0]\n",
    "img = Image.open(target_path).convert(\"RGB\")\n",
    "img"
   ]
  }
 ],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 2
}
