{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ebc32717",
   "metadata": {},
   "outputs": [],
   "source": [
    "from nltk.corpus import wordnet as wn\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "75408177",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"/datasets/imagenet/splits.pkl\", \"rb\") as f:\n",
    "    imagenet_data = pickle.load(f)\n",
    "synsets = list(set(map(lambda x: x[\"synset_id\"], imagenet_data[\"train\"])))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3d09ac14",
   "metadata": {},
   "outputs": [],
   "source": [
    "def synset_to_imagenet_id(synset):\n",
    "    \"\"\"\n",
    "    Convert an NLTK Synset into an ImageNet-style ID.\n",
    "    \"\"\"\n",
    "    pos = synset.pos()\n",
    "    offset = synset.offset()\n",
    "    return f\"{pos}{offset:08d}\"\n",
    "\n",
    "def imagenet_id_to_synset(imagenet_id):\n",
    "    \"\"\"\n",
    "    Convert an ImageNet-style ID to an NLTK Synset.\n",
    "    \"\"\"\n",
    "    pos = imagenet_id[0]\n",
    "    offset = int(imagenet_id[1:])\n",
    "    return wn.synset_from_pos_and_offset(pos, offset)\n",
    "\n",
    "def all_hypernyms(synset):\n",
    "    \"\"\"\n",
    "    Return the full set of hypernyms (direct and indirect) of a WordNet synset.\n",
    "    \"\"\"\n",
    "    visited = set()\n",
    "\n",
    "    def recurse(s):\n",
    "        for h in s.hypernyms():\n",
    "            if h not in visited:\n",
    "                visited.add(h)\n",
    "                recurse(h)\n",
    "\n",
    "    recurse(synset)\n",
    "    visited.add(synset)\n",
    "    return visited\n",
    "\n",
    "def get_remaning_synsets(synsets, chosen_concepts):\n",
    "    remaining_synsets = []\n",
    "    chosen_concept_synsets = set(map(imagenet_id_to_synset, chosen_concepts))\n",
    "    for synset_id in synsets:\n",
    "        synset = imagenet_id_to_synset(synset_id)\n",
    "        hypernyms = all_hypernyms(synset)\n",
    "        if not hypernyms.intersection(chosen_concept_synsets):\n",
    "            remaining_synsets.append(synset_id)\n",
    "    return remaining_synsets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99bece3c",
   "metadata": {},
   "outputs": [],
   "source": [
    "chosen_concepts = [\n",
    "    \"n13086908\", # Plant part\n",
    "    \"n04194289\", # Ship\n",
    "    \"n01861778\", # Mammal\n",
    "    \"n09287968\", # Geological formation\n",
    "    \"n02778669\", # Ball\n",
    "    \"n03051540\", # Clothing\n",
    "    \"n02316707\", # Echinoderm\n",
    "    \"n04285146\", # Sports equipment\n",
    "    \"n03614007\", # Keyboard\n",
    "    \"n03743902\", # Memorial\n",
    "    \"n03736970\", # Mechanical device\n",
    "    \"n04576211\", # Wheeled vehicle\n",
    "    \"n02159955\", # Insect\n",
    "    \"n12992868\", # Fungus\n",
    "    \"n02913152\", # Building\n",
    "    \"n04451818\", # Tool\n",
    "    \"n02898711\", # Bridge\n",
    "    \"n01976957\", # Crab\n",
    "    \"n01503061\", # Bird\n",
    "    \"n03800933\", # Musical instrument\n",
    "    \"n01661091\", # Reptile\n",
    "    \"n04524313\", # Vehicle\n",
    "    \"n04202417\", # Shop\n",
    "    \"n07555863\", # Food\n",
    "    \"n03094503\", # Container\n",
    "    \"n02512053\", # Fish\n",
    "    \"n02796623\", # Barrier\n",
    "    \"n03739693\", # Medical instrument\n",
    "    \"n04341686\", # Structure\n",
    "    \"n03682487\", # Lock\n",
    "    \"n04447443\", # Toiletry\n",
    "    \"n03278248\", # Electronic equipment\n",
    "    \"n03122748\", # Covering\n",
    "    \"n03926148\", # Photographic equipment\n",
    "    \"n03405265\", # Furnishing\n",
    "    \"n03269401\", # Electrical device\n",
    "    \"n03699975\", # Machine\n",
    "    \"n01905661\", # Invertebrate\n",
    "    \"n02924116\", # Bus\n",
    "    \"n01627424\", # Amphibian\n",
    "    \"n04263760\", # Source of illumination\n",
    "    \"n04147495\", # Scientific instrument\n",
    "    \"n00007846\", # Person\n",
    "    \"n03183080\", # Device\n",
    "    \"n00021265\", # Food\n",
    "    \"n04516672\", # Utensil\n",
    "    \"n03563967\", # Implement\n",
    "    \"n03528263\", # Home appliance\n",
    "    \"n03309808\", # Fabric\n",
    "    \"n03294048\", # Equipment\n",
    "    \"n03078287\", # Communication system\n",
    "    \"n00017222\", # Plant\n",
    "    \"n04019101\", # Public transport\n",
    "    \"n03964744\", # Plaything\n",
    "    \"n03100490\", # Conveyance\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "93f2879b",
   "metadata": {},
   "outputs": [],
   "source": [
    "remaining = get_remaning_synsets(synsets, chosen_concepts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7106dc6f",
   "metadata": {},
   "outputs": [],
   "source": [
    "len(remaining)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ef4fbe6f",
   "metadata": {},
   "outputs": [],
   "source": [
    "imagenet_id_to_synset(remaining[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "60a1d0a0",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_hypernyms(imagenet_id_to_synset(remaining[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "75e4560f",
   "metadata": {},
   "outputs": [],
   "source": [
    "for concept in chosen_concepts:\n",
    "    # Print the concept name and the number of synsets with the concept as a hypernym\n",
    "    number_of_classes = 0\n",
    "    for synset_id in synsets:\n",
    "        synset = imagenet_id_to_synset(synset_id)\n",
    "        hypernyms = all_hypernyms(synset)\n",
    "        if imagenet_id_to_synset(concept) in hypernyms:\n",
    "            number_of_classes += 1\n",
    "    print(f\"{imagenet_id_to_synset(concept)}: {number_of_classes}\")\n",
    "    assert number_of_classes > 1"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
