{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from semantic_memory import taxonomy\n",
    "from collections import defaultdict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_json(filename):\n",
    "    with open(filename, 'r') as f:\n",
    "        return json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "noun_hypernyms = read_json(\"../data/gqa_entities/noun-hypernyms.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# get all hypernym paths of a noun.\n",
    "# the hypernyms list is structured so that succeeding elements are more general than the previous ones\n",
    "# example if the entry is \"jacket\": [\"coat\", \"garment\", \"clothing\"], then \"jacket\" is a type of \"coat\", \"coat\" is a type of \"garment\", and \"garment\" is a type of \"clothing\"\n",
    "\n",
    "hypernym_paths = defaultdict(set)\n",
    "\n",
    "for noun, hypernyms in noun_hypernyms.items():\n",
    "    hypernym_paths[noun].add(tuple(hypernyms))\n",
    "    # each hypernym is the child of the next one\n",
    "    for i in range(len(hypernyms) - 1):\n",
    "        hypernym_paths[hypernyms[i]].add(tuple(hypernyms[i + 1:]))\n",
    "\n",
    "# store only the longest paths\n",
    "longest_paths = {}\n",
    "for noun, paths in hypernym_paths.items():\n",
    "    longest_paths[noun] = max(paths, key=len)\n",
    "\n",
    "# now store the unique hypernym pairs\n",
    "hypernym_pairs = {}\n",
    "for noun, path in longest_paths.items():\n",
    "    hypernym_pairs[noun] = path[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "Tree = taxonomy.Nodeset(taxonomy.Node)\n",
    "root = Tree['ROOT']\n",
    "\n",
    "# # populate the tree\n",
    "\n",
    "for concept, path in hypernym_pairs.items():\n",
    "    node = Tree[concept]\n",
    "    parent = Tree[path]\n",
    "    node.add_parent(parent)\n",
    "    parent.add_child(node)\n",
    "\n",
    "# make sure root is added as a parent to all top level nodes\n",
    "for value, node in Tree.items():\n",
    "    if value == \"ROOT\":\n",
    "        continue\n",
    "    elif node.parent is None:\n",
    "        node.add_parent(root)\n",
    "        root.add_child(node)\n",
    "\n",
    "\n",
    "# Tree.default_factory = None\n",
    "Tree.default_factory = None\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Getting the hypernym path all the way upto ROOT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['cat', 'feline', 'mammal', 'vertebrate', 'animal', 'ROOT']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = Tree['cat']\n",
    "x.path()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Printing a concept's descendants (all levels, flattened)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Node cat\n",
       " Parent:feline\n",
       " Children: ['kitten'],\n",
       " Node kitten\n",
       " Parent:cat\n",
       " Children: [],\n",
       " Node cheetah\n",
       " Parent:feline\n",
       " Children: [],\n",
       " Node leopard\n",
       " Parent:feline\n",
       " Children: [],\n",
       " Node lion\n",
       " Parent:feline\n",
       " Children: [],\n",
       " Node tiger\n",
       " Parent:feline\n",
       " Children: []]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = Tree['feline']\n",
    "list(x.descendants())\n",
    "\n",
    "# if you only want the strings, just do X.value() for X in x.descendants()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Get only the leaf-level descendants of a concept"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['chicken',\n",
       " 'turkey',\n",
       " 'duck',\n",
       " 'flamingo',\n",
       " 'goose',\n",
       " 'penguin',\n",
       " 'seagull',\n",
       " 'swan',\n",
       " 'eagle',\n",
       " 'owl',\n",
       " 'ostrich',\n",
       " 'parrot',\n",
       " 'peacock',\n",
       " 'pigeon']"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = Tree['bird']\n",
    "x.leaf_values()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get siblings of a concept"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['cheetah', 'leopard', 'lion', 'tiger']"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = Tree['cat']\n",
    "x.siblings()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Node food\n",
       "Parent:ROOT\n",
       "Children: ['nut', 'fruit', 'produce', 'meat', 'baked good', 'herb', 'meal', 'soup', 'sandwich', 'burrito', 'burritoes', 'dairy product', 'sweet', 'casserole', 'breakfast food', 'chili', 'cinnamon', 'coconut', 'salad', 'condiment', 'grain', 'dessert', 'dumpling', 'egg', 'egg roll', 'egg white', 'egg yolk', 'french toast', 'garlic', 'spice', 'sweetening', 'seafood', 'pasta', 'noodles', 'porridge', 'omelette', 'relish', 'pizza', 'pizza pie', 'pizza slice', 'snack', 'pudding', 'stew', 'stir fry', 'sushi', 'taco', 'tacoes', 'tofu', 'yolk']"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Tree['food']"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
