{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import itertools\n",
    "import pathlib\n",
    "\n",
    "from semantic_memory import memory\n",
    "from collections import defaultdict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "521it [00:00, 4535.13it/s]\n"
     ]
    }
   ],
   "source": [
    "MEM_PATH = \"../data/comps-world\"\n",
    "\n",
    "# load semantic memory\n",
    "mem = memory.Memory(\n",
    "    concept_path=f\"{MEM_PATH}/concept_senses.csv\",\n",
    "    feature_path=f\"{MEM_PATH}/xcslb_compressed.csv\",\n",
    "    matrix_path=f\"{MEM_PATH}/concept_matrix.txt\",\n",
    "    feature_metadata=f\"{MEM_PATH}/feature_lexicon.csv\",\n",
    ")\n",
    "mem.create()\n",
    "\n",
    "lexicon = defaultdict(memory.Concept)\n",
    "senses = defaultdict(str)\n",
    "with open(\"../data/comps-world/categories.csv\", \"r\") as f:\n",
    "    entries = csv.DictReader(f)\n",
    "    for line in entries:\n",
    "        lexicon[line[\"concept\"]] = memory.Concept(\n",
    "            concept=line[\"concept\"],\n",
    "            category=line[\"category\"],\n",
    "            sense=line[\"node-value\"],\n",
    "            article=line[\"article\"],\n",
    "            singular=line[\"singular\"],\n",
    "            plural=line[\"plural\"],\n",
    "            generic=line[\"generic\"],\n",
    "        )\n",
    "        senses[line[\"concept\"]] = line[\"node-value\"]\n",
    "    for c, entry in mem.lexicon.items():\n",
    "        lexicon[c] = entry\n",
    "        # senses[entry.sense] = c\n",
    "        senses[c] = entry.sense\n",
    "\n",
    "lexicon = dict(lexicon)\n",
    "senses = dict(senses)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "CATEGORIES = ['animal', 'vertebrate', 'invertebrate', 'mammal', 'bird', 'reptile', 'amphibian', 'fish', 'arthropod', 'insect']\n",
    "CATEGORIES += ['bird of prey', 'aquatic bird', 'canine', 'feline', 'primate', 'aquatic mammal', 'rodent']\n",
    "CATEGORIES += mem.taxonomy['vertebrate.n.01'].leaf_values()\n",
    "CATEGORIES += mem.taxonomy['invertebrate.n.01'].leaf_values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "for c in CATEGORIES:\n",
    "    if c not in lexicon:\n",
    "        print(c)\n",
    "\n",
    "big_concept_list = [lexicon[c] for c in CATEGORIES]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "def is_a(c1, c2):\n",
    "    \"\"\"c1 is a c2\"\"\"\n",
    "    if c1.generic == \"s\":\n",
    "        concept1 = c1.article\n",
    "        if c2.concept in (\"bird of prey\", \"aquatic bird\", \"aquatic mammal\"):\n",
    "            taxonomic_phrase = \"is\"\n",
    "            concept2 = c2.article\n",
    "        else:\n",
    "            taxonomic_phrase = \"is a type of\"\n",
    "            concept2 = c2.singular\n",
    "    else:\n",
    "        concept1 = c1.plural\n",
    "        if c2.concept in (\"bird of prey\", \"aquatic bird\", \"aquatic mammal\"):\n",
    "            taxonomic_phrase = \"are\"\n",
    "            concept2 = c2.plural\n",
    "        else:\n",
    "            taxonomic_phrase = \"are a type of\"\n",
    "            concept2 = c2.singular\n",
    "\n",
    "    return f\"{concept1} {taxonomic_phrase} {concept2}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'an eagle is a type of bird'"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# is_a(lexicon['broccoli'], lexicon['animal'])\n",
    "is_a(lexicon['eagle'], lexicon['bird']) # not good"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "pairwise = [] # child, parent, statement\n",
    "for c1, c2 in itertools.product(big_concept_list, big_concept_list):\n",
    "    if c1.concept != \"animal\":\n",
    "        pairwise.append((c1.concept, c2.concept, is_a(c1, c2)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "pathlib.Path(\"../data/tsv/\").mkdir(parents=True, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"Answer the question. Is it true that {}? Answer with Yes or No:\"\"\"\n",
    "\n",
    "with open(\"../data/tsv/stimuli.csv\", \"w\") as f:\n",
    "    writer = csv.writer(f)\n",
    "    writer.writerow([\"concept1\", \"concept2\", \"question\"])\n",
    "    for c1, c2, isa_statement in pairwise:\n",
    "        writer.writerow((c1, c2, f\"Answer the question. Is it true that {isa_statement}? Answer with Yes or No:\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "pathlib.Path(\"../data/pairwise-categorization/\").mkdir(parents=True, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "categories = mem.taxonomy['vertebrate.n.01'].leaf_values() + mem.taxonomy['invertebrate.n.01'].leaf_values()\n",
    "\n",
    "with open(\"../data/pairwise-categorization/stimuli.csv\", \"w\") as f:\n",
    "    writer = csv.writer(f)\n",
    "    writer.writerow([\"concept1\", \"concept2\", \"question\"])\n",
    "    for c1, c2 in itertools.product(categories, categories):\n",
    "        if c1 != c2:\n",
    "            pl_concept1 = lexicon[c1].plural\n",
    "            pl_concept2 = lexicon[c2].plural\n",
    "            writer.writerow((c1, c2, f\"Answer the question. Do {pl_concept1} and {pl_concept2} belong to the same category? Answer with Yes or No:\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
