{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from evaluator import EmbeddingEvaluator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate(base_name, base_dir, ratio):\n",
    "    result_path = \"results_rGodel/\" + base_name + \"_v1.txt\"\n",
    "    if os.path.exists(result_path):\n",
    "        print(\"Skip\", base_name)\n",
    "        # return\n",
    "    \n",
    "    axiom_path = '../input/' + base_name + '_subclassaxioms.txt'\n",
    "    cname_pickle = base_dir + base_name + '.c2id.pkl'\n",
    "    rname_pickle = base_dir + base_name + '.r2id.pkl'\n",
    "    evaluator = EmbeddingEvaluator(axiom_path, cname_pickle, rname_pickle)\n",
    "    outputs = ['\\nMASK RATIO = {}\\n'.format(ratio)]\n",
    "    \n",
    "    cEmb_path = base_dir + base_name + '.cEmb.pkl'\n",
    "    rEmb_path = base_dir + base_name + '.rEmb.pkl'\n",
    "    evaluator.load_embedding(cEmb_path, rEmb_path)\n",
    "    scores = []\n",
    "    for mode in ['crisp']:\n",
    "        result = evaluator.evaluate(mode=mode)\n",
    "        scores.append(result['score'])\n",
    "\n",
    "    outputs.append(f\"Find {result['all_axiomss']} subclass axioms, {result['parsed_axioms']} parsed\")\n",
    "    outputs.append(f\"Learned embedding: {np.average(scores):.3f}  <= \" + ', '.join([f'{s:.3f}' for s in scores]))\n",
    "          \n",
    "    cEmb_path = base_dir + base_name + f'.masked_cEmb.npy'\n",
    "    rEmb_path = base_dir + base_name + f'.masked_rEmb.npy'\n",
    "    evaluator.load_embedding(cEmb_path, rEmb_path)\n",
    "    scores = []\n",
    "    for mode in ['crisp']:\n",
    "        result = evaluator.evaluate(mode=mode)\n",
    "        scores.append(result['score'])\n",
    "    outputs.append(f\"Masked  embedding: {np.average(scores):.3f}  <= \" + ', '.join([f'{s:.3f}' for s in scores]))\n",
    "\n",
    "    cEmb_path = base_dir + base_name + f'.true_cEmb.npy'\n",
    "    rEmb_path = base_dir + base_name + f'.true_rEmb.npy'\n",
    "    evaluator.load_embedding(cEmb_path, rEmb_path)\n",
    "    scores = []\n",
    "    for mode in ['crisp']:\n",
    "        result = evaluator.evaluate(mode=mode)\n",
    "        scores.append(result['score'])\n",
    "    outputs.append(f\"Oracle  embedding: {np.average(scores):.3f}  <= \" + ', '.join([f'{s:.3f}' for s in scores]))\n",
    "    \n",
    "    with open(result_path, 'ab') as f:\n",
    "        np.savetxt(f, outputs, fmt='%s')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluating Family.owl in 0.2\n",
      "Skip Family.owl\n",
      "Evaluating Family2.owl in 0.2\n",
      "Skip Family2.owl\n",
      "Evaluating glycordf.glycordf.14.owl.xml in 0.2\n",
      "Skip glycordf.glycordf.14.owl.xml\n",
      "Evaluating nifdys.neuroscience-information-framework-nif-dysfunction-ontlogy.14.owl.xml in 0.2\n",
      "Skip nifdys.neuroscience-information-framework-nif-dysfunction-ontlogy.14.owl.xml\n",
      "Evaluating nihss.national-institutes-of-health-stroke-scale-ontology.11.owl.xml in 0.2\n",
      "Evaluating ontodm-core.ontology-of-core-data-mining-entities.6.owl.xml in 0.2\n",
      "Evaluating sso.syndromic-surveillance-ontology.1.owl.xml in 0.2\n",
      "Evaluating glycan.owl in 0.2\n"
     ]
    }
   ],
   "source": [
    "base_names = [\n",
    "    'Family.owl',\n",
    "    'Family2.owl',\n",
    "    'glycordf.glycordf.14.owl.xml',\n",
    "    'nifdys.neuroscience-information-framework-nif-dysfunction-ontlogy.14.owl.xml',\n",
    "    'nihss.national-institutes-of-health-stroke-scale-ontology.11.owl.xml',\n",
    "    'ontodm-core.ontology-of-core-data-mining-entities.6.owl.xml',\n",
    "    'sso.syndromic-surveillance-ontology.1.owl.xml',\n",
    "    \"glycan.owl\"\n",
    "]\n",
    "\n",
    "\n",
    "base_dir = '../rGodel_output_alpha0.8/'\n",
    "import os\n",
    "\n",
    "for base_name in base_names:\n",
    "    for ratio in [0.2]:\n",
    "        print(\"Evaluating \" + base_name + \" in \" + str(ratio))\n",
    "        evaluate(base_name, f'{base_dir}mask_{ratio}/', str(ratio))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate(base_name, base_dir, base2_dir, ratio):\n",
    "    result_path = \"results_product/\" + base_name + \".txt\"\n",
    "    if os.path.exists(result_path):\n",
    "        print(\"Skip\", base_name)\n",
    "        # return\n",
    "    \n",
    "    axiom_path = '../input/' + base_name + '_subclassaxioms.txt'\n",
    "    cname_pickle = base_dir + base_name + '.c2id.pkl'\n",
    "    rname_pickle = base_dir + base_name + '.r2id.pkl'\n",
    "    evaluator = EmbeddingEvaluator(axiom_path, cname_pickle, rname_pickle)\n",
    "    outputs = ['\\nMASK RATIO = {}\\n'.format(ratio)]\n",
    "    \n",
    "    cEmb_path = base2_dir + base_name + '.cEmb.pkl'\n",
    "    rEmb_path = base2_dir + base_name + '.rEmb.pkl'\n",
    "    evaluator.load_embedding(cEmb_path, rEmb_path)\n",
    "    scores = []\n",
    "    for mode in ['godel', 'product', 'crisp']:\n",
    "        result = evaluator.evaluate(mode=mode)\n",
    "        scores.append(result['score'])\n",
    "\n",
    "    outputs.append(f\"Find {result['all_axiomss']} subclass axioms, {result['parsed_axioms']} parsed\")\n",
    "    outputs.append(f\"Learned embedding: {np.average(scores):.3f}  <= \" + ', '.join([f'{s:.3f}' for s in scores]))\n",
    "          \n",
    "    cEmb_path = base_dir + base_name + f'.masked_cEmb.npy'\n",
    "    rEmb_path = base_dir + base_name + f'.masked_rEmb.npy'\n",
    "    evaluator.load_embedding(cEmb_path, rEmb_path)\n",
    "    scores = []\n",
    "    for mode in ['godel', 'product', 'crisp']:\n",
    "        result = evaluator.evaluate(mode=mode)\n",
    "        scores.append(result['score'])\n",
    "    outputs.append(f\"Masked  embedding: {np.average(scores):.3f}  <= \" + ', '.join([f'{s:.3f}' for s in scores]))\n",
    "\n",
    "    cEmb_path = base_dir + base_name + f'.true_cEmb.npy'\n",
    "    rEmb_path = base_dir + base_name + f'.true_rEmb.npy'\n",
    "    evaluator.load_embedding(cEmb_path, rEmb_path)\n",
    "    scores = []\n",
    "    for mode in ['godel', 'product', 'crisp']:\n",
    "        result = evaluator.evaluate(mode=mode)\n",
    "        scores.append(result['score'])\n",
    "    outputs.append(f\"Oracle  embedding: {np.average(scores):.3f}  <= \" + ', '.join([f'{s:.3f}' for s in scores]))\n",
    "    \n",
    "    with open(result_path, 'ab') as f:\n",
    "        np.savetxt(f, outputs, fmt='%s')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluating Family.owl in 0.2\n"
     ]
    },
    {
     "ename": "TypeError",
     "evalue": "evaluate() missing 1 required positional argument: 'ratio'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[5], line 19\u001b[0m\n\u001b[1;32m     17\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ratio \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;241m0.2\u001b[39m, \u001b[38;5;241m0.4\u001b[39m, \u001b[38;5;241m0.6\u001b[39m, \u001b[38;5;241m0.8\u001b[39m]:\n\u001b[1;32m     18\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEvaluating \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m base_name \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m in \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(ratio))\n\u001b[0;32m---> 19\u001b[0m     evaluate(base_name,  \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbase2_dir\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124mmask_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mratio\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;28mstr\u001b[39m(ratio))\n",
      "\u001b[0;31mTypeError\u001b[0m: evaluate() missing 1 required positional argument: 'ratio'"
     ]
    }
   ],
   "source": [
    "base_names = [\n",
    "    'Family.owl',\n",
    "    'Family2.owl',\n",
    "    'glycordf.glycordf.14.owl.xml',\n",
    "    'nifdys.neuroscience-information-framework-nif-dysfunction-ontlogy.14.owl.xml',\n",
    "    'nihss.national-institutes-of-health-stroke-scale-ontology.11.owl.xml',\n",
    "    'ontodm-core.ontology-of-core-data-mining-entities.6.owl.xml',\n",
    "    'sso.syndromic-surveillance-ontology.1.owl.xml',\n",
    "]\n",
    "\n",
    "\n",
    "base_dir = '../output_alpha0.8/'\n",
    "base2_dir = '../product_output_alpha0.8/'\n",
    "import os\n",
    "\n",
    "for base_name in base_names:\n",
    "    for ratio in [0.2, 0.4, 0.6, 0.8]:\n",
    "        print(\"Evaluating \" + base_name + \" in \" + str(ratio))\n",
    "        evaluate(base_name,  f'{base2_dir}mask_{ratio}/', str(ratio)) #f'{base2_dir}mask_{ratio}/',"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "b5d4ea6110d76bf407abdf3fc85b4f9a1bbb4f7f6454d667a509d28831b3322d"
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
