{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "%load_ext lab_black"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import math\n",
    "import random\n",
    "from itertools import product\n",
    "from tqdm import tqdm\n",
    "from typing import Dict, List, Set, Union, Tuple\n",
    "import yaml\n",
    "from ast import literal_eval\n",
    "\n",
    "import json\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "\n",
    "# from transformers import GPTNeoXForCausalLM, AutoTokenizer\n",
    "import torch\n",
    "from typing import List\n",
    "from matplotlib import font_manager as fm, pyplot as plt\n",
    "import numpy as np\n",
    "import wandb\n",
    "\n",
    "# import statsmodels.api as sm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/home/kevin/code/rycolab/context-vs-prior-finetuning'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# IMPORTANT: Run as if from project root so that imports work.\n",
    "pardir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))\n",
    "os.chdir(pardir)\n",
    "os.getcwd()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/kevin/mambaforge/envs/sftcontext/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "Could not find the bitsandbytes CUDA binary at PosixPath('/home/kevin/mambaforge/envs/sftcontext/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda120.so')\n",
      "The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.\n"
     ]
    }
   ],
   "source": [
    "from model_utils.utils import (\n",
    "    construct_paths_and_dataset_kwargs,\n",
    "    construct_test_results_dir,\n",
    "    EvalConfig,\n",
    ")\n",
    "from preprocessing.dataset import load_dataset_from_path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "#b40426\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgAAAABACAYAAABsv8+/AAAAGXRFWHRUaXRsZQBjb29sd2FybV9yIGNvbG9ybWFwWhD8tgAAAB90RVh0RGVzY3JpcHRpb24AY29vbHdhcm1fciBjb2xvcm1hcCA4PfgAAAAwdEVYdEF1dGhvcgBNYXRwbG90bGliIHYzLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZ2GZxVMAAAAydEVYdFNvZnR3YXJlAE1hdHBsb3RsaWIgdjMuOS4yLCBodHRwczovL21hdHBsb3RsaWIub3JnTz9adAAAAiFJREFUeJzt1kGOnDAURVHblWwl+5eyP5xBAS0MLigps3fOhLaxPzVq3fr39aeXUkr7XUsppdT12X59uW7tvX6t6/VZW73ef7XD+zacq214P85rx3Pn9+3j+f+//16XbV0/3yvjnFrX/cncev3dMrs/mVfq9ffLzf3x++Oc6frpvXo9p9cv763PPjnXJ/fG/fm56/Xsfi+f5/6cG9bj+dn7cf/xers/7I/PPu4fzy/Tc8/2f+6Xz/f28+W798PcZbK/rU/vT3OH/f50fza/XJ6f37/ZX+7O9Ztz7z+W5dn+OG8/t/+efjy/r8d5/XB+6bP9yfeW47nT/mz++gPO87+bt825v79c7u/3h3n331ku3z+du/13AQCCCAAACCQAACCQAACAQAIAAAIJAAAIJAAAIJAAAIBAAgAAAgkAAAgkAAAgkAAAgEACAAACCQAACCQAACCQAACAQAIAAAIJAAAIJAAAIJAAAIBAAgAAAgkAAAgkAAAgkAAAgEACAAACCQAACCQAACCQAACAQAIAAAIJAAAIJAAAIJAAAIBAAgAAAgkAAAgkAAAgkAAAgEACAAACCQAACCQAACCQAACAQAIAAAIJAAAIJAAAIJAAAIBAAgAAAgkAAAgkAAAgkAAAgEACAAACCQAACCQAACCQAACAQAIAAAIJAAAIJAAAIJAAAIBAAgAAAgkAAAgkAAAgkAAAgEACAAACCQAACCQAACDQP+uDO9TQv7OkAAAAAElFTkSuQmCC",
      "text/html": [
       "<div style=\"vertical-align: middle;\"><strong>coolwarm_r</strong> </div><div class=\"cmap\"><img alt=\"coolwarm_r colormap\" title=\"coolwarm_r\" style=\"border: 1px solid #555;\" src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAgAAAABACAYAAABsv8+/AAAAGXRFWHRUaXRsZQBjb29sd2FybV9yIGNvbG9ybWFwWhD8tgAAAB90RVh0RGVzY3JpcHRpb24AY29vbHdhcm1fciBjb2xvcm1hcCA4PfgAAAAwdEVYdEF1dGhvcgBNYXRwbG90bGliIHYzLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZ2GZxVMAAAAydEVYdFNvZnR3YXJlAE1hdHBsb3RsaWIgdjMuOS4yLCBodHRwczovL21hdHBsb3RsaWIub3JnTz9adAAAAiFJREFUeJzt1kGOnDAURVHblWwl+5eyP5xBAS0MLigps3fOhLaxPzVq3fr39aeXUkr7XUsppdT12X59uW7tvX6t6/VZW73ef7XD+zacq214P85rx3Pn9+3j+f+//16XbV0/3yvjnFrX/cncev3dMrs/mVfq9ffLzf3x++Oc6frpvXo9p9cv763PPjnXJ/fG/fm56/Xsfi+f5/6cG9bj+dn7cf/xers/7I/PPu4fzy/Tc8/2f+6Xz/f28+W798PcZbK/rU/vT3OH/f50fza/XJ6f37/ZX+7O9Ztz7z+W5dn+OG8/t/+efjy/r8d5/XB+6bP9yfeW47nT/mz++gPO87+bt825v79c7u/3h3n331ku3z+du/13AQCCCAAACCQAACCQAACAQAIAAAIJAAAIJAAAIJAAAIBAAgAAAgkAAAgkAAAgkAAAgEACAAACCQAACCQAACCQAACAQAIAAAIJAAAIJAAAIJAAAIBAAgAAAgkAAAgkAAAgkAAAgEACAAACCQAACCQAACCQAACAQAIAAAIJAAAIJAAAIJAAAIBAAgAAAgkAAAgkAAAgkAAAgEACAAACCQAACCQAACCQAACAQAIAAAIJAAAIJAAAIJAAAIBAAgAAAgkAAAgkAAAgkAAAgEACAAACCQAACCQAACCQAACAQAIAAAIJAAAIJAAAIJAAAIBAAgAAAgkAAAgkAAAgkAAAgEACAAACCQAACCQAACDQP+uDO9TQv7OkAAAAAElFTkSuQmCC\"></div><div style=\"vertical-align: middle; max-width: 514px; display: flex; justify-content: space-between;\"><div style=\"float: left;\"><div title=\"#b40426ff\" style=\"display: inline-block; width: 1em; height: 1em; margin: 0; vertical-align: middle; border: 1px solid #555; background-color: #b40426ff;\"></div> under</div><div style=\"margin: 0 auto; display: inline-block;\">bad <div title=\"#b40426ff\" style=\"display: inline-block; width: 1em; height: 1em; margin: 0; vertical-align: middle; border: 1px solid #555; background-color: #b40426ff;\"></div></div><div style=\"float: right;\">over <div title=\"#3b4cc0ff\" style=\"display: inline-block; width: 1em; height: 1em; margin: 0; vertical-align: middle; border: 1px solid #555; background-color: #3b4cc0ff;\"></div></div></div>"
      ],
      "text/plain": [
       "<matplotlib.colors.LinearSegmentedColormap at 0x7d998ff95240>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnMAAAHeCAYAAADw2o2jAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABnnklEQVR4nO3dd3gU5f4+/js9QBoYeu8EqYYqHEjoHaQdERABhUP9onBUQD0eLBRFjoqUIAh4CkgX6RBApEPABCHSISGEnt6z798f/DKf3WydzaZMcr+ua69rJvu03cmze+/M7oyTiAiIiIiISJOcC3sARERERGQ/hjkiIiIiDWOYIyIiItIwhjkiIiIy8ujRo8IeAtmIYY6IiIgUoaGh6NChA/7+97/bXOf48eMYMWIEatSoAQ8PD5QtWxYdO3bEsmXLkJGRYbX+7du3MW3aNDRo0AClSpWCl5cXWrduja+++sqm+iWdE3/NSkRERMeOHcNHH32EI0eOAADGjBmDtWvXWq03e/ZsLFiwwOz9rVu3xq5du1C+fHmT9+/YsQMjR45EcnIyXFxckJ2dbXB/u3btsH//fnh7e9v8WEoa7pkjIiIqwSIjIzF58mRs2rQJaWlpquouWbIECxcuxLhx4/Dzzz/j4sWLOHjwIN5++214enoCAM6ePYvhw4ebrH/mzBkMHz4cQ4YMwcWLF5GamorExETs3r0bjRo1AgCcOnUKH3/8cZ4eY3HHPXNEREQlWGJiorLXKywsDIGBgQCs75mLiYlBgwYNsG7dOgwZMsTo/rNnzyIoKAgpKSkAgF9++QV9+/ZV7tfpdHjppZcwdepUvPnmm0b1b9++jSZNmiA5ORlVq1ZFdHR0Xh5mscY9c0RERCWY/uHLcuXK2Vxv5cqVGDp0qMkgBzw/vDp37lxlfefOnQb37969Gz169DAZ5ACgVq1aaNOmDQDg6dOnNo+rJHIt7AFQ4UtMTESVKlWwcuVKvPbaa4U9nAKh0+nwxx9/IDIyEvfu3UNycjJcXV1RpkwZeHl5oWbNmqhXrx6qVasGJycng7pbt26Fs7MzBg0aZFffiYmJOHDgAAYPHuyARwJcv34dp0+fxoMHD5CZmQl/f380b94cL730Epyd8/Z57cGDBzh27Biio6ORmpqKF154AS1btsRLL70EFxcXh4yfTCuJ8xIoPnMzP+clUDTm5i+//IKtW7daLDN06FAl0N25c8fgvkaNGqF79+4W67/wwgsAgICAgDyMtAQQKvGWLVsmAKR9+/Z5aufw4cMCQPXN09NT/P39pU6dOtKjRw+ZMWOG/PTTT5KYmOigR/h/jhw5IqNHj5ayZcsajMHd3V1q1KghL7zwgsHfvby8pFOnTjJjxgxZunSpTJ8+XVxdXWXMmDGq+05NTZUvv/xS/P39pWbNmnl6HDqdTjZu3CiNGzc2+7xWqFBBFi5cKKmpqarbDwsLkz59+phtu2bNmrJ69eo8PQayzFHzUoRz0xpHzc38npci+T83b926pbRl7bn873//a7W9tLQ0pb3+/furHk+TJk0EgKxbt0513ZKEYY6kefPmymQLCwuzu50HDx7I2rVrZfHixTJq1Cjx8PCw6w0k5+bh4SFjxoyR27dv5/kx/v7779KpUyeD9oODgyUkJERiYmJEp9MpZZOSkmTz5s0WXzDVvGFkZGTI8uXLpWrVqgYvuPZKS0uTESNG2Pw8BgYGSlRUlM3tr1y5Utzd3W1q+4033jB47shxHDUvRTg3zXHk3MzveSlSMHNTTZizxcOHD5X23nvvPVV1f/31VwEgU6dOzfM4ijuGuRLuxIkTBi8AY8eOdVjbu3fvNvkis2zZMomKipKoqCi5fv26nD9/XjZu3CgzZ840eFHVf+NYu3at3eNYsmSJuLm5Ke1Vq1ZNdu3aZVPdPXv2mByTLS9y2dnZsn79eqlTp47JT8/2GjJkiOo338aNG0tCQoLVtteuXStOTk4CQJo0aSLvvfeerFixQr799luZMmWKVKxY0ajtBQsW2P1YyLT8nJcinJv5MTfzc16KFNzcdHSYO3/+vNLesWPHbK53/fp1qV69unz33Xd5HkNJwDBXwr3++usGk79UqVLy5MkTh7St0+mkXLlyRi8wGRkZZutkZWXJ119/LaVKlTKqFxISonoMU6dONWijadOmqh/fw4cPpWnTpqreMHbs2CGBgYHSv39/eeedd6RSpUoOecNYunSp0oanp6f06tVL3nnnHZk9e7a89dZb0qxZM7NvHBMmTLDY9pUrV5TDaps3bzZZJjU1VcaMGWPQrpeXV74cdivJ8nNeinBuOnpu5ue8FCnYuenoMPfFF18IAGnevLlN5VNTU+Xrr78Wb29vASDVq1eXNWvW5HkcxR3DXAn29OlT8fT0NHpxWbhwocP6eOmll4zat8W+ffsMPrEDkNKlS8uNGzds7vvvf/+7Qf26detKbGysXY/j4cOHBt+DsfYit2XLFomPj1fWN23alOc3jLi4OOUNePjw4fLw4UOT5c6ePSuBgYFGz7urq6vcuXPHbPvdu3eX6tWrW32Os7KypHXr1gZtb9++XfXjIdMKYl6KcG7myOvczO95KVKwc9PRYa5FixYCQPbu3WuxnE6nk6lTp4q/v7/J0OuIsRRnDHMl2FdffWVy0tSqVUuys7Md0sdf/vIXu94wRERmzpxpVPf999+3qe5//vMfg3pOTk5y/Phxex+GiIj88ccfypus2heWyMjIPIe5zz//XADIq6++avW7MKmpqdKxY0ej52/p0qUmy588eVJKlSol4eHhNo1l7dq1Bu0uXrxY9eMh0wpiXopwbubI69zMz3kpUvBz05Fhbv/+/QJARo8ebVP5uLg4uX//vpw/f14WL14sdevWNXgs3ENnHsNcCdawYUMBIP369TN6cdmxY4dD+ujcubPdbxi3b982qtu0aVOr9e7duye+vr4G9d588828PAzF22+/bdeLnP4LpL1hrlGjRlK9enWbv2Nz584doz0or7/+usmyb775pixZssTmsVy8eNGgXX5vznEKYl6KcG7myOvczM95KVLwc9NRYS4lJUUaNGggjRo1svm5yS0zM1PeeOMNZTwtW7a0ezzFHU8aXEIdPnwYf/75JypUqID169fDy8vL4P6lS5cW0sj+T82aNVGzZk2Dv0VFRVmtN3v2bMTHxyvrzs7O+OCDDxwyppkzZzrkHFFqRUZGIjIyEp9++qnN1yesUaOG0fmyHj58aLLs4MGDMW3aNJvH4+7ubrBer149m+uSeVqYlwDnZo78npeAdufm9OnTERcXh19++cXua6q6urpi9erVaNCgAQAol/siYwxzJdSKFSsAAK+//jrKli2LV1991eD+gwcP4s8//yyMoRmoVKmSwbq16wZeu3YN//73vw3+1rVrV6M3HntVrVoVnTp1ckhbavz222+oU6cORo0apapeUFCQwbq5k4n27t1b1YlG7969qyyXLl3a6ok/yTZamZcA5yaQ//MS0Obc/Oabb7Bx40bs3r0bdevWzVNbzs7OyhUiRATPnj1zxBCLHYa5EujBgwfYtm0bAGD8+PEAYHQ5FRHBd999V+Bjyy0jI8NgPfcbSG7Lly+HTqcz+NvIkSMdOqaePXs6tD1bNG3aFMuWLVO956FatWoW1+119uxZZfmdd96Bj4+PQ9otybQ0LwHOTaDozUug8Ofmpk2b8P7772PHjh3KNV7zqn79+soyX2tMY5grgVavXo3MzEx06NABjRo1AgC0bdsWTZs2NSi3bt06JCUlFcYQATx/49L/lAkAL7/8ssXyGzduNPp7u3btHDquoUOH2n25IHu1bdvWrjcqDw8Pg/XWrVvneSw6nQ7ff/89AKBTp0746KOP8twmAERHR8PJycnqLSsry6jukSNHzJb/7bffjMqLCDZt2oSBAweiatWq8PDwMFn3jTfecMhjs4VW5iXAuZmjKM1LIP/mpq327duHMWPG4D//+Q+Cg4Md1q6bmxuA54eMc3/1gJ5jmCthdDodVq1aBeD/Pv3nyL2ekJCA9evXF9jYcjt16hSePHli8Le33nrLbPnw8HDExMQY/M3X11f5voWj1KtXr8DDnL3i4uKUZWdnZ/Tr1y/PbX788ce4c+cOevTogV9++UV5oc2rF154AYsWLULDhg1N3j969Ghs2LDB5CGnZs2aYdmyZWjcuLHyN29vbyxYsAAvvviiQdn4+Hh06dIFw4cPx/HjxzFy5EgsW7YMy5cvx6hRoxz2eNTQ0rwEODfzKj/mJZB/c9MWJ06cwJAhQ/DNN9/glVdeMVvu6dOnRh8ErAkPDwcAjBgxIk9jLNYK77cXVBh27dolAMTb21uSkpIM7nvy5InRZX4aN26cp/7s/cVcdna2dO3a1aDeW2+9ZbHOypUrjfpq3bp1nsbvKI74Nas9PvnkE6XPfv365bm9FStWiJOTk4wcOdKhp8nQl5CQIPXq1TN4vvz9/W2qe/XqVaXOtm3bTJbp2bOncqqPx48fG91/8eJFKV++vENOzWCrgp6XIpybOQpjbjp6Xoo4bm7euHFD9a9ZL168KH5+fjJ//nyrZUeOHKnqMnAZGRlSv359qVKlijx79szmeiUN98yVMDlfsH711VdRpkwZg/vKlStn9Inq8uXLOHToUIGNDwCSk5PxxhtvKP26u7tj9uzZWLZsmcV6V65cMfqbn59ffgxRM86cOaMsv/vuu3a38+TJE4wcORJ/+9vfICL4z3/+g4EDB+L69euOGKaBnD1q+uLi4pCYmGi1bnZ2NgCgRYsWJvfQ/Pbbb9i3bx+A579+fOGFF4zKNG/eHKtXr7Zj5PbTwrwEODcdxVHzEnD83Lx586aybMuPDa5du4aePXsiODgYgwYNUn7hm/v222+/YfTo0UhISFB+9LJ+/XqUK1cOnTp1wtGjR43aFhFMnz4dz549w86dO0v0/4xVhRwmqQDdvXtXXFxcBICcPn3aZJmDBw8afYIeNGiQ3X2a+vT/7Nkzg9vjx4/l5s2bsm/fPpkzZ45UrlxZgOdnhZ8zZ47Vs6PnGDp0qFFfQ4cOtXvsjlQYn/6Tk5OldOnSAkAGDhxoVxu3b9+W9957z+jcYDm3MmXKyP/+9z/HDlyenw0+ICDAoK8ff/zRar1FixYJAFm9erXJ+z/66COlPWvXAG3btm2B7JkrjHkpwrmZo6DnpiPmpUj+zM1bt25Ju3btlDY8PT1l165dkpWVZbJ8VFSU1KxZ02T/5m76V4KYPHmywX09e/aU//3vf3L27FnZsmWLdOnSRfr16yd//vmn3c9TScEwV4J8+OGHAjy/SLM5Op3O6OLTLi4uNr9o52bqDcOWm6+vr4wdO1b+97//SWpqqk199ejRw6idkSNH2jVuRyuMMBcSEqIculNzWCPHyJEjlQt7W7o5OTnJzp07HT7+77//3qCfoKAgq3UaN24sPj4+Rocqc0yYMEFp75NPPrHYVkhISIGEucKYlyKcmzkKem7mdV6KOH5uXrx4UXx8fMy24+7uLn/9618N6qSnpxt94LJ2q1evnsFVMh4/fixvvvmm1KhRQzw9PaV06dJSs2ZNCQ4Olnnz5snFixften5KIoa5EiIzM1OqVKkiAKyeTfzTTz81moS2XqonN3vfMPRvfn5+smjRIklPT7fYV5cuXYzq9u/f365xO1pBv2FkZmYqVxIwt5fKmsTERHn8+LHcuHFDfv31V/n6669NXgIKgJQvX171Bb2tSUlJkRdeeMGgn2vXrpktf/r0aQEgEydONFvm/fffV9p64YUXLL6ZxsbGytixY/P0GKwprHkpwrmZoyDnpiPmpUjhz00qehjmSogtW7Yon7BMfelb371795TDPjk3f39/SUtLU92vrV+yTk9Pl8uXL8vq1aulU6dOJl+U2rVrJzExMWb76tOnj1Gdzp07qx5zfijoMPftt98KgHwJI6dOnTL5ifzrr792eF+zZs0y6OO9994zW3bSpEkCQM6dO2e2zG+//WbQXvXq1eXo0aMOH7etCmteinBu5ijIuZmf81KkYOcmFS0McyVE9+7dBYAMHz7cpvKmrgu5du1a1f3a+4u53bt3K3ss9G9NmjSRuLg4k3VGjx5tVL5Fixaqx5wfCvIN4+bNm+Ll5SUdOnSw+TCYWnFxcdK0adN8f3O+fv26weGkihUrSmZmplG5tLQ0KVu2rE3Xbhw4cKDBuJ2cnOT111+X2NhYh4/fmsKalyKcmzkKam4WxLwUKbi5SUULw1wJcO3aNeUNccOGDfLo0SOrtx9++MHoxdeeUwnk9WLeFStWNKo/YcIEk+U/+OADo7I+Pj4G39EoLAX1hpGeni7t27eXRo0ayZMnT/Kljxy///67ODs7K4+pVKlS+fJc55xKJOe2detWozIbNmwQAPLdd99Zbe/p06fSpEkTo/8VX19f+eabb8x+2dvRCnNeinBu5iiIuVmQ81Kk4OYmFR0McyVA7kNVebmdOnVKVd95ecMQEVm9erVRfRcXF4mOjjYqu2nTJpNjLgq/hCqoMPe3v/1NqlevLnfv3s2X9nPLHbTy4zxQO3bsMOijT58+RmV69eolpUuXNrtnKLfHjx9LUFCQyf+X5s2by4kTJxz9MIwU5rwU4dzMURBzs6DnpUjBzE0qOlxBxVp6ejrWrl0LAPj+++9VXdT6119/xSeffGLwt6VLl6Jt27aOHKJFw4YNw6RJkwyuA5mdnY09e/YYXbfS3OWEzp8/7/AzzRdFX3zxBTZt2oTffvsN1atXL5A+u3Xrppy3DQBSUlIcfi6ovn37okaNGspZ4/fu3Yvo6GjlepYxMTE4cOAARo4cCV9fX5vafOGFF3Dw4EEsXLgQn3zyicFF4n///Xd06NAB06dPx6JFi+Du7u7QxwNof14CnJu2Kox5CRTM3KQipLDTJOWv//znPwJAmjZtqrpuSkqKlCtXzuDTnYeHhzx8+NDmNvL66V9ElF9/6d/+/ve/myzbqlUro7KjR49W3aej5fen//Xr14u3t7ecOXPGoe1as337doPHlZycnC/95P4l5z//+U/lvvnz5wsAOXbsmF1t37x5U4YNG2Zyz1FwcLCkpKQ46mEoCnteinBu5sjPuVlY81Kk4OYmFQ28AkQxl3Nm9tyflG1RqlQpo3rp6ekICQlxyNhsZeos/fHx8SbLjh071uhvmzdvNrgWYnHz888/Y9KkSdi6davDLthtK/2rFZQvXx6lS5fOl37efPNNg+tMrlmzBjqdDsDzC883atQIHTt2tKvt2rVr46effsLp06eN2jh8+DA+/vhju8dtTnGYlwDnpiWFOS+BgpubVDQwzBVj4eHhOH78ODw9PTF69Gi72pg8ebLRhc1XrlypXDapIJi6jFOFChVMln399dfh7+9v8LfU1FT8+OOP+TK2wnbkyBG89tprWLduHbp162ZzvczMTIf0r/9GbO5QmiNUrFgRgwcPVtbv3LmDAwcO4NSpU4iMjLQ5FE2dOhU7d+40eV+bNm3w66+/4ssvvzT4+7Jly5Cenm7/4HMpLvMS4Nw0p7DnJVBwc5OKBoa5Yuzbb78FAAwZMgRly5a1q42aNWuiX79+Bn+LiorC9u3b8zo8m2RkZBhcKzBHQECAyfJeXl749NNPjf7+j3/8AzExMQ4b16VLlxzWlr3OnTuHgQMHYsmSJRgyZIjN9ZKTk9GqVSuD7zrZS/95GDZsWJ7bs2TSpEkG66tWrcIPP/wAd3d3jBkzxuZ2LF3T1MnJCTNnzsSsWbOUvyUlJeH27duqx2tOcZiXAOemOUVhXgIFOzepCCjs47yUPx49eiSenp4CQEJDQ/PU1rZt24y+62LreYvy+r2c3L9kxP///aD4+HizdXQ6nXL+Lv1b7969HfLz/G+//VacnZ1VnWzW0d/LuXz5svj7+8vnn3+uuu6oUaNk1KhReeo/R+vWrQWAVKlSxepVAByhcePGynPo5uYmPj4+Np+jTURkypQpUqVKFcnOzrZY7vbt2wbby9w1U9UqKvNShHMzhyPnZlGZlyIFPzepcDHMFVNz584V4Pkli6y9cVmTnp4u3t7eRi/AYWFhVuvmvKDY84YRHx9v8mzmtlzC6NGjR9KgQQOTX7jOyMiweQz60tPTlasMvPbaa6rqRkZGOuwN4/bt21K1alV55513bK6TkJAgJ0+elFdeeUUAyP79++3uP8eRI0eUx7N58+Y8t2eLnDPo69/UPJYpU6YI8Py8bpakpqYq7bu4uDjs3GBFZV6KcG7mcNTcLCrzUqRw5iYVLoa5Yujhw4fKRZPbtGnjkDbbtWtn9OLbt29fq/VMnVg0ISHBar3bt29LmzZtjOq2bt3a5rOn37lzR+rXr2/URteuXS1e49OUc+fOSfPmzQV4fk1JtZ90cy7bpL9XyZ43rtjYWKlXr57RY1Jzq1q1qlGQWLVqlVSrVk0CAgJk8eLFVscWFxenPLeWroXqaPHx8VKmTBnlsdSuXVvVHp2cMFe7dm2L/4cHDx5U+hgyZIgjhl6k5qUI52YOR8zN/JqXItqZm1S4HB7msrOz8/3EiI8fP5bNmzfL4sWL5auvvpJt27bZfLLQ3DIzM+XYsWPy3Xffyfz582X9+vVy+fJlB4+44KSnp8uAAQOUFwh/f39JSkrKU5s6nU5atmxp8gXoX//6l9l6Z8+eNVln0aJFZt+Ab9++Lf/85z/Fz8/PqF737t1terPR9/DhQ+nRo4dRW+7u7jJ16lQ5ceKE2T0kGRkZEhoaKsOHD1fO1P///t//U/1Cn5KSIm3btjUaw7Jly1S18+zZM+VNKy+33Nc3zcjIEDc3N4MyLVq0kPPnz5scR0xMjPJ4Jk6cWGBXTMgxYcIEZZyffPKJqro5YQ6ABAUFmbwealRUlDRq1EiA54eooqKi8jzmojQvRTg3czhibubXvMx5nFqam1R4HBbmdDqdbNiwQRo1aiRjxoxxVLMGHj9+LOPHjxdXV1epVKmSDBs2TPr27StlypQRLy8vmTlzpqpPZStXrpSqVauKq6urdO/eXUaMGCF16tQR4Pn5pbQU6mJjY2XevHkmD180bdpUPv30U1mzZo2qvRibN2+WJUuWSNeuXS2+CHXq1EkWLlyo1Hv06JFs2bJF6tata7ZOhQoVJDAwUF5++WVp1aqVNGzYUHx9fU2WrVWrlvzwww92Pzc6nU6+//57k3sict5YO3ToIIMHD5Y33nhDhg0bJm3bthUvLy+lTPPmzeXgwYM29xkWFiYhISEyc+ZMqVmzpsl+nZ2dZcCAAbJw4UJZt26dyTPn50hOTpaXX345z28YAIz+r3U6nVSoUMHs+H744Qc5evSo7NixQ9555x3x8/OTWrVqyZYtW+zeJnlx7tw5AZ7vQbl3756quvphDoCULVtWJk+eLCEhIbJixQqZMmWKsuevcePGeb5CQVGalyKcmyKOnZv5OS9znh8tzU0qPHkOczqdTjZv3mxwrcP8CHNXrlyR6tWrC/D8sEdiYqJyX1RUlAQGBgoAadeunTx9+tRiW5mZmTJixAgBnl+4++LFiwb35XyvxdvbW/bu3evwx5IfDh8+bNMLhqmLlJtj7gXW3O3XX38VV1dXu1/MnJycxMfHR2rUqCFdunSRd999V0JDQx12TcG0tDRZs2aNdOvWzejTrrk3khEjRsj+/ftVj2HmzJmqH/+2bdvMtjdjxgyHvGG0atXKZPsHDx40efF0/ZuXl5e88sorsmnTpkL/xN+iRQu7viyeO8zlvvn5+clf/vIXWbZsmaSlpeV5nEVhXooI56YeR87N/J6XItqbm1Q4nEREYKeff/4Z//jHP3Dx4kWDv48ZM0a5VI0jxMbGolWrVrh37x4CAgIQFhYGT09PozIBAQGIi4tDr169sGvXLjg7mz7zyvjx47FmzRoAwNGjR9GpUyejMq+++io2btwIHx8fnDlzBg0bNnTY46HCl5aWhosXL+L69et48OABUlJS4O7uDi8vL1SvXh0NGjRAw4YN4eTkVNhDLTCZmZk4efIk/vjjDzx9+hQiAi8vL1SpUgUNGzZEkyZNjM5tVljOnz+PKlWqoHLlyoU9FHIwzk1jWpqbVDjsDnOrVq1CdnY2+vfvj8TERLRs2VK5vqGjw1zfvn2xe/duAMDGjRsxfPhwk+U++eQTfPTRRwCAJUuWYMaMGUZlNm3apNTv3bu30m5uN2/eRMOGDZGVlYWWLVvi/PnzJerFg4iIiLQhT3vm9LVr1w6nT58G4Ngwd+DAAfTo0QMA4Ofnh0ePHsHV1dVk2ejoaNSoUQMiAn9/f9y8eRPe3t7K/ZmZmWjQoIFyAtBNmzZh6NChZvvu3r07Dh48CAD473//ixEjRjjkMRERERE5isOuAJH7sKejfP3118py7969zQY5AKhWrRoCAwMBAI8fP8aqVasM7t+xY4cS5Nzd3dGrVy+LfQ8aNEhZnj9/vsqRExEREeW/In05r6SkJOzbt09Zb9u2rdU6+mU2btxocN/mzZuV5caNG8PLy8vmtiIiIhAZGWm1fyIiIqKCVKTD3NmzZ5GVlaWsN2/e3Gqdli1bKstnzpzBvXv3lPWTJ0+qaqtp06YGewK3bt1qtQ4RERFRQSrSYe7UqVMG67Vr17Zap1atWgbrYWFhAID79+/j7t27qtry8PBApUqVlPXz589brUNERERUkIp0mAsPDzdY1w9W5uQ+VUFERITJtmw9pYF+uZy2iIiIiIqKIh3mnjx5oix7e3vDw8PDap3y5csbrN+8edOoLQDw9/e3aQz67eW0RURERFRUFOkw9+zZM2XZ1l/L5i6XkJBg1Ja97WVnZyMlJcWmekREREQFwfx5PoqA+Ph4ZdmWvXKA+TCn31Ze2ytdurTJsunp6UhPT1fWRQQZGRnw9/fnCYeJiIgoXxTpPXP6l+OyNQzlvqRJzjmRc1/aK6/tmTJ//nz4+voqNz8/P1SoUAGJiYk29UVERESkVpEOc2XKlFGW9fd4WZJzSbEcOVeA0G/LEe2ZMnv2bMTHxyu3qKgom/ogIiIisleRPsxatmxZZdnW8JW7nK+vr1Fb9rbn6upqFAr1eXh42Hz4loiIiMgRivSeOf1zwSUmJhqcQNicp0+fGqzXr1/fqC1T5Wxpr27duvzuGxERERUpRTrMNWjQQFnW6XR48OCB1ToxMTEG6wEBAQD+L9TluH//vk1j0G8vpy0iIiKioqJIh7mXX37ZYF3/Cg7m6H9PzcnJCe3atQMAVKhQAfXq1VPVVnZ2tkGYa9++vdU6RERERAWpSIe5Nm3awMvLS1n//fffrda5cOGCsty8eXNUrFhRWe/atauqtiIjIw1+ANGzZ0+rdYiIiIgKUpEOc25ubnjllVeU9XPnzlmtc+bMGWV5yJAhBvcNHz5cWY6IiEBGRobNbdWrVw/Nmze32j8RERFRQSrSYQ4AJkyYoCzv2rULOp3ObNnHjx/j+PHjAAB3d3e89dZbBvcHBwcr351LSUnBwYMHLfa9fft2ZXnKlClqh05ERESU7xwW5h4/fqwsZ2dn21Tn3r17CA4Ohre3N4YMGYLk5GSjMh07dlQOb8bGxmLv3r1m21u3bp3S94QJEwwOsQLPv0M3b948ZX3t2rVm24qJicG+ffsAAFWqVMG4ceNsekxEREREBckhYS46OhrXr19X1sPDwy1eKSHH3LlzceTIESQlJWHr1q346quvTJZbuXKlcrLeuXPnIjMz06jM06dPsXDhQgBAnTp1sGDBApNtvfrqqxgwYAAAYPPmzQaHUvV9+OGHyjnmvv/+e/j4+Fh9PEREREQFze4wl5KSguvXr2Pbtm0YOHCgwcl1w8PDMWbMGBw9ehR37twxe4Le3HvwTIU0AKhZsyYOHjyI8uXL4+LFixg9ejSSkpKU+x8+fIgBAwbg0aNHqFevHvbs2WPx5L7//e9/0bt3b4gIBg0ahPDwcOU+nU6HBQsWYM2aNXB1dcXKlSvRu3dvm54TIiIiooLmJLbsQjNh6dKlmDZtmk1lt23bhkGDBhn9/c6dOxg5ciQuXryIrl274scff7S4B+zmzZuYPn06du/ejQoVKqBTp07IyMjA4cOHkZKSghEjRmDJkiV44YUXrI4pKysLn332Gb766iskJycjODgYFStWxLlz5/Dnn3+iadOm+OabbxAUFGTTYzQlISEBvr6+iI+P5549IiIiyhd2h7nCdOfOHRw7dgzR0dFwcnJCrVq1EBwcjAoVKqhuKyUlBUePHsWVK1eQmpoKf39/tGnTBi1btszzOBnmiIiIKL9pMsxpBcMcERER5bcif2oSIiIiIjKPYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIwxjmiIiIiDSMYY6IiIhIw1wd1dDNmzdx7NgxxMbGwsfHB/Xr10dQUBBcXR3WRaHR6XQ4deoUjhw5gjlz5hT2cIiIiIgUed4zFx4ejm7duqFu3bpYuHAhIiIisGHDBvTo0QO1a9fG999/n+dBZmZmonLlynByclJ9mzBhgtX227RpY7ENFxcXdOjQAf3798/zYyEiIiJypDyFuS1btqBt27Y4dOgQFixYgD/++AP//ve/cfToURw/fhzJycl466238Le//Q0iYnc/27dvR2xsrF11u3XrZvH+w4cP4+zZs1bbCQ4ORtOmTe0aAxEREVF+cRI7U1ZoaCh69eqFzMxMjB07FmvWrDEqs3fvXvTu3RsA8Pnnn2P27Nl2DbJr164IDQ0FAJQqVQrlypWzWD4xMREJCQnw9PTEo0eP4OXlZbZs7969sXfvXovteXt7Y/v27ejSpYuqcSckJMDX1xfx8fHw8fFRVZeIiIjIFnaFufj4eAQEBOD+/ftwd3fHjRs3UK1aNZNlc4KYs7Mzzpw5g8DAQFV9Xb16FY0aNcLQoUPx2WefoX79+lbrvPrqq9i4cSMGDhyI7du3my0XERGBZs2aYf78+Xj//fdVjcsWDHNERESU3+w6zLpo0SLcv38fANC9e3ezQQ4Axo0bB+D5jwjs+fHAypUr0atXL2zYsMGmIJeamopffvkFADBkyBCLZRctWgRvb29MmjRJ9biIiIiIigLVYS4jIwMrVqxQ1gcMGGCxfL9+/ZRftO7fvx9hYWGq+jt+/DjWrVsHZ2fbhrpnzx4kJyfDzc3N4g8WoqKisGHDBkycOBG+vr6qxkRERERUVKgOc6GhoXj69Kmy3rZtW4vlfX190bBhQ2V948aNqvo7efIkypcvb3P5TZs2AXh+eNfPz89suSVLliArKwt3797FunXrcO3aNVXjIiIiIioKVIe5kydPKstubm4ICAiwWqdly5bK8rZt21T15+TkZHNZWw+xxsXFYdWqVQCAn376CW+88QYaNGiA6tWrY/LkyTh+/LiqMRIREREVFtVh7tSpU8py1apV4e7ubrVOrVq1lOVr164hISFBbbc22bNnD5KSkuDi4oJBgwaZLbds2TIkJSUZ/T06OhrLly9Hx44d0bFjR1y+fDlfxklERETkKKrDXHh4uLJcuXJlm+rkLnfp0iW13dok5xBrp06d4O/vb7bcw4cP8dJLL6FWrVpwcXExWeb48eNo27Yt9u3bly9jJSIiInIE1WHuyZMnyrKlwKQv93febt68qbZbq9LS0mz+Feu//vUvnD9/Hrdu3UJcXBz279+PyZMno0yZMgblkpKSMGjQIIO9kURERERFiaowl5ycjMzMTGXd09PTpnq5y+XHYdacQ6xOTk545ZVXbK7n5eWF7t2747vvvkN0dDRmzpxp8MvZtLQ0DB06FImJiVbbSk9PR0JCgsGNiIiIKD+pCnPx8fEG6x4eHjbVK4gwl3OItX379qhSpYpdbfj5+eHLL7/Er7/+arDX8d69e1i8eLHV+vPnz4evr69yq169ul3jICIiIrKVqjCX+1xvtv7SNPf30vJynVZT0tLSsHPnTgDA0KFD89xehw4dcODAAYPDruvXr7dab/bs2YiPj1duUVFReR4LERERkSWqwlzu75Slp6fbVC8tLc1g3dvbW023VuUcYgWAwYMHO6TNFi1a4IMPPlDWb926hYcPH1qs4+HhAR8fH4MbERERUX5SHebc3NyUdVvDXO5yjr7iQs4h1latWqFmzZoOa3fatGkGh4ijo6Md1jYRERGRI6g+zKoflvSvBGFJ7nK2XGPVVmp+xapWmTJlEBQUZNAXERERUVGi+tQkDRo0UJbv379vU52YmBiDdVuuGmGrvXv3Kr80dXSYA4A6deooy7aeioWIiIiooKgOcy+//LKyfO/ePeh0Oqt19H8IEBAQ4NDDrDmHWJs2berQPX459K/vWrFiRYe3T0RERJQXqsNc165dleXU1FRcvXrVap0LFy4oyz179lTbpVn6v2LNj71yAPDo0SMAzw8NO/q7fkRERER5pTrMtWnTxuBaq+fOnbNYPi0tDREREcq6I0NXfh9iBYCTJ08CgKoTERMREREVFNVhztnZGePHj1fWf/75Z4vl9+zZo/yatXnz5ujYsaPaLs3KOcTaoEEDNGnSxGHt5jh79iwuXboEV1dXg8dMREREVFSoDnMAMGXKFFSqVAkAsGvXLuVQpClr1qxRlj/88EOj+y9duoTAwED4+Phg4sSJNn0HD7D/EOvVq1exevVq7Nu3D1lZWWbL6XQ6vP322wCADz74wOCHH0RERERFhthpy5YtAkAAyOTJk02WOXr0qFJm+PDhJst07txZKQNA1q9fb1P/27dvV+qcO3fOpjrJycni7e2t1Ktfv74cP37cZNlp06YJABkzZozodDqb2s8tPj5eAEh8fLxd9YmIiIissWvPHPD8SgshISFwcXHBsmXLsHjxYoPLdF24cAHDhg0DAPTp0werV6822U52drbBemZmpk395xxirVWrFgIDA22qk52djYyMDGX92rVr6Ny5M+bOnYsHDx5Ap9PhwoUL6N+/P1auXIl58+bhhx9+sPmyZUREREQFzUkkbxdKDQ0NxYwZMxAREYFGjRohMDAQsbGxOHz4MLy8vDBr1izMmTPH6PqsOS5evIgxY8bg5s2bGDZsGEJCQuDq6mqxz/T0dFSoUAEJCQl45513sHjxYpvHu3nzZkydOhUPHjwwus/V1RWenp4YNmwY3nvvPTRs2NDmdk1JSEiAr68v4uPjeWkvIiIiyhd5DnM5Lly4gDNnzuDx48coVaoUAgICEBQUhFKlSjmieYfS6XQ4ceIErly5gidPnsDFxQXlypVDkyZN0KJFC3h4eDikH4Y5IiIiym8OC3NkjGGOiIiI8pvd35kjIiIiosLHMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYQxzRERERBrGMEdERESkYa6OaujmzZs4duwYYmNj4ePjg/r16yMoKAiurg7rQpX4+HjMmTMHn3zyCcqVK2exbFZWFk6dOoXw8HAkJCSgatWqaNWqFQICAgpotERERET2yfOeufDwcHTr1g1169bFwoULERERgQ0bNqBHjx6oXbs2vv/+e0eM08CuXbvg5ORk8ebn54fY2FirQS4kJAS1atVCcHAwtm/fjvDwcHz88cdo3LgxunTpgitXrjh8/EREREQOI3mwefNm8fT0FACyYMEC0el0yn0nTpyQsmXLCgCZOHGiwX151alTJwFg9Xb06FGzbWRmZsqIESMEgFSsWFEuXrxocN/cuXMFgHh7e8vevXvtGmd8fLwAkPj4eLvqExEREVljd5g7dOiQuLm5CQAZO3asyTJ79uxRgtXnn39u9yD1nT592mqIc3Jykj59+lhsZ9y4cVZD31//+lcBID4+PhIZGal6rAxzRERElN+cRETU7s2Lj49HQEAA7t+/D3d3d9y4cQPVqlUzWbZr164IDQ2Fs7Mzzpw5g8DAQLXdGRgyZAiOHDmCu3fvokyZMna1sWnTJgwfPhwA0Lt3b+zevdtkuZs3b6Jhw4bIyspCy5Ytcf78eTg5OdncT0JCAnx9fREfHw8fHx+7xkpERERkiV3fmVu0aBHu378PAOjevbvZIAcA48aNAwDodDrMmTPHnu4U165dw/bt2zFlyhS7g1xmZibeffddo/GZUqdOHQQFBQEALly4gA0bNtjVJxEREVF+UR3mMjIysGLFCmV9wIABFsv369dP+UXr/v37ERYWprZLxeLFi+Hh4YFp06bZ3caOHTtw+/ZtAIC7uzt69eplsfygQYOU5fnz59vdLxEREVF+UB3mQkND8fTpU2W9bdu2Fsv7+vqiYcOGyvrGjRvVdgkAePjwIdatW4eyZcti1apVOHToEFJSUlS3s3nzZmW5cePG8PLyslhe//FFREQgMjJSdZ9ERERE+UV1mDt58qSy7ObmZtO52Fq2bKksb9u2TW2XAIBvvvkGaWlpiImJwdy5c9GtWzf4+fkhODgYS5cuRVxcnE3t6I+/efPmVss3bdrU4Fx5W7duVT12IiIiovyiOsydOnVKWa5atSrc3d2t1qlVq5ayfO3aNSQkJKjqMzk5GcuXLzf6e2ZmJo4cOYJp06ahWrVqWLBgAXQ6ndl27t+/j7t37yrrtWvXttq3h4cHKlWqpKyfP39e1diJiIiI8pPqMBceHq4sV65c2aY6uctdunRJVZ9Xr15F48aNERAQgLJly5osk5ycjNmzZ2PAgAFIT083WUZ/7KbGZY5+uYiICBtHTURERJT/VIe5J0+eKMv+/v421SlfvrzB+s2bN1X12bJlSxw7dgyXL1/G06dPERkZiRUrVpg8zcmuXbswYsQImDrjiv7YAfvGr3bsRERERPlJVZhLTk5GZmamsu7p6WlTvdzl1B5mza1hw4aYOHEizp07h9DQUDRo0MDg/m3btmHJkiVG9Z49e2ZxXObol8vOzjb7w4v09HQkJCQY3IiIiIjyk6owFx8fb7Du4eFhUz1Hhzl9wcHBuHjxIsaOHWvw948//tion/we//z58+Hr66vcqlevblP7RERERPZSFeacnQ2L23o1BBcXF4N1Oy46YVGpUqWwevVqjBo1SvlbYmIitm/fblAuv8c/e/ZsxMfHK7eoqCib2iciIiKyl6owl/uqC+Z+aJBbWlqawbq3t7eabm3i5OSEpUuXomLFisrf9H95C+T/+D08PODj42NwIyIiIspPqsOcm5ubsm5rGMpdztfXV023NvP19cWECROU9ejoaIP7c/8S1p7xu7q62n0pMSIiIiJHU32YtWbNmsq6/pUgLMldrn79+mq6VaVv377Kcu49arnPK2fP+OvWrWvz4VkiIiKi/Kb61CT6vxy9f/++TXViYmIM1m25aoS96tSpoyznPvVI7hBpz/jzc+xEREREaqkOcy+//LKyfO/ePYtXXMih/0OAgICAfDvMCgB+fn7Ksv735wCgQoUKqFevnrKufzUIc7Kzsw3CXPv27fM+SCIiIiIHUR3munbtqiynpqbi6tWrVutcuHBBWe7Zs6faLlV59OiRstyqVSuj+/XH//vvv1ttLzIy0uBwbX6Pn4iIiEgN1WGuTZs2BtdaPXfunMXyaWlpBpfAGjJkiNouVTl58iQAwN3dHf369TO6f/jw4cpyREQEMjIyLLZ35swZZblevXpo3ry5g0ZKRERElHeqw5yzszPGjx+vrP/8888Wy+/Zs0f5NWjz5s3RsWNHtV2qsmbNGgDAq6++avJwbnBwsPLduZSUFBw8eNBie/rnqpsyZYrjBkpERETkAKrDHPA81FSqVAnA82uh6h/azC0nXAHAhx9+aHT/pUuXEBgYCB8fH0ycONHoO3hZWVnYtm0b1q1bhzt37lgc1759+7B7925UrFgRixYtMlnGyckJ8+bNU9bXrl1rtr2YmBjs27cPAFClShWMGzfOYv9EREREBU7stGXLFgEgAGTy5Mkmyxw9elQpM3z4cJNlOnfurJQBIOvXrze4/5NPPlHuc3FxkenTp0tqaqpRO1euXJHy5cuLn5+fnDt3zur4BwwYIADEyclJTp8+bbLMuHHjlL53795ttc3c4uPjBYDEx8errktERERkC7v2zAHA4MGDERISAhcXFyxbtgyLFy82uMzVhQsXMGzYMABAnz59sHr1apPtZGdnG6xnZmYarOtf1D47OxvffPMN2rZti4MHD0Kn0yEhIQErVqxAu3btULVqVZw6dQqBgYFWx//f//4XvXv3hohg0KBBCA8PV+7T6XRYsGAB1qxZA1dXV6xcuRK9e/e2/qQQERERFTAnkbxdKDU0NBQzZsxAREQEGjVqhMDAQMTGxuLw4cPw8vLCrFmzMGfOHKPrm+a4ePEixowZg5s3b2LYsGEICQmBq6urcn98fDzGjBmDnTt3Gh2CdXV1RVZWFlq0aIFJkyZh/PjxZvsxJSsrC5999hm++uorJCcnIzg4GBUrVsS5c+fw559/omnTpvjmm28QFBRk13OTkJAAX19fxMfH89JeRERElC/yHOZyXLhwAWfOnMHjx49RqlQpBAQEICgoCKVKlXJE83jw4AGOHDmC2NhYJCUlwcfHB9WqVUPr1q1RrVq1PLWdkpKCo0eP4sqVK0hNTYW/vz/atGmDli1b5qldhjkiIiLKbw4Lc2SMYY6IiIjym93fmSMiIiKiwscwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGsYwR0RERKRhDHNEREREGubqqIZu3ryJY8eOITY2Fj4+Pqhfvz6CgoLg6uqwLhRJSUk4duwYbty4gYSEBJQrVw7NmjVDmzZt8qU/nU6HU6dO4ciRI5gzZ47D2yciIiKyV56TT3h4ON555x0cOnQIAQEBeOmllxAVFYVjx46hatWq+Mc//oE333zTEWNFYmIi/vGPf2D58uVIS0szur9KlSr4+OOP8dZbb6lqt02bNjh79qzVcuHh4araJSIiIspvTiIi9lbesmULRo0ahbS0NCxYsADvvvsunJycAAAnT55E37598ezZM0ycOBHLly9X7rPHgwcP0L17d0RERFgtO27cOHz//fc29Xf48GF06dLFarng4GCEhobaNNYcCQkJ8PX1RXx8PHx8fFTVJSIiIrKF3WEuNDQUvXr1QmZmJsaOHYs1a9YYldm7dy969+4NAPj8888xe/ZsuwaZnZ2Nzp074/jx46hfvz4GDRqEWrVqIS0tDZGRkdixYwcePnxoUGfhwoV49913rbbdu3dv7N2712IZb29vbN++3abQp49hjoiIiPKbXWEuPj4eAQEBuH//Ptzd3XHjxg1Uq1bNZNmuXbsiNDQUzs7OOHPmDAIDA1UPcsWKFZg0aRLef/99fPbZZ3B2NvzdRkpKCt5++22EhIQof/P29sbt27dRrlw5s+1GRESgWbNmmD9/Pt5//33V47KGYY6IiIjym12/Zl20aBHu378PAOjevbvZIAc8P+QJPP8RgT0/HhARLFiwAK+++irmz59vFOQAoHTp0li5ciUGDhyo/C0xMRG7d++2+ji8vb0xadIk1eMiIiIiKgpUh7mMjAysWLFCWR8wYIDF8v369VN+Ybp//36EhYWp6u/o0aN48uQJvv76a6tlP/30U4P133//3WzZqKgobNiwARMnToSvr6+qMREREREVFarDXGhoKJ4+faqst23b1mJ5X19fNGzYUFnfuHGjqv4OHTqESZMmoUKFClbLNmnSBFWqVFHW4+PjzZZdsmQJsrKycPfuXaxbtw7Xrl1TNS4iIiKiokB1mDt58qSy7ObmhoCAAKt1WrZsqSxv27ZNVX9dunTBjBkzbC5fvXp1Zdnc9+Xi4uKwatUqAMBPP/2EN954Aw0aNED16tUxefJkHD9+XNUYiYiIiAqL6jB36tQpZblq1apwd3e3WqdWrVrK8rVr15CQkGBzf8HBwQZ726zR/z2HfojUt2zZMiQlJRn9PTo6GsuXL0fHjh3RsWNHXL582eZ+iYiIiAqD6jCnf+LcypUr21Qnd7lLly6p7dZmsbGxAAB3d3f06tXLZJmHDx/ipZdeQq1ateDi4mKyzPHjx9G2bVvs27cv38ZKRERElFeqw9yTJ0+UZX9/f5vqlC9f3mD95s2baru1yePHj3H37l0AwGuvvWb2hw3/+te/cP78edy6dQtxcXHYv38/Jk+ejDJlyhiUS0pKwqBBgwz2RhIREREVJarCXHJyMjIzM5V1T09Pm+rlLqfmMKsaOacicXd3x9y5c22q4+Xlhe7du+O7775DdHQ0Zs6caXD6k7S0NAwdOhSJiYlW20pPT0dCQoLBjYiIiCg/qQpzuX8d6uHhYVO9ggpzOScNfvfdd1GvXj3V9f38/PDll1/i119/NdjreO/ePSxevNhq/fnz58PX11e56f8Yg4iIiCg/qApzuU/Ya+u1VnN/Ly0Pl4M168CBAzh+/DgCAwPx0Ucf5amtDh064MCBAwaHXdevX2+13uzZsxEfH6/coqKi8jQOIiIiImtUhbnc3ylLT0+3qV5aWprBure3t5purUpNTcX06dPh7++PLVu2wM3NLc9ttmjRAh988IGyfuvWLaPrv+bm4eEBHx8fgxsRERFRflId5vSDkq1hLnc5R19xYdasWbh79y527tyJmjVrOqzdadOmGRwijo6OdljbRERERI6g+jCrfljSvxKEJbnL1a9fX023Fq1duxarVq3CTz/9hHbt2jmsXeB5eA0KClLWc+9hJCIiIipsqk9N0qBBA2X5/v37NtWJiYkxWLflqhG2OHDgAP72t79h3bp16Nu3r0PazK1OnTrKsq2nYiEiIiIqKKrD3Msvv6ws37t3Dzqdzmod/R8CBAQEOOQw65kzZzBkyBAsWbIEI0aMyHN75vj5+SnLFStWzLd+iIiIiOyhOsx17dpVWU5NTcXVq1et1rlw4YKy3LNnT7VdGrl06RJ69+6N999/H5MmTcpze5Y8evQIwPNDw47+rh8RERFRXqkOc23atDG41uq5c+cslk9LS0NERISyPmTIELVdGrhx4wZ69OiBcePGYc6cOVbLP3nyBAcPHrS7v5MnTwIAXnnlFbvbICIiIsovqsOcs7Mzxo8fr6z//PPPFsvv2bNH+TVr8+bN0bFjR7VdKmJiYtC9e3f06dMHX3zxhU11Jk+ebNOhYFPOnj2LS5cuwdXV1eAxExERERUVqsMcAEyZMgWVKlUCAOzatUs5FGnKmjVrlOUPP/zQ6P5Lly4hMDAQPj4+mDhxotng9eTJE3Tv3h2BgYFYuXKlxfGJCG7cuIFx48bh9OnT6Natm3Lf1atXsXr1auzbtw9ZWVlm29DpdHj77bcBAB988IHBDz+IiIiIigyx05YtWwSAAJDJkyebLHP06FGlzPDhw02W6dy5s1IGgKxfv96oTEJCgrRq1UoAiLu7u3h4eFi8ubi4KO3985//VNpJTk4Wb29v5b769evL8ePHTY5r2rRpAkDGjBkjOp3OjmdIJD4+XgBIfHy8XfWJiIiIrLFrzxwADB48GCEhIXBxccGyZcuwePFig8t0XbhwAcOGDQMA9OnTB6tXrzbZTnZ2tsF6ZmamwXpaWhr69++vfDcvIyMD6enpFm85bTo7O2PcuHEGfWVkZCjr165dQ+fOnTF37lw8ePAAOp0OFy5cQP/+/bFy5UrMmzcPP/zwg82XLSMiIiIqaE4iebtQamhoKGbMmIGIiAg0atQIgYGBiI2NxeHDh+Hl5YVZs2Zhzpw5RtdnzXHx4kWMGTMGN2/exLBhwxASEgJXV1fl/nHjxuGHH36wa2y9e/fG7t27Df62efNmTJ06FQ8ePDAq7+rqCk9PTwwbNgzvvfceGjZsaFe/ORISEuDr64v4+Hhe2ouIiIjyRZ7DXI4LFy7gzJkzePz4MUqVKoWAgAAEBQWhVKlSjmjeoXQ6HU6cOIErV67gyZMncHFxQbly5dCkSRO0aNECHh4eDumHYY6IiIjym8PCHBljmCMiIqL8Zvd35oiIiIio8DHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWkYwxwRERGRhjHMEREREWmYq6MaunnzJo4dO4bY2Fj4+Pigfv36CAoKgqurw7owEBMTg6NHjyI6Ohqenp6oW7cuunTpAk9PT9VtpaWl4bfffsOVK1eQkpKCGjVqoH379qhVq5bjB05ERETkQHneMxceHo5u3bqhbt26WLhwISIiIrBhwwb06NEDtWvXxvfff++IcSru3LmDoUOHolq1apgzZw4uXLiAnTt3YtCgQahWrRrmz58PEbGprezsbMyfPx9VqlTBgAEDsHfvXoSFhWHWrFmoW7cuXnnlFcTExDh0/EREREQOJXmwefNm8fT0FACyYMEC0el0yn0nTpyQsmXLCgCZOHGiwX32OnbsmNLmtGnTJCMjQ7nvypUrUqtWLQEg/fv3l/T0dIttJSYmSnBwsACQRo0aya1bt5T7UlJSZOzYsQJAKlWqJOfPn7drvPHx8QJA4uPj7apPREREZI3dYe7QoUPi5uYmAGTs2LEmy+zZs0cACAD5/PPP7R6kiMilS5fEx8dHAEjXrl1NhsOIiAhxdXUVADJhwgSzbWVnZ0v37t0FgLi7u8vVq1eNymRlZUmHDh0EgFSrVk0ePnyoeswMc0RERJTf7DrMGh8fj1GjRiEzMxPu7u6YN2+eyXK9evVCly5dAAAffPABzp8/b093yMrKwmuvvYaEhAQAwOeffw4nJyejck2aNMHrr78OAAgJCcH27dtNtvfll1/iwIEDAIDx48ejfv36RmVcXFwwf/58AEB0dDTeeustu8ZORERElJ/sCnOLFi3C/fv3AQDdu3dHtWrVzJYdN24cAECn02HOnDn2dIcffvgB4eHhAIAXX3wRbdq0sdofAMydOxc6nc7g/idPnuCzzz4zWT63v/zlL0rQ27FjB06ePGnX+ImIiIjyi+owl5GRgRUrVijrAwYMsFi+X79+yi9a9+/fj7CwMLVd4ptvvrG5v/bt26NChQoAgMuXLxvtnfvhhx+UPXxVqlRBq1atLLY3cOBAZTlnTx0RERFRUaE6zIWGhuLp06fKetu2bS2W9/X1RcOGDZX1jRs3quovMjISly5dsrk/Z2dntG7d2mx/mzdvVpYt7eEz1d++ffsQHx9vtQ4RERFRQVEd5vQPNbq5uSEgIMBqnZYtWyrL27Zts7s/AGjevLmq/nbt2oXMzEwAQHp6Oi5cuGB3WxkZGdi1a5fVOkREREQFRXWYO3XqlLJctWpVuLu7W62jf/Lda9euKYc51fbn4uKCGjVqqOovOTkZkZGRAICwsDBkZGQo99WuXdtqWzVq1DD4sYW9P+IgIiIiyg+qw1zODxEAoHLlyjbVyV1O/7Cpmv4qVKgAZ2frQ87dX0REhFFbpsqZ4ubmBn9/f6O2iIiIiIoC1WHuyZMnyrJ+yLGkfPnyBus3b94slP7027K3PTVjJyIiIspvqsJccnKy8v0zADZfBzV3OTWHWZ89e+aw/vTbsrc9NWMnIiIiym+uagrn/iWnh4eHTfXyEub0+8xrf44Yv6Wxp6enIz09XVnP6Y8BkIiISHu8vb1NXqSgqFEV5nJ/X83WB+ji4mKwLiJ29ZnX/hwxfktjnz9/Pv75z38a/b169eo29UNERERFx8OHD42+ulUUqQpzZcqUMVjX3wtlSVpamsG6t7e3qj5z+slrf44Yv6Wxz549G++8846yHhcXh5o1a+Lu3bvw9fW1qS/SroSEBFSvXh1RUVHw8fEp7OFQPuP2Llm4vUuWnO1tyxk7igLVYc7Nzc3gvG22yF1OTbApW7ascpLivPZXtmxZi+Vsac/S2D08PEweuvX19eXkL0F8fHy4vUsQbu+Shdu7ZNHCIVZA5Q8gnJ2dUbNmTWVd/0oQluQuZ+rC9ubonwsur/3lPq+cPe2pGTsRERFRflN9apIGDRooy/fv37epTkxMjMG6LVeNMNVfbGysTd+3M9effluAbeMXEcTGxhq1RURERFQUqA5zL7/8srJ879496HQ6q3WioqKU5YCAAFWHWfX7y8zMNAhWtvTn4+ODF198EcDzy3eVLl1aue/u3btW27p//z6ysrKU9fbt29s0buD5Ydd//OMfNv9qlrSN27tk4fYuWbi9SxatbW/VYa5r167KcmpqKq5evWq1jv71UHv27Kmqvy5duhis//7776r669KlC1xdn3810N3dHX/5y1/sbsvFxQXdunWzWieHh4cHPv74Y838M1DecHuXLNzeJQu3d8mite2tOsy1adPG4Nqn586ds1g+LS3N4BJYQ4YMUdVf5cqVDQKYtf4A4OzZs2b7Gz58uKq2zpw5oywHBQWhXLlyVusQERERFRTVYc7Z2Rnjx49X1n/++WeL5ffs2aP8GrR58+bo2LGj2i4xYcIEm/u7cOEC7ty5A+D5tVyHDRtmcP+rr76q/BLpxo0buHz5ssX2tm/frixPnTpVzbCJiIiI8p3qMAcAU6ZMQaVKlQAAu3btwqNHj8yWXbNmjbL84YcfGt1/6dIlBAYGwsfHBxMnTjT5Hby//vWvaNKkCYDne9P++OMPm/p79913jXaRli5dGrNnz1bW165da7at8+fPIzw8HMDzINq/f3+zZYmIiIgKg5OouRyDnq1btyqHMCdPnozvvvvOqMyvv/6Kzp07A3h+eHPjxo1GZYKCgnD06FFlff369Rg9erRRubNnz+Lll19GVlYW+vbti19++cWozPXr1/Hiiy8iIyMD7du3x2+//WZ01QcAyMrKQosWLfDHH3/Azc0NH330Edq1a4egoCDl+3Uigh49euDgwYNwc3PD2bNn0bx5cxufHWMxMTE4evQooqOj4enpibp166JLly42Xx9WX1paGn777TdcuXIFKSkpqFGjBtq3b29w+JuIiKg40+l0uHfvXr5eZenJkyc4cuQI7ty5AycnJ9SuXRvBwcF2XQggKysLp06dQnh4OBISElC1alW0atXKMWfJkDwICQkRFxcXASBffvml6HQ65b6wsDCpUKGCAJA+ffpIYmKiyTY6duwoAJTb6tWrzfa3c+dOKV26tACQGTNmSEZGhnLfjRs3pGHDhgJAWrduLbGxsSbb+P3336Vr164CQDw9PQWA0ma1atVk1apVkp6eLlOmTBEAUqZMGfn555/tfIZEbt++LUOGDBEnJyepVauWjBgxQrp37y5ubm7ywgsvyOeff27wvFmSlZUln3/+uZQtW1ZKlSolffr0keHDh0uVKlXE2dlZBg0aJPfu3bN7rMXZjRs3ZO3atbJgwQJZtmyZHDhwQDIzM/Olr8TERNm9e7d8++238tlnn8ny5cvl+PHj+dZfdna2HD9+XD777LN8aV+LCnJ72yIuLk4mT54sT548sVo2MzNTjh07Jt99953Mnz9f1q9fL5cvXy6AUWpXUdvejsT5bUyn08mGDRukUaNGMmbMmHzp4/HjxzJ+/HhxdXWVSpUqybBhw6Rv375SpkwZ8fLykpkzZ0p6errN7a1cuVKqVq0qrq6u0r17dxkxYoTUqVNHAEhwcHCe53iewpyIyKFDh6Rp06YCQBo1aiQjR46Url27irOzs/j4+Mi8efMkKyvLbP0LFy5Is2bNxMvLS8aOHWt1AoaFhSkBsGbNmjJixAjp06ePeHh4iKenp0yfPl1SUlJM1t28ebMS4BYsWCCJiYkyffp08fT0FHd3d3F1dRUA4uXlJQCkY8eOcvHiRbufm2PHjknZsmUFgEybNs0gfF65ckVq1aolAKR///5W/ykSExMlODhYeZ5v3bql3JeSkiJjx44VAFKpUiU5f/683WMubvTDe0BAgIwcOVI6deokTk5OSnh3lISEBHn77beV/7HctypVqkhISIjqdlu3bm2yvdy38PBwhz0WrSrI7Z3jl19+sWn7DB482Gpb+f2CX9wUxPbOyMiQSpUq2bSNc9/eeustq+1zfttOp9PJ5s2bpUmTJsrzkh9h7sqVK1K9enUBIEOGDDHYGRUVFSWBgYECQNq1aydPnz612FZmZqaMGDFCAEjFihUNMkVmZqbMnTtXAIi3t7fs3bvX7jHnOczlCAsLkxUrVsinn34qixcvlt27d5sNVY4QGRkpq1evls8++0y++OIL2bZtm8TFxZktf+jQIXFzcxMAMnbsWIP7nj17Jlu3bpXx48cr/yBvv/12nsZ36dIl8fHxEQDStWtXk3vfIiIilAA5YcIEs21lZ2dL9+7dBYC4u7vL1atXjcpkZWVJhw4dlD2MDx8+zNP4i4Pc4V1/G5w4cUIJ2hMnTrR576g5sbGxyocaa7dx48bZ3F9oaKhNbQYHB+dp/MVBQW5vfZ06dbJpGx09etRsGwX1gl+cFNT2/umnn+wKcgBk48aNFtvm/Lbdjh07pEWLFkbPjaPD3P3796Vq1arKB4TU1FSTZfz8/ASA9OrVS7Kzs822N27cOKuvAX/9618FgPj4+EhkZKRd43ZYmCvK4uLipHLlykoYioqKMlu2S5cuAkCcnZ3l3LlzdvWXmZkpzZo1Uzbg6dOnzZbV39Dbtm0zWWbhwoVKmUmTJplt69dff1XKDRw40K6xFxeWwnuOPXv2KM/X559/bndf+kG6fv368ve//12+++47Wbx4sbz11lvK1w30bwsXLrSp7V69ell9off29pZDhw7ZPf7ioCC3t77Tp09b3T5OTk7Sp08fi+0U1At+cVGQ2zvnPQGAlCpVSqpWrWrxlvMh3tPT0+zXi3JwftsmJCREli9fLtHR0XLlyhWDox+ODnN9+vSxKYzPmzdPKbdkyRKTZfQ/CPTu3dtsWzdu3FB27LRs2dKuDx8lIszNmTNHeUL79u1rsey///1vpWyPHj3s6i8kJERp48UXX7RY9rffflPKNm7c2CjhP378WHlxACBnz5612F79+vWVsidOnLBr/FpX0OF9+fLlAkDef/99k5/QkpOTZcKECUYv0Na+PxUeHi4AZP78+XaNq6Qo6O2tb/DgwVKuXDlJSkqyu42CfMEvDgpye//555/i5OQkw4YNM3lExJSc0G3tAzXnt/3atm2bL2Fu//79Srt+fn4Wv/YVFRUlTk5OAkD8/f0lISHB4P6MjAzlq1QAZNOmTRb77tatm1L2v//9r+qxF/swl56eLuXKlVOepJUrV1osHxcXp7xgArDr+2f6x/Nnz55tsWx2drbBnpstW7YY3P/FF18o91WpUsVq37NmzVLK9+/fX/XYi4OCDO86nU5q1qwpr776qtWyAwcONAh0P/74o8Xyo0aNEm9vb4tfH6CC/7CW4+rVq+Ls7Cwffvih3W0U9At+cVCQ2/udd96R3r17WzyMpi8lJUXKlCkjAGT9+vUWy3J+269z5875Eub69u2rtDtixAir5Vu1aqWUX7x4scF9mzZtUu5zd3e3upd26dKlSvmmTZuqHnuxD3P6u9oB2PSDhhdffFEp/+6776rq78qVKwb9bd++3Wod/X+g4cOHG9yn/wlk0KBBVtvK/Q9U0l4oCjq8Hz58WLy8vOTBgwdWy0ZERBj8b8yaNcts2bt374qrq6vFMlQ4H9ZyTJw4UUqVKpWn76cW9Au+1hX09m7btq2q7btlyxYBIG5ubvLs2TOz5Ti/8yY/wlxiYqLB/8q//vUvq3VyznoBQNq0aWNwX84eWgDSokULq22dPXvW4P3hypUrqsZv10mDteTkyZPKspubm03nc2nZsqWyvG3bNrv7A2DTuen0+9u1axcyMzMBAOnp6QbXhlXbVkZGBnbt2mW1TnESGhqKp0+fKutt27a1WN7X1xcNGzZU1k2dC9GSQ4cOYdKkSahQoYLVsk2aNEGVKlWU9fj4eLNllyxZgqysLNy9exfr1q3DtWvXVI2rpCjo7Z3j4cOHWLduHcqWLYtVq1bh0KFDSElJUd3O5s2bleXGjRvDy8vLYnn9xxcREYHIyEjVfWpZQW/vkydPonz58jaX37RpE4Dn1zD38/MzW47zu+g5e/YssrKylHW177dnzpzBvXv3lHX9LGBLW02bNlXOcws8P5evGsU+zJ06dUpZrlq1Ktzd3a3W0T/57rVr15CQkGBXfy4uLqhRo4aq/pKTk5UX6LCwMGRkZCj31a5d22pbNWrUgJOTk7J+/vx5W4ZdbBR0eO/SpQtmzJhhc3n9k1uau85vXFwcVq1aBQD46aef8MYbb6BBgwaoXr06Jk+ejOPHj6saY3FW0Ns7xzfffIO0tDTExMRg7ty56NatG/z8/BAcHIylS5ciLi7OpnYK+gVf6wp6e+u/llqTmpqqnMze0jXIOb+LJv33bsC299vcJ+oPCwsDANy/fx93795V1ZaHh4dyZS1A/Xt3sQ9zOZfjAoDKlSvbVCd3uUuXLtnVX4UKFUxegcJafxEREUZtmSpnipubG/z9/Y3aKikKOrwHBwcb7G2zRvQuuKL/JqNv2bJlSEpKMvp7dHQ0li9fjo4dO6Jjx45WrytcEhT09gaef+Bavny50d8zMzNx5MgRTJs2DdWqVcOCBQtMXp4wR2G84GtdYWxvW+3ZswdJSUlwcXHBoEGDzJbj/C6acr/f6s8zcxz53p27nNr37mIf5p48eaIs64ccS3LvVr9582ah9Kfflr3tqRl7cVDQ4V2t2NhYAIC7uzt69eplsszDhw/x0ksvoVatWnBxcTFZ5vjx42jbti327duXb2PVgsLY3levXkXjxo0REBCAsmXLmiyTnJyM2bNnY8CAAUhPTzdZpjBe8LWuKM/vnEOsnTp1svhazfldNOm/33p7extd192UovTeXazDXHJysvL9MwA2Xwc1dzk1n+SePXvmsP7027K3vfz6FFpUFXR4V+Px48fKnpjXXnvN7LX9/vWvf+H8+fO4desW4uLisH//fkyePBllypQxKJeUlIRBgwYZHR4oSQpje7ds2RLHjh3D5cuX8fTpU0RGRmLFihUIDAw0Krtr1y6MGDHCYI+sqbED/LBmi6I6v9PS0mw6xApwfhdVRe29Ozs7W9X3cIt1mMv9BXNbkjaQtzCn32de+3PE+EtSmCuM8K7G7t27ATzfKzd37lyb6nh5eaF79+747rvvEB0djZkzZxocuk9LS8PQoUORmJiYL2MuyorK9m7YsCEmTpyIc+fOITQ0FA0aNDC4f9u2bViyZIlRvcJ4wdeyorK9Tck5xOrk5IRXXnnF5nqc30VHUXvv1m/PFsU6zOX+vpqtX2bNvevb1KdqW/rMa3+OGL+asWtdYUwgNUJCQgAA7777LurVq6e6vp+fH7788kv8+uuvBnsl7t27h8WLFztsnFpRFLd3cHAwLl68iLFjxxr8/eOPPzbqpyiOvygrys9XziHW9u3bq/oOrT7O78JV1N679duzRbEOc7l3W5v77kpuaWlpBuve3t529ZnX/hwxfjVj17rCmEC2OnDgAI4fP47AwEB89NFHeWqrQ4cOOHDggMH/x/r16/M6RM0pqtu7VKlSWL16NUaNGqX8LTExEdu3bzcoV1THX1QV1ecrLS0NO3fuBAAMHTo0z+1xfheOovberd+eLYp9mHNzc1PWbX1Cc5cz990mU/S/EJ3X/nJ/udqe9tSMXesKYwLZIjU1FdOnT4e/vz+2bNli8D9prxYtWuCDDz5Q1m/duoWHDx/muV0tKarbG3geNJYuXYqKFSsqf8v93aeiPP6iqKg+XzmHWAFg8ODBDmmT87vgFbX3bldXV6P/eUuKdZhzdnZGzZo1lXX9k01akrtc/fr1be5T//QCee0v96kK7GlPzdi1rjDCuy1mzZqFu3fvYufOnQb/j3k1bdo0g0NI0dHRDmtbC4rq9tZvd8KECcp67u1TGC/4WlZUt3fOIdZWrVpxfmuY/vttYmKiwQmEzcnP9+66deuqOs9hsQ5zAAy+jHz//n2b6sTExBis23JiSlP9xcbG2rRL31x/ub9Ibcv4RUQ5/YV+WyVBYYR3a9auXYtVq1bhp59+Qrt27RzWLvD8zS0oKEhZz70Horgrits7t759+yrLubdPYbzga1lR3N5qfsWqVkmf3wVN//1Wp9PhwYMHVuuYe+/O/T9mT/ZQ+95d7MPcyy+/rCzfu3fP4kk8c0RFRSnLAQEBqj7J6feXmZlpEKxs6c/HxwcvvvgigOdnhC9durRyn/4JRs25f/++wSeK9u3b2zTu4qKgw7slBw4cwN/+9jesW7fO4E3dkerUqaMs23qqhuKkKG1vUyxtn8J4wde6ora99+7dq/zS1NFhDuD8Lkj6792Abe+3+u/dTk5Oygf2ChUqGPzIzZa2srOzDf5X1b53F/sw17VrV2U5NTUVV69etVpH/3qoPXv2VNVfly5dDNZ///13Vf116dJFuVyPu7s7/vKXv9jdlouLC7p162a1TnFS0OHdnDNnzmDIkCFYsmQJRowYkef2zNG//qP+97NKiqKyvc2xtH0K4wVf64ra9s45xNq0adN82cNb0ud3QWrTpo3BtZHVvt82b97cYBvpZw9b2oqMjDTY+6o2exT7MNemTRuDy7mcO3fOYvm0tDSDs6qr/bRVuXJlgwBmrT/g+QV+zfU3fPhwVW2dOXNGWQ4KCjJ7/c/iqqDDuymXLl1C79698f7772PSpEl5bs+SR48eAXi+l6ck/dglR1HY3pbkbB/g+XeqcivoF3ytK0rbW/9XrPmxVw7g/C5Ibm5uBucIVPt+a+m9OyIiwuA669baqlevnk3XajYgJcAnn3wiAASADBs2zGLZrVu3KmWbN29uV38//vij0kbr1q0tlg0LC1PKVqhQQdLS0gzuT05OFh8fH6XMH3/8YbG9Zs2aKWW3bdtm1/i1LDs7W2rVqqU8Bz/++KPF8qmpqeLh4aGUP3bsWJ76v379ulSuXFlmzZplU/nHjx/LgQMH7O6vSZMmAkDeffddu9vQssLe3tZs3rxZAIi7u7vExcUZ3X/o0CFlLKVLl5b09HSL7a1Zs0YpX69evfwadpFVlLb3tm3blHYjIiIc1q6+kj6/zencubPy3I8ZM8Zh7R47dkxpt1KlSpKdnW227KNHj8TFxUWZ37GxsQb363Q6qV+/vtLerl27LPY9YMAApeySJUtUj71EhLmnT59KpUqVlBfMhw8fmi3br18/5QndvHmz0f0RERHy0ksvibe3t0yYMMHkxs7IyFAmoZOTk1y6dMlsf1OnTlX6+/LLL02WmT9/vlLm73//u9m2zp07ZxBEs7KyzJYtzgo6vOe4d++e1K5dW8aPH29zneHDh8u+ffvs6u/MmTMCQFxdXeXPP/+0q43ioLC2ty369OkjAOT11183eX9Bv+AXB0Vle7/22msCQBo0aODQdnNwfpv34osvKtt11KhRNtWJjo6WoKAg8fLyksGDB0tSUpLJcj179rRpPn755ZdKualTp5os87///c+m/9V79+4pHzqqVKki8fHxNj0mfSUizImIbNmyRXlSJ0+ebLLM0aNHlTLDhw83WUb/EwEAWb9+vclyZ86cEVdXVwEgffv2NVnm2rVr4u7uLgCkffv2Zj8FZGZmyksvvaSE0du3bxuV0el00q1bNwEgbm5ucvHiRZNtlQQFHd5Fnu9ha9y4sQwdOtRqiNbpdHL9+nUZO3as1KxZ06DNP//8U77//nvZu3evZGZmmm0jOztbOnToIADk448/tthfcVeQ2zszM1O2bt0qa9euNTkP9e3du1cASMWKFY0+tesryBf84qAw5nduqamp4u3tLQBk9uzZNo+d8zvvoqKiDPa2NmvWTHQ6ndV6Y8aMMXjvnjdvnslyt2/fVrZtixYtJCMjw6jMkydPpHz58gJA6tSpYzYYivzfBzAnJyc5ffq0yTLjxo1TxrV7926rj8WUEhPmRERCQkKU3aJffvmlwT9AWFiYVKhQQQBInz59JDEx0WQbHTt2NPiHWL16tdn+du7cKaVLlxYAMmPGDIN/ihs3bkjDhg2VQ7GWXuxFRB48eCCtWrUSAPLiiy/KrVu3lPvS09NlypQpAkDKlCkjP//8s43PSPFVkOE9ISFB2Tbu7u7i4eFh8ZbzPwhA/vnPfyrtJCcnKy8iAKR+/fpy/Phxk+OaNm2acojBlhey4q6gtrf+XiEXFxeZPn26pKamGrVz5coVKV++vPj5+cm5c+esjr+gXvCLi4L+cJ7b9u3blTq2bF8Rzu+8SE5OlmvXrsnWrVuVHRv6t9GjR8uRI0fk9u3bRl9VyjFq1CiDOh9++KHZ/k6fPq2Etb/+9a8GeeDBgwdK0K5Xr57VvaZJSUnSu3dvASCVK1eW33//XbkvOztbOfLm6uoqK1euVPnM/J8SFeZEnn9HpWnTpgJAGjVqJCNHjpSuXbuKs7Oz+Pj4yLx58yzuWblw4YI0a9ZMvLy8ZOzYsRY/XYk8D4k5AbBmzZoyYsQI6dOnj3h4eIinp6dMnz5dUlJSbBp7UlKSTJ8+XTw9PcXT01P69OkjI0aMkBo1aggA6dixY4neI5dbQYT31NRUozcEW2/Ozs4SFRWltJWQkGDwiTNngs+ZM0diY2MlOztbwsLCpF+/fuLu7i7z5s3jC72egtjes2fPNtqOzZo1kwMHDkh2drbEx8fL8uXLxdfXV1q0aCGRkZE2jb2gXvCLk4L+cK5v5MiRAkBq1apl83g5v+337bff2vy6au674rdv35YOHTpImTJlZMCAAVb3bN+4cUP69u0rTk5OUrFiRRk2bJgMHDhQfHx8xNXVVUaPHi2PHz+2afyZmZny8ccfi4+Pj7i4uEi3bt1k5MiRyg6dpk2byuHDh1U+K4ZKXJjLERYWJitWrJBPP/1UFi9eLLt377Y5VNkjMjJSVq9eLZ999pl88cUXsm3bNpNfiLbFs2fPZOvWrfLFF1/IZ599JqtXr7b5TaOkye/wPnbsWLuCHADp3bu3UX+bNm2SihUrmizv6uqqjIPb27T83t5xcXEycOBAcXZ2Nrl9gOeHZlauXKn6O6sF8YJf3BT0h3MRkbS0NOVHae+8846q8XJ+a8/t27flxx9/lPnz58uCBQtkw4YN8uDBA7vaSk5Olt27d8vixYvl008/lRUrVkhYWJhDxukkUkKu0kwl2oULF3DmzBk8fvwYpUqVQkBAAIKCglCqVKnCHpoRnU6HEydO4MqVK3jy5AlcXFxQrlw5NGnSBC1atICHh0dhD7HIy+/t/eDBAxw5cgSxsbFISkqCj48PqlWrhtatW6NatWp5ajslJQVHjx7FlStXkJqaCn9/f7Rp0wYtW7Z0yNiLI85vKukY5oiIiIg0rNifNJiIiIioOGOYIyIiItIwhjkiIiIiDWOYIyIiItIwhjkiIiIiDWOYIyIiItIwhjkiIiIiDWOYIyIiItIwhjkiIiIiDWOYIyIiItIwhjkiIiIiDWOYIyIiItIwhjkiIiIiDWOYIyIiItIwhjkiIiIiDfv/AGsn30N/Gv3UAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# from analysis.mpl_styles import *\n",
    "import matplotlib\n",
    "import seaborn as sns\n",
    "\n",
    "# matplotlib.rcParams['mathtext.fontset'] = 'custom'\n",
    "matplotlib.rcParams[\"mathtext.rm\"] = \"Bitstream Vera Sans\"\n",
    "matplotlib.rcParams[\"mathtext.it\"] = \"Bitstream Vera Sans:italic\"\n",
    "matplotlib.rcParams[\"mathtext.bf\"] = \"Bitstream Vera Sans:bold\"\n",
    "matplotlib.rcParams[\"mathtext.fontset\"] = \"stix\"\n",
    "matplotlib.rcParams[\"font.family\"] = \"STIXGeneral\"\n",
    "matplotlib.rcParams[\"font.size\"] = \"28\"\n",
    "\n",
    "matplotlib.rcParams[\"axes.spines.right\"] = False\n",
    "matplotlib.rcParams[\"axes.spines.top\"] = False\n",
    "matplotlib.rcParams[\"axes.facecolor\"] = \"white\"\n",
    "matplotlib.rcParams[\"savefig.facecolor\"] = \"white\"\n",
    "\n",
    "palette = sns.color_palette(\"colorblind\")\n",
    "sns.set_palette(palette)\n",
    "ENTITY_COLOR = palette[0]\n",
    "CONTEXT_COLOR = palette[1]\n",
    "\n",
    "OPEN_COLOR = palette[2]\n",
    "CLOSED_COLOR = palette[3]\n",
    "LINE_COLOR = palette[7]\n",
    "\n",
    "REAL_COLOR = palette[5]\n",
    "FAKE_COLOR = palette[7]\n",
    "DIFF_COLOR = palette[6]\n",
    "\n",
    "FT_COLOR = palette[0]\n",
    "ICL_COLOR = palette[1]\n",
    "ZS_COLOR = palette[2]\n",
    "\n",
    "train_methods_colormap = {\n",
    "    \"finetune\": FT_COLOR,\n",
    "    \"few_shot\": ICL_COLOR,\n",
    "    \"zero-shot\": ZS_COLOR,\n",
    "    \"FT\": FT_COLOR,\n",
    "    \"IC\": ICL_COLOR,\n",
    "    \"ZS\": ZS_COLOR,\n",
    "}\n",
    "\n",
    "\n",
    "matplotlib.pyplot.title(r\"ABC123 vs $\\mathrm{ABC123}^{123}$\")\n",
    "matplotlib.rcParams[\"mathtext.rm\"]\n",
    "\n",
    "cm = sns.color_palette(\"coolwarm_r\", as_cmap=True)\n",
    "cm.set_bad(cm(-np.inf))\n",
    "print(matplotlib.colors.to_hex(cm(-np.inf)))\n",
    "cm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "##################\n",
    "### Parameters ###\n",
    "##################\n",
    "\n",
    "# Data parameters\n",
    "\n",
    "# wandb stuff\n",
    "PROJECT_NAME = \"sftcontext\"\n",
    "GROUP_NAME = None\n",
    "TAGS = [\"basefakepedia\", \"analysis\", \"summarize\", \"across-models\"]\n",
    "LOG_DATASETS = True"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Construct dataframes for analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "analysis_dir = \"analysis/summarize/generalization_datasets\"\n",
    "os.makedirs(analysis_dir, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[EvalConfig(dataset_name='BaseFakepedia', subsplit='nodup_relpid', k_demonstrations=0, context_weight_format='instruction', do_steering=False), EvalConfig(dataset_name='MultihopFakepedia', subsplit='nodup_relpid', k_demonstrations=0, context_weight_format='instruction', do_steering=False), EvalConfig(dataset_name='Arithmetic', subsplit='d2ub9', k_demonstrations=0, context_weight_format='instruction', do_steering=False), EvalConfig(dataset_name='BaseFakepedia', subsplit='nodup_relpid', k_demonstrations=10, context_weight_format='instruction', do_steering=False), EvalConfig(dataset_name='MultihopFakepedia', subsplit='nodup_relpid', k_demonstrations=10, context_weight_format='instruction', do_steering=False), EvalConfig(dataset_name='Arithmetic', subsplit='d2ub9', k_demonstrations=10, context_weight_format='instruction', do_steering=False)]\n"
     ]
    }
   ],
   "source": [
    "dataset_names = [\"BaseFakepedia\"]\n",
    "zero_shot_evals = [\n",
    "    {\n",
    "        \"dataset_name\": \"BaseFakepedia\",\n",
    "        \"subsplit\": \"nodup_relpid\",\n",
    "        \"k_demonstrations\": 0,\n",
    "        \"context_weight_format\": \"instruction\",\n",
    "        \"do_steering\": False,\n",
    "    },\n",
    "    {\n",
    "        \"dataset_name\": \"MultihopFakepedia\",\n",
    "        \"subsplit\": \"nodup_relpid\",\n",
    "        \"k_demonstrations\": 0,\n",
    "        \"context_weight_format\": \"instruction\",\n",
    "        \"do_steering\": False,\n",
    "    },\n",
    "    {\n",
    "        \"dataset_name\": \"Arithmetic\",\n",
    "        \"subsplit\": \"d2ub9\",\n",
    "        \"k_demonstrations\": 0,\n",
    "        \"context_weight_format\": \"instruction\",\n",
    "        \"do_steering\": False,\n",
    "    },\n",
    "]\n",
    "few_shot_evals = [\n",
    "    {\n",
    "        \"dataset_name\": \"BaseFakepedia\",\n",
    "        \"subsplit\": \"nodup_relpid\",\n",
    "        \"k_demonstrations\": 10,\n",
    "        \"context_weight_format\": \"instruction\",\n",
    "        \"do_steering\": False,\n",
    "    },\n",
    "    {\n",
    "        \"dataset_name\": \"MultihopFakepedia\",\n",
    "        \"subsplit\": \"nodup_relpid\",\n",
    "        \"k_demonstrations\": 10,\n",
    "        \"context_weight_format\": \"instruction\",\n",
    "        \"do_steering\": False,\n",
    "    },\n",
    "    {\n",
    "        \"dataset_name\": \"Arithmetic\",\n",
    "        \"subsplit\": \"d2ub9\",\n",
    "        \"k_demonstrations\": 10,\n",
    "        \"context_weight_format\": \"instruction\",\n",
    "        \"do_steering\": False,\n",
    "    },\n",
    "]\n",
    "evals = zero_shot_evals + few_shot_evals\n",
    "evals = [EvalConfig(**eval) for eval in evals]\n",
    "print(evals)\n",
    "subsplit_names = [\n",
    "    \"nodup_relpid\",\n",
    "]\n",
    "seeds = [1, 2, 3]\n",
    "train_sizes = [2048]\n",
    "no_train_statuses = [True, False]\n",
    "# no_train_statuses = [False]\n",
    "peft_modules = [\n",
    "    json.dumps(\n",
    "        [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\"],\n",
    "        # [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"],\n",
    "        separators=(\",\", \":\"),\n",
    "    ),\n",
    "]\n",
    "context_weight_formats = [\"instruction\"]\n",
    "model_id_and_bs_and_ga_and_quantize_and_peft_tuples = [\n",
    "    (\"Meta-Llama-3.1-8B-Instruct\", 8, 2, None, True),\n",
    "    # (\"Meta-Llama-3.1-8B\", 8, 2, None, True),\n",
    "    # (\"Meta-Llama-3.1-8B\", 4, 2, None, True),\n",
    "    (\"Mistral-7B-Instruct-v0.3\", 8, 2, None, True),\n",
    "    # (\"Mistral-7B-v0.3\", 4, 2, None, True),\n",
    "    (\"gemma-2-9b-it\", 2, 8, None, True),\n",
    "    # (\"gemma-2-9b\", 4, 2, None, True),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "overwrite = False\n",
    "df_dict = []\n",
    "p_scores_df_dict = []\n",
    "metrics_dict = []\n",
    "\n",
    "for ds in dataset_names:\n",
    "    for sp in subsplit_names:\n",
    "        for seed in seeds:\n",
    "            for ts in train_sizes:\n",
    "                for nts in no_train_statuses:\n",
    "                    for pm in peft_modules:\n",
    "                        for cwf in context_weight_formats:\n",
    "                            for (\n",
    "                                model_id,\n",
    "                                bs,\n",
    "                                ga,\n",
    "                                quantize,\n",
    "                                peft,\n",
    "                            ) in model_id_and_bs_and_ga_and_quantize_and_peft_tuples:\n",
    "                                dict_vals = dict(\n",
    "                                    DATASET_NAME=ds,\n",
    "                                    SUBSPLIT=sp,\n",
    "                                    SEED=seed,\n",
    "                                    TRAIN_SIZE=ts,\n",
    "                                    MODEL_ID=model_id,\n",
    "                                    PEFT=peft and not nts,\n",
    "                                    LORA_MODULES=json.loads(pm),\n",
    "                                    LOAD_IN_4BIT=(quantize == \"4bit\"),\n",
    "                                    LOAD_IN_8BIT=(quantize == \"8bit\"),\n",
    "                                    BATCH_SZ=bs,\n",
    "                                    GRAD_ACCUM=ga,\n",
    "                                    NO_TRAIN=nts,\n",
    "                                    CONTEXT_WEIGHT_AT_END=False,\n",
    "                                    CONTEXT_WEIGHT_FORMAT=cwf,\n",
    "                                    ANSWER_FORMAT_PROMPT_POSITION=\"end\",\n",
    "                                    ADD_ANSWER_FORMAT_PROMPT=False,\n",
    "                                )\n",
    "                                (\n",
    "                                    data_dir,\n",
    "                                    input_dir,\n",
    "                                    model_dir,\n",
    "                                    results_dir,\n",
    "                                    val_results_path,\n",
    "                                    data_id,\n",
    "                                    full_model_id,\n",
    "                                    DATASET_KWARGS_IDENTIFIABLE,\n",
    "                                    MODEL_KWARGS_IDENTIFIABLE,\n",
    "                                ) = construct_paths_and_dataset_kwargs(**dict_vals)\n",
    "                                for (\n",
    "                                    eval_name,\n",
    "                                    eval_subsplit,\n",
    "                                    eval_k_demonstrations,\n",
    "                                    eval_ctx_weight_format,\n",
    "                                    eval_do_steering,\n",
    "                                ) in evals:\n",
    "                                    test_results_dir = construct_test_results_dir(\n",
    "                                        base_results_dir=results_dir,\n",
    "                                        subsplit=eval_subsplit,\n",
    "                                        context_weight_format=eval_ctx_weight_format,\n",
    "                                        eval_name=eval_name,\n",
    "                                        k_demonstrations=eval_k_demonstrations,\n",
    "                                        in_domain_demonstrations=False,\n",
    "                                        answer_format_prompt_position=None,\n",
    "                                        add_answer_format_prompt=False,\n",
    "                                        do_steering=False,\n",
    "                                        steering_prior_value=None,\n",
    "                                        steering_context_value=None,\n",
    "                                        steering_layer=None,\n",
    "                                    )\n",
    "                                    test_results_path = os.path.join(\n",
    "                                        test_results_dir, \"test.csv\"\n",
    "                                    )\n",
    "                                    # test_pscore_results_path = os.path.join(test_results_dir, \"test_pscore.csv\")\n",
    "                                    test_metrics_path = os.path.join(\n",
    "                                        test_results_dir, \"metrics.json\"\n",
    "                                    )\n",
    "                                    test_metrics_query_only_path = os.path.join(\n",
    "                                        test_results_dir, \"metrics_query_only.json\"\n",
    "                                    )\n",
    "                                    if os.path.isfile(test_results_path):\n",
    "                                        # Load predictions/results\n",
    "                                        res = pd.read_csv(\n",
    "                                            test_results_path,\n",
    "                                        )\n",
    "                                        for k, v in dict_vals.items():\n",
    "                                            if isinstance(v, list):\n",
    "                                                v = [v] * len(res)\n",
    "                                            res[k] = v\n",
    "                                        scores: List[dict] = res.to_dict(\"records\")\n",
    "                                        df_dict += [\n",
    "                                            {\n",
    "                                                **dict_vals,\n",
    "                                                **{\n",
    "                                                    \"EVAL_NAME\": eval_name,\n",
    "                                                    \"EVAL_K_DEMONSTRATIONS\": eval_k_demonstrations,\n",
    "                                                    \"EVAL_CTX_WEIGHT_FORMAT\": eval_ctx_weight_format,\n",
    "                                                    \"TEACH_METHOD\": (\n",
    "                                                        \"few_shot\"\n",
    "                                                        if nts\n",
    "                                                        else \"finetune\"\n",
    "                                                    ),\n",
    "                                                },\n",
    "                                                **d,\n",
    "                                            }\n",
    "                                            for d in scores\n",
    "                                        ]\n",
    "                                    # if os.path.isfile(test_pscore_results_path):\n",
    "                                    #     # Load predictions/results\n",
    "                                    #     res = pd.read_csv(\n",
    "                                    #         test_pscore_results_path,\n",
    "                                    #     )\n",
    "                                    #     for k, v in dict_vals.items():\n",
    "                                    #         if isinstance(v, list):\n",
    "                                    #             v = [v] * len(res)\n",
    "                                    #         res[k] = v\n",
    "                                    #     p_scores: List[dict] = res.to_dict(\"records\")\n",
    "                                    #     p_scores_df_dict += [{**dict_vals, **{\"EVAL_NAME\": eval_name, \"EVAL_K_DEMONSTRATIONS\": eval_k_demonstrations, \"EVAL_CTX_WEIGHT_FORMAT\": eval_ctx_weight_format, \"TEACH_METHOD\": \"few_shot\" if nts else \"finetune\"}, **d} for d in p_scores]\n",
    "                                    if os.path.isfile(test_metrics_path):\n",
    "                                        # Load metrics\n",
    "                                        metrics = load_dataset_from_path(\n",
    "                                            test_metrics_path\n",
    "                                        )\n",
    "                                        if os.path.isfile(test_metrics_query_only_path):\n",
    "                                            metrics_query_only = load_dataset_from_path(\n",
    "                                                test_metrics_query_only_path\n",
    "                                            )\n",
    "                                            metrics_query_only = {\n",
    "                                                f\"QO_{k}\": v\n",
    "                                                for k, v in metrics_query_only.items()\n",
    "                                            }\n",
    "                                        else:\n",
    "                                            metrics_query_only = {}\n",
    "                                        metrics_dict += [\n",
    "                                            {\n",
    "                                                **dict_vals,\n",
    "                                                **{\n",
    "                                                    \"EVAL_NAME\": eval_name,\n",
    "                                                    \"EVAL_K_DEMONSTRATIONS\": eval_k_demonstrations,\n",
    "                                                    \"EVAL_CTX_WEIGHT_FORMAT\": eval_ctx_weight_format,\n",
    "                                                    \"TEACH_METHOD\": (\n",
    "                                                        \"few_shot\"\n",
    "                                                        if nts\n",
    "                                                        else \"finetune\"\n",
    "                                                    ),\n",
    "                                                },\n",
    "                                                **metrics,\n",
    "                                                **metrics_query_only,\n",
    "                                            }\n",
    "                                        ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 81 entries, 0 to 80\n",
      "Data columns (total 30 columns):\n",
      " #   Column                         Non-Null Count  Dtype  \n",
      "---  ------                         --------------  -----  \n",
      " 0   DATASET_NAME                   81 non-null     object \n",
      " 1   SUBSPLIT                       81 non-null     object \n",
      " 2   SEED                           81 non-null     int64  \n",
      " 3   TRAIN_SIZE                     81 non-null     int64  \n",
      " 4   MODEL_ID                       81 non-null     object \n",
      " 5   PEFT                           81 non-null     bool   \n",
      " 6   LORA_MODULES                   81 non-null     object \n",
      " 7   LOAD_IN_4BIT                   81 non-null     bool   \n",
      " 8   LOAD_IN_8BIT                   81 non-null     bool   \n",
      " 9   BATCH_SZ                       81 non-null     int64  \n",
      " 10  GRAD_ACCUM                     81 non-null     int64  \n",
      " 11  NO_TRAIN                       81 non-null     bool   \n",
      " 12  CONTEXT_WEIGHT_AT_END          81 non-null     bool   \n",
      " 13  CONTEXT_WEIGHT_FORMAT          81 non-null     object \n",
      " 14  ANSWER_FORMAT_PROMPT_POSITION  81 non-null     object \n",
      " 15  ADD_ANSWER_FORMAT_PROMPT       81 non-null     bool   \n",
      " 16  EVAL_NAME                      81 non-null     object \n",
      " 17  EVAL_K_DEMONSTRATIONS          81 non-null     int64  \n",
      " 18  EVAL_CTX_WEIGHT_FORMAT         81 non-null     object \n",
      " 19  TEACH_METHOD                   81 non-null     object \n",
      " 20  acc                            81 non-null     float64\n",
      " 21  context_acc                    81 non-null     float64\n",
      " 22  context_mr                     80 non-null     float64\n",
      " 23  context_pct_other              81 non-null     float64\n",
      " 24  overall_mr                     80 non-null     float64\n",
      " 25  overall_pct_other              81 non-null     float64\n",
      " 26  pair_acc                       81 non-null     float64\n",
      " 27  prior_acc                      81 non-null     float64\n",
      " 28  prior_mr                       80 non-null     float64\n",
      " 29  prior_pct_other                81 non-null     float64\n",
      "dtypes: bool(6), float64(10), int64(5), object(9)\n",
      "memory usage: 15.8+ KB\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Index(['DATASET_NAME', 'SUBSPLIT', 'SEED', 'TRAIN_SIZE', 'MODEL_ID', 'PEFT',\n",
       "       'LORA_MODULES', 'LOAD_IN_4BIT', 'LOAD_IN_8BIT', 'BATCH_SZ',\n",
       "       'GRAD_ACCUM', 'NO_TRAIN', 'CONTEXT_WEIGHT_AT_END',\n",
       "       'CONTEXT_WEIGHT_FORMAT', 'ANSWER_FORMAT_PROMPT_POSITION',\n",
       "       'ADD_ANSWER_FORMAT_PROMPT', 'EVAL_NAME', 'EVAL_K_DEMONSTRATIONS',\n",
       "       'EVAL_CTX_WEIGHT_FORMAT', 'TEACH_METHOD', 'acc', 'context_acc',\n",
       "       'context_mr', 'context_pct_other', 'overall_mr', 'overall_pct_other',\n",
       "       'pair_acc', 'prior_acc', 'prior_mr', 'prior_pct_other'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "metrics_df = pd.DataFrame(metrics_dict)\n",
    "# [[\"EVAL_NAME\", \"TEACH_METHOD\", \"accuracy\", \"QO_accuracy\", \"pair_accuracy\"]]\n",
    "metrics_df.info()\n",
    "metrics_df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_57298/2532063031.py:14: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  metrics_df_short[\"TEACH_METHOD\"] = metrics_df_short.apply(\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SEED</th>\n",
       "      <th>Model</th>\n",
       "      <th>Test Dataset</th>\n",
       "      <th>EVAL_K_DEMONSTRATIONS</th>\n",
       "      <th>Train/Eval Setting</th>\n",
       "      <th>pair_acc</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>BaseFakepedia</td>\n",
       "      <td>0</td>\n",
       "      <td>zero shot</td>\n",
       "      <td>0.448</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>MultihopFakepedia</td>\n",
       "      <td>0</td>\n",
       "      <td>zero shot</td>\n",
       "      <td>0.400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>Arithmetic</td>\n",
       "      <td>0</td>\n",
       "      <td>zero shot</td>\n",
       "      <td>0.312</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>BaseFakepedia</td>\n",
       "      <td>10</td>\n",
       "      <td>few_shot</td>\n",
       "      <td>0.868</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>MultihopFakepedia</td>\n",
       "      <td>10</td>\n",
       "      <td>few_shot</td>\n",
       "      <td>0.714</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>76</th>\n",
       "      <td>3</td>\n",
       "      <td>Mistral-7B-Instruct-v0.3</td>\n",
       "      <td>MultihopFakepedia</td>\n",
       "      <td>0</td>\n",
       "      <td>finetune</td>\n",
       "      <td>0.258</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>77</th>\n",
       "      <td>3</td>\n",
       "      <td>Mistral-7B-Instruct-v0.3</td>\n",
       "      <td>Arithmetic</td>\n",
       "      <td>0</td>\n",
       "      <td>finetune</td>\n",
       "      <td>0.252</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>78</th>\n",
       "      <td>3</td>\n",
       "      <td>gemma-2-9b-it</td>\n",
       "      <td>BaseFakepedia</td>\n",
       "      <td>0</td>\n",
       "      <td>finetune</td>\n",
       "      <td>0.844</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79</th>\n",
       "      <td>3</td>\n",
       "      <td>gemma-2-9b-it</td>\n",
       "      <td>MultihopFakepedia</td>\n",
       "      <td>0</td>\n",
       "      <td>finetune</td>\n",
       "      <td>0.796</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80</th>\n",
       "      <td>3</td>\n",
       "      <td>gemma-2-9b-it</td>\n",
       "      <td>Arithmetic</td>\n",
       "      <td>0</td>\n",
       "      <td>finetune</td>\n",
       "      <td>0.654</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>81 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    SEED                       Model       Test Dataset  \\\n",
       "0      1  Meta-Llama-3.1-8B-Instruct      BaseFakepedia   \n",
       "1      1  Meta-Llama-3.1-8B-Instruct  MultihopFakepedia   \n",
       "2      1  Meta-Llama-3.1-8B-Instruct         Arithmetic   \n",
       "3      1  Meta-Llama-3.1-8B-Instruct      BaseFakepedia   \n",
       "4      1  Meta-Llama-3.1-8B-Instruct  MultihopFakepedia   \n",
       "..   ...                         ...                ...   \n",
       "76     3    Mistral-7B-Instruct-v0.3  MultihopFakepedia   \n",
       "77     3    Mistral-7B-Instruct-v0.3         Arithmetic   \n",
       "78     3               gemma-2-9b-it      BaseFakepedia   \n",
       "79     3               gemma-2-9b-it  MultihopFakepedia   \n",
       "80     3               gemma-2-9b-it         Arithmetic   \n",
       "\n",
       "    EVAL_K_DEMONSTRATIONS Train/Eval Setting  pair_acc  \n",
       "0                       0          zero shot     0.448  \n",
       "1                       0          zero shot     0.400  \n",
       "2                       0          zero shot     0.312  \n",
       "3                      10           few_shot     0.868  \n",
       "4                      10           few_shot     0.714  \n",
       "..                    ...                ...       ...  \n",
       "76                      0           finetune     0.258  \n",
       "77                      0           finetune     0.252  \n",
       "78                      0           finetune     0.844  \n",
       "79                      0           finetune     0.796  \n",
       "80                      0           finetune     0.654  \n",
       "\n",
       "[81 rows x 6 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "metrics_df_short = metrics_df[\n",
    "    [\n",
    "        # \"DATASET_NAME\",\n",
    "        \"SEED\",\n",
    "        \"MODEL_ID\",\n",
    "        # \"NO_TRAIN\",\n",
    "        \"EVAL_NAME\",\n",
    "        \"EVAL_K_DEMONSTRATIONS\",\n",
    "        \"TEACH_METHOD\",\n",
    "        # \"acc\",\n",
    "        \"pair_acc\",\n",
    "    ]\n",
    "]\n",
    "metrics_df_short[\"TEACH_METHOD\"] = metrics_df_short.apply(\n",
    "    lambda x: (\n",
    "        \"zero shot\"\n",
    "        if x[\"TEACH_METHOD\"] == \"few_shot\" and x[\"EVAL_K_DEMONSTRATIONS\"] == 0\n",
    "        else x[\"TEACH_METHOD\"]\n",
    "    ),\n",
    "    axis=1,\n",
    ")\n",
    "metrics_df_short = metrics_df_short.rename(\n",
    "    columns={\n",
    "        \"MODEL_ID\": \"Model\",\n",
    "        \"EVAL_NAME\": \"Test Dataset\",\n",
    "        \"TEACH_METHOD\": \"Train/Eval Setting\",\n",
    "    }\n",
    ")\n",
    "metrics_df_short\n",
    "# metrics_df_short[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SEED</th>\n",
       "      <th>Model</th>\n",
       "      <th>Test Dataset</th>\n",
       "      <th>EVAL_K_DEMONSTRATIONS</th>\n",
       "      <th>Train/Eval Setting</th>\n",
       "      <th>pair_acc</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>BaseFakepedia</td>\n",
       "      <td>0</td>\n",
       "      <td>zero shot</td>\n",
       "      <td>0.448</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>2</td>\n",
       "      <td>Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>BaseFakepedia</td>\n",
       "      <td>0</td>\n",
       "      <td>zero shot</td>\n",
       "      <td>0.448</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>3</td>\n",
       "      <td>Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>BaseFakepedia</td>\n",
       "      <td>0</td>\n",
       "      <td>zero shot</td>\n",
       "      <td>0.448</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>BaseFakepedia</td>\n",
       "      <td>10</td>\n",
       "      <td>few_shot</td>\n",
       "      <td>0.868</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>2</td>\n",
       "      <td>Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>BaseFakepedia</td>\n",
       "      <td>10</td>\n",
       "      <td>few_shot</td>\n",
       "      <td>0.862</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>2</td>\n",
       "      <td>gemma-2-9b-it</td>\n",
       "      <td>Arithmetic</td>\n",
       "      <td>10</td>\n",
       "      <td>few_shot</td>\n",
       "      <td>0.464</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>3</td>\n",
       "      <td>gemma-2-9b-it</td>\n",
       "      <td>Arithmetic</td>\n",
       "      <td>10</td>\n",
       "      <td>few_shot</td>\n",
       "      <td>0.440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>1</td>\n",
       "      <td>gemma-2-9b-it</td>\n",
       "      <td>Arithmetic</td>\n",
       "      <td>0</td>\n",
       "      <td>finetune</td>\n",
       "      <td>0.602</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>2</td>\n",
       "      <td>gemma-2-9b-it</td>\n",
       "      <td>Arithmetic</td>\n",
       "      <td>0</td>\n",
       "      <td>finetune</td>\n",
       "      <td>0.626</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80</th>\n",
       "      <td>3</td>\n",
       "      <td>gemma-2-9b-it</td>\n",
       "      <td>Arithmetic</td>\n",
       "      <td>0</td>\n",
       "      <td>finetune</td>\n",
       "      <td>0.654</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>81 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    SEED                       Model   Test Dataset  EVAL_K_DEMONSTRATIONS  \\\n",
       "0      1  Meta-Llama-3.1-8B-Instruct  BaseFakepedia                      0   \n",
       "27     2  Meta-Llama-3.1-8B-Instruct  BaseFakepedia                      0   \n",
       "54     3  Meta-Llama-3.1-8B-Instruct  BaseFakepedia                      0   \n",
       "3      1  Meta-Llama-3.1-8B-Instruct  BaseFakepedia                     10   \n",
       "30     2  Meta-Llama-3.1-8B-Instruct  BaseFakepedia                     10   \n",
       "..   ...                         ...            ...                    ...   \n",
       "44     2               gemma-2-9b-it     Arithmetic                     10   \n",
       "71     3               gemma-2-9b-it     Arithmetic                     10   \n",
       "26     1               gemma-2-9b-it     Arithmetic                      0   \n",
       "53     2               gemma-2-9b-it     Arithmetic                      0   \n",
       "80     3               gemma-2-9b-it     Arithmetic                      0   \n",
       "\n",
       "   Train/Eval Setting  pair_acc  \n",
       "0           zero shot     0.448  \n",
       "27          zero shot     0.448  \n",
       "54          zero shot     0.448  \n",
       "3            few_shot     0.868  \n",
       "30           few_shot     0.862  \n",
       "..                ...       ...  \n",
       "44           few_shot     0.464  \n",
       "71           few_shot     0.440  \n",
       "26           finetune     0.602  \n",
       "53           finetune     0.626  \n",
       "80           finetune     0.654  \n",
       "\n",
       "[81 rows x 6 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Define the desired order for Test Dataset and Train/Eval Setting\n",
    "dataset_order = [\"BaseFakepedia\", \"MultihopFakepedia\", \"Arithmetic\"]\n",
    "setting_order = [\"zero shot\", \"few_shot\", \"finetune\"]\n",
    "\n",
    "# Create category datatypes with ordered categories\n",
    "metrics_df_short[\"Test Dataset\"] = pd.Categorical(\n",
    "    metrics_df_short[\"Test Dataset\"], categories=dataset_order, ordered=True\n",
    ")\n",
    "metrics_df_short[\"Train/Eval Setting\"] = pd.Categorical(\n",
    "    metrics_df_short[\"Train/Eval Setting\"], categories=setting_order, ordered=True\n",
    ")\n",
    "\n",
    "# Sort the DataFrame\n",
    "metrics_df_short = metrics_df_short.sort_values(\n",
    "    [\"Model\", \"Test Dataset\", \"Train/Eval Setting\"]\n",
    ")\n",
    "metrics_df_short"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_57298/2733578268.py:1: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
      "  metrics_df_avg = metrics_df_short.groupby(\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>pair_acc_mean</th>\n",
       "      <th>pair_acc_std</th>\n",
       "      <th>pair_acc_sem</th>\n",
       "      <th>Pair Acc (\\pm SD)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Model</th>\n",
       "      <th>Test Dataset</th>\n",
       "      <th>Train/Eval Setting</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"9\" valign=\"top\">Meta-Llama-3.1-8B-Instruct</th>\n",
       "      <th rowspan=\"3\" valign=\"top\">BaseFakepedia</th>\n",
       "      <th>zero shot</th>\n",
       "      <td>0.448000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.45 \\pm 0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>few_shot</th>\n",
       "      <td>0.884667</td>\n",
       "      <td>0.034196</td>\n",
       "      <td>0.019743</td>\n",
       "      <td>0.88 \\pm 0.03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>finetune</th>\n",
       "      <td>0.931333</td>\n",
       "      <td>0.016042</td>\n",
       "      <td>0.009262</td>\n",
       "      <td>0.93 \\pm 0.02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">MultihopFakepedia</th>\n",
       "      <th>zero shot</th>\n",
       "      <td>0.400000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.40 \\pm 0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>few_shot</th>\n",
       "      <td>0.650667</td>\n",
       "      <td>0.111433</td>\n",
       "      <td>0.064336</td>\n",
       "      <td>0.65 \\pm 0.11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>finetune</th>\n",
       "      <td>0.881333</td>\n",
       "      <td>0.032021</td>\n",
       "      <td>0.018487</td>\n",
       "      <td>0.88 \\pm 0.03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">Arithmetic</th>\n",
       "      <th>zero shot</th>\n",
       "      <td>0.312000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.31 \\pm 0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>few_shot</th>\n",
       "      <td>0.410667</td>\n",
       "      <td>0.024111</td>\n",
       "      <td>0.013920</td>\n",
       "      <td>0.41 \\pm 0.02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>finetune</th>\n",
       "      <td>0.539333</td>\n",
       "      <td>0.006429</td>\n",
       "      <td>0.003712</td>\n",
       "      <td>0.54 \\pm 0.01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"9\" valign=\"top\">Mistral-7B-Instruct-v0.3</th>\n",
       "      <th rowspan=\"3\" valign=\"top\">BaseFakepedia</th>\n",
       "      <th>zero shot</th>\n",
       "      <td>0.114000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.11 \\pm 0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>few_shot</th>\n",
       "      <td>0.338667</td>\n",
       "      <td>0.024111</td>\n",
       "      <td>0.013920</td>\n",
       "      <td>0.34 \\pm 0.02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>finetune</th>\n",
       "      <td>0.920000</td>\n",
       "      <td>0.025534</td>\n",
       "      <td>0.014742</td>\n",
       "      <td>0.92 \\pm 0.03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">MultihopFakepedia</th>\n",
       "      <th>zero shot</th>\n",
       "      <td>0.142000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.14 \\pm 0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>few_shot</th>\n",
       "      <td>0.202000</td>\n",
       "      <td>0.027055</td>\n",
       "      <td>0.015620</td>\n",
       "      <td>0.20 \\pm 0.03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>finetune</th>\n",
       "      <td>0.414000</td>\n",
       "      <td>0.136880</td>\n",
       "      <td>0.079027</td>\n",
       "      <td>0.41 \\pm 0.14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">Arithmetic</th>\n",
       "      <th>zero shot</th>\n",
       "      <td>0.088000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.09 \\pm 0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>few_shot</th>\n",
       "      <td>0.148667</td>\n",
       "      <td>0.021385</td>\n",
       "      <td>0.012347</td>\n",
       "      <td>0.15 \\pm 0.02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>finetune</th>\n",
       "      <td>0.302667</td>\n",
       "      <td>0.045490</td>\n",
       "      <td>0.026264</td>\n",
       "      <td>0.30 \\pm 0.05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"9\" valign=\"top\">gemma-2-9b-it</th>\n",
       "      <th rowspan=\"3\" valign=\"top\">BaseFakepedia</th>\n",
       "      <th>zero shot</th>\n",
       "      <td>0.446667</td>\n",
       "      <td>0.001155</td>\n",
       "      <td>0.000667</td>\n",
       "      <td>0.45 \\pm 0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>few_shot</th>\n",
       "      <td>0.825333</td>\n",
       "      <td>0.046576</td>\n",
       "      <td>0.026891</td>\n",
       "      <td>0.83 \\pm 0.05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>finetune</th>\n",
       "      <td>0.844000</td>\n",
       "      <td>0.002000</td>\n",
       "      <td>0.001155</td>\n",
       "      <td>0.84 \\pm 0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">MultihopFakepedia</th>\n",
       "      <th>zero shot</th>\n",
       "      <td>0.606667</td>\n",
       "      <td>0.004619</td>\n",
       "      <td>0.002667</td>\n",
       "      <td>0.61 \\pm 0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>few_shot</th>\n",
       "      <td>0.829333</td>\n",
       "      <td>0.025716</td>\n",
       "      <td>0.014847</td>\n",
       "      <td>0.83 \\pm 0.03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>finetune</th>\n",
       "      <td>0.783333</td>\n",
       "      <td>0.018583</td>\n",
       "      <td>0.010729</td>\n",
       "      <td>0.78 \\pm 0.02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">Arithmetic</th>\n",
       "      <th>zero shot</th>\n",
       "      <td>0.548667</td>\n",
       "      <td>0.001155</td>\n",
       "      <td>0.000667</td>\n",
       "      <td>0.55 \\pm 0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>few_shot</th>\n",
       "      <td>0.434667</td>\n",
       "      <td>0.032332</td>\n",
       "      <td>0.018667</td>\n",
       "      <td>0.43 \\pm 0.03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>finetune</th>\n",
       "      <td>0.627333</td>\n",
       "      <td>0.026026</td>\n",
       "      <td>0.015026</td>\n",
       "      <td>0.63 \\pm 0.03</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                 pair_acc_mean  \\\n",
       "Model                      Test Dataset      Train/Eval Setting                  \n",
       "Meta-Llama-3.1-8B-Instruct BaseFakepedia     zero shot                0.448000   \n",
       "                                             few_shot                 0.884667   \n",
       "                                             finetune                 0.931333   \n",
       "                           MultihopFakepedia zero shot                0.400000   \n",
       "                                             few_shot                 0.650667   \n",
       "                                             finetune                 0.881333   \n",
       "                           Arithmetic        zero shot                0.312000   \n",
       "                                             few_shot                 0.410667   \n",
       "                                             finetune                 0.539333   \n",
       "Mistral-7B-Instruct-v0.3   BaseFakepedia     zero shot                0.114000   \n",
       "                                             few_shot                 0.338667   \n",
       "                                             finetune                 0.920000   \n",
       "                           MultihopFakepedia zero shot                0.142000   \n",
       "                                             few_shot                 0.202000   \n",
       "                                             finetune                 0.414000   \n",
       "                           Arithmetic        zero shot                0.088000   \n",
       "                                             few_shot                 0.148667   \n",
       "                                             finetune                 0.302667   \n",
       "gemma-2-9b-it              BaseFakepedia     zero shot                0.446667   \n",
       "                                             few_shot                 0.825333   \n",
       "                                             finetune                 0.844000   \n",
       "                           MultihopFakepedia zero shot                0.606667   \n",
       "                                             few_shot                 0.829333   \n",
       "                                             finetune                 0.783333   \n",
       "                           Arithmetic        zero shot                0.548667   \n",
       "                                             few_shot                 0.434667   \n",
       "                                             finetune                 0.627333   \n",
       "\n",
       "                                                                 pair_acc_std  \\\n",
       "Model                      Test Dataset      Train/Eval Setting                 \n",
       "Meta-Llama-3.1-8B-Instruct BaseFakepedia     zero shot               0.000000   \n",
       "                                             few_shot                0.034196   \n",
       "                                             finetune                0.016042   \n",
       "                           MultihopFakepedia zero shot               0.000000   \n",
       "                                             few_shot                0.111433   \n",
       "                                             finetune                0.032021   \n",
       "                           Arithmetic        zero shot               0.000000   \n",
       "                                             few_shot                0.024111   \n",
       "                                             finetune                0.006429   \n",
       "Mistral-7B-Instruct-v0.3   BaseFakepedia     zero shot               0.000000   \n",
       "                                             few_shot                0.024111   \n",
       "                                             finetune                0.025534   \n",
       "                           MultihopFakepedia zero shot               0.000000   \n",
       "                                             few_shot                0.027055   \n",
       "                                             finetune                0.136880   \n",
       "                           Arithmetic        zero shot               0.000000   \n",
       "                                             few_shot                0.021385   \n",
       "                                             finetune                0.045490   \n",
       "gemma-2-9b-it              BaseFakepedia     zero shot               0.001155   \n",
       "                                             few_shot                0.046576   \n",
       "                                             finetune                0.002000   \n",
       "                           MultihopFakepedia zero shot               0.004619   \n",
       "                                             few_shot                0.025716   \n",
       "                                             finetune                0.018583   \n",
       "                           Arithmetic        zero shot               0.001155   \n",
       "                                             few_shot                0.032332   \n",
       "                                             finetune                0.026026   \n",
       "\n",
       "                                                                 pair_acc_sem  \\\n",
       "Model                      Test Dataset      Train/Eval Setting                 \n",
       "Meta-Llama-3.1-8B-Instruct BaseFakepedia     zero shot               0.000000   \n",
       "                                             few_shot                0.019743   \n",
       "                                             finetune                0.009262   \n",
       "                           MultihopFakepedia zero shot               0.000000   \n",
       "                                             few_shot                0.064336   \n",
       "                                             finetune                0.018487   \n",
       "                           Arithmetic        zero shot               0.000000   \n",
       "                                             few_shot                0.013920   \n",
       "                                             finetune                0.003712   \n",
       "Mistral-7B-Instruct-v0.3   BaseFakepedia     zero shot               0.000000   \n",
       "                                             few_shot                0.013920   \n",
       "                                             finetune                0.014742   \n",
       "                           MultihopFakepedia zero shot               0.000000   \n",
       "                                             few_shot                0.015620   \n",
       "                                             finetune                0.079027   \n",
       "                           Arithmetic        zero shot               0.000000   \n",
       "                                             few_shot                0.012347   \n",
       "                                             finetune                0.026264   \n",
       "gemma-2-9b-it              BaseFakepedia     zero shot               0.000667   \n",
       "                                             few_shot                0.026891   \n",
       "                                             finetune                0.001155   \n",
       "                           MultihopFakepedia zero shot               0.002667   \n",
       "                                             few_shot                0.014847   \n",
       "                                             finetune                0.010729   \n",
       "                           Arithmetic        zero shot               0.000667   \n",
       "                                             few_shot                0.018667   \n",
       "                                             finetune                0.015026   \n",
       "\n",
       "                                                                Pair Acc (\\pm SD)  \n",
       "Model                      Test Dataset      Train/Eval Setting                    \n",
       "Meta-Llama-3.1-8B-Instruct BaseFakepedia     zero shot              0.45 \\pm 0.00  \n",
       "                                             few_shot               0.88 \\pm 0.03  \n",
       "                                             finetune               0.93 \\pm 0.02  \n",
       "                           MultihopFakepedia zero shot              0.40 \\pm 0.00  \n",
       "                                             few_shot               0.65 \\pm 0.11  \n",
       "                                             finetune               0.88 \\pm 0.03  \n",
       "                           Arithmetic        zero shot              0.31 \\pm 0.00  \n",
       "                                             few_shot               0.41 \\pm 0.02  \n",
       "                                             finetune               0.54 \\pm 0.01  \n",
       "Mistral-7B-Instruct-v0.3   BaseFakepedia     zero shot              0.11 \\pm 0.00  \n",
       "                                             few_shot               0.34 \\pm 0.02  \n",
       "                                             finetune               0.92 \\pm 0.03  \n",
       "                           MultihopFakepedia zero shot              0.14 \\pm 0.00  \n",
       "                                             few_shot               0.20 \\pm 0.03  \n",
       "                                             finetune               0.41 \\pm 0.14  \n",
       "                           Arithmetic        zero shot              0.09 \\pm 0.00  \n",
       "                                             few_shot               0.15 \\pm 0.02  \n",
       "                                             finetune               0.30 \\pm 0.05  \n",
       "gemma-2-9b-it              BaseFakepedia     zero shot              0.45 \\pm 0.00  \n",
       "                                             few_shot               0.83 \\pm 0.05  \n",
       "                                             finetune               0.84 \\pm 0.00  \n",
       "                           MultihopFakepedia zero shot              0.61 \\pm 0.00  \n",
       "                                             few_shot               0.83 \\pm 0.03  \n",
       "                                             finetune               0.78 \\pm 0.02  \n",
       "                           Arithmetic        zero shot              0.55 \\pm 0.00  \n",
       "                                             few_shot               0.43 \\pm 0.03  \n",
       "                                             finetune               0.63 \\pm 0.03  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "metrics_df_avg = metrics_df_short.groupby(\n",
    "    [\n",
    "        \"Model\",\n",
    "        \"Test Dataset\",\n",
    "        \"Train/Eval Setting\",\n",
    "    ]\n",
    ").agg(\n",
    "    pair_acc_mean=(\"pair_acc\", \"mean\"),\n",
    "    pair_acc_std=(\"pair_acc\", \"std\"),\n",
    "    pair_acc_sem=(\"pair_acc\", \"sem\"),\n",
    ")\n",
    "# metrics_df_avg[\"pair_acc_mean\"]\n",
    "metrics_df_avg[\"Pair Acc (\\pm SD)\"] = metrics_df_avg.apply(\n",
    "    lambda x: f\"{x['pair_acc_mean']:.2f} \\pm {x['pair_acc_std']:.2f}\", axis=1\n",
    ")\n",
    "metrics_df_avg\n",
    "# metrics_df_avg.reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Test Dataset</th>\n",
       "      <th>BaseFakepedia</th>\n",
       "      <th>MultihopFakepedia</th>\n",
       "      <th>Arithmetic</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Model</th>\n",
       "      <th>Train/Eval Setting</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">Meta-Llama-3.1-8B-Instruct</th>\n",
       "      <th>zero shot</th>\n",
       "      <td>0.45 \\pm 0.00</td>\n",
       "      <td>0.40 \\pm 0.00</td>\n",
       "      <td>0.31 \\pm 0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>few_shot</th>\n",
       "      <td>0.88 \\pm 0.03</td>\n",
       "      <td>0.65 \\pm 0.11</td>\n",
       "      <td>0.41 \\pm 0.02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>finetune</th>\n",
       "      <td>0.93 \\pm 0.02</td>\n",
       "      <td>0.88 \\pm 0.03</td>\n",
       "      <td>0.54 \\pm 0.01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">Mistral-7B-Instruct-v0.3</th>\n",
       "      <th>zero shot</th>\n",
       "      <td>0.11 \\pm 0.00</td>\n",
       "      <td>0.14 \\pm 0.00</td>\n",
       "      <td>0.09 \\pm 0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>few_shot</th>\n",
       "      <td>0.34 \\pm 0.02</td>\n",
       "      <td>0.20 \\pm 0.03</td>\n",
       "      <td>0.15 \\pm 0.02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>finetune</th>\n",
       "      <td>0.92 \\pm 0.03</td>\n",
       "      <td>0.41 \\pm 0.14</td>\n",
       "      <td>0.30 \\pm 0.05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">gemma-2-9b-it</th>\n",
       "      <th>zero shot</th>\n",
       "      <td>0.45 \\pm 0.00</td>\n",
       "      <td>0.61 \\pm 0.00</td>\n",
       "      <td>0.55 \\pm 0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>few_shot</th>\n",
       "      <td>0.83 \\pm 0.05</td>\n",
       "      <td>0.83 \\pm 0.03</td>\n",
       "      <td>0.43 \\pm 0.03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>finetune</th>\n",
       "      <td>0.84 \\pm 0.00</td>\n",
       "      <td>0.78 \\pm 0.02</td>\n",
       "      <td>0.63 \\pm 0.03</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Test Dataset                                   BaseFakepedia  \\\n",
       "Model                      Train/Eval Setting                  \n",
       "Meta-Llama-3.1-8B-Instruct zero shot           0.45 \\pm 0.00   \n",
       "                           few_shot            0.88 \\pm 0.03   \n",
       "                           finetune            0.93 \\pm 0.02   \n",
       "Mistral-7B-Instruct-v0.3   zero shot           0.11 \\pm 0.00   \n",
       "                           few_shot            0.34 \\pm 0.02   \n",
       "                           finetune            0.92 \\pm 0.03   \n",
       "gemma-2-9b-it              zero shot           0.45 \\pm 0.00   \n",
       "                           few_shot            0.83 \\pm 0.05   \n",
       "                           finetune            0.84 \\pm 0.00   \n",
       "\n",
       "Test Dataset                                  MultihopFakepedia     Arithmetic  \n",
       "Model                      Train/Eval Setting                                   \n",
       "Meta-Llama-3.1-8B-Instruct zero shot              0.40 \\pm 0.00  0.31 \\pm 0.00  \n",
       "                           few_shot               0.65 \\pm 0.11  0.41 \\pm 0.02  \n",
       "                           finetune               0.88 \\pm 0.03  0.54 \\pm 0.01  \n",
       "Mistral-7B-Instruct-v0.3   zero shot              0.14 \\pm 0.00  0.09 \\pm 0.00  \n",
       "                           few_shot               0.20 \\pm 0.03  0.15 \\pm 0.02  \n",
       "                           finetune               0.41 \\pm 0.14  0.30 \\pm 0.05  \n",
       "gemma-2-9b-it              zero shot              0.61 \\pm 0.00  0.55 \\pm 0.00  \n",
       "                           few_shot               0.83 \\pm 0.03  0.43 \\pm 0.03  \n",
       "                           finetune               0.78 \\pm 0.02  0.63 \\pm 0.03  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "metrics_df_unpivoted = metrics_df_avg.reset_index().pivot(\n",
    "    index=[\"Model\", \"Train/Eval Setting\"],\n",
    "    columns=\"Test Dataset\",\n",
    "    values=\"Pair Acc (\\pm SD)\",\n",
    ")\n",
    "metrics_df_unpivoted"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# metrics_df_unpivoted = metrics_df_avg.reset_index().pivot(\n",
    "#     index=[\"Model\", \"Train/Eval Setting\"],\n",
    "#     columns=\"Test Dataset\",\n",
    "#     values=\"pair_acc_mean\",\n",
    "# )\n",
    "# df = metrics_df_unpivoted.reset_index()\n",
    "# df = df[df[\"Model\"] == \"Meta-Llama-3.1-8B\"][\n",
    "#     [\"Train/Eval Setting\", \"BaseFakepedia\", \"MultihopFakepedia\", \"Arithmetic\"]\n",
    "# ].set_index(\"Train/Eval Setting\")\n",
    "# print(df)\n",
    "\n",
    "# sns.heatmap(\n",
    "#     data=df,\n",
    "#     annot=True,\n",
    "#     # fmt=\"s\",\n",
    "#     cmap=\"coolwarm\",\n",
    "#     cbar_kws={\"label\": \"Pair Accuracy\"},\n",
    "# )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{lllll}\n",
      "\\toprule\n",
      " & Test Dataset & BaseFakepedia & MultihopFakepedia & Arithmetic \\\\\n",
      "Model & Train/Eval Setting &  &  &  \\\\\n",
      "\\midrule\n",
      "\\multirow[t]{3}{*}{Meta-Llama-3.1-8B-Instruct} & zero shot & 0.45 \\pm 0.00 & 0.40 \\pm 0.00 & 0.31 \\pm 0.00 \\\\\n",
      " & few_shot & 0.88 \\pm 0.03 & 0.65 \\pm 0.11 & 0.41 \\pm 0.02 \\\\\n",
      " & finetune & 0.93 \\pm 0.02 & 0.88 \\pm 0.03 & 0.54 \\pm 0.01 \\\\\n",
      "\\cline{1-5}\n",
      "\\multirow[t]{3}{*}{Mistral-7B-Instruct-v0.3} & zero shot & 0.11 \\pm 0.00 & 0.14 \\pm 0.00 & 0.09 \\pm 0.00 \\\\\n",
      " & few_shot & 0.34 \\pm 0.02 & 0.20 \\pm 0.03 & 0.15 \\pm 0.02 \\\\\n",
      " & finetune & 0.92 \\pm 0.03 & 0.41 \\pm 0.14 & 0.30 \\pm 0.05 \\\\\n",
      "\\cline{1-5}\n",
      "\\multirow[t]{3}{*}{gemma-2-9b-it} & zero shot & 0.45 \\pm 0.00 & 0.61 \\pm 0.00 & 0.55 \\pm 0.00 \\\\\n",
      " & few_shot & 0.83 \\pm 0.05 & 0.83 \\pm 0.03 & 0.43 \\pm 0.03 \\\\\n",
      " & finetune & 0.84 \\pm 0.00 & 0.78 \\pm 0.02 & 0.63 \\pm 0.03 \\\\\n",
      "\\cline{1-5}\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(\n",
    "    metrics_df_unpivoted.to_latex(\n",
    "        multicolumn=True,\n",
    "        multirow=True,\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{lllr}\n",
      "\\toprule\n",
      "Model & Test Dataset & Train/Eval Setting & Pair Acc ($\\pm$ SD) \\\\\n",
      "\\midrule\n",
      "Meta-Llama-3.1-8B-Instruct & BaseFakepedia & zero shot & 0.45 $\\pm$ 0.00 \\\\\n",
      "Meta-Llama-3.1-8B-Instruct & BaseFakepedia & few_shot & 0.88 $\\pm$ 0.03 \\\\\n",
      "Meta-Llama-3.1-8B-Instruct & BaseFakepedia & finetune & 0.93 $\\pm$ 0.02 \\\\\n",
      "Meta-Llama-3.1-8B-Instruct & MultihopFakepedia & zero shot & 0.40 $\\pm$ 0.00 \\\\\n",
      "Meta-Llama-3.1-8B-Instruct & MultihopFakepedia & few_shot & 0.65 $\\pm$ 0.11 \\\\\n",
      "Meta-Llama-3.1-8B-Instruct & MultihopFakepedia & finetune & 0.88 $\\pm$ 0.03 \\\\\n",
      "Meta-Llama-3.1-8B-Instruct & Arithmetic & zero shot & 0.31 $\\pm$ 0.00 \\\\\n",
      "Meta-Llama-3.1-8B-Instruct & Arithmetic & few_shot & 0.41 $\\pm$ 0.02 \\\\\n",
      "Meta-Llama-3.1-8B-Instruct & Arithmetic & finetune & 0.54 $\\pm$ 0.01 \\\\\n",
      "Mistral-7B-Instruct-v0.3 & BaseFakepedia & zero shot & 0.11 $\\pm$ 0.00 \\\\\n",
      "Mistral-7B-Instruct-v0.3 & BaseFakepedia & few_shot & 0.34 $\\pm$ 0.02 \\\\\n",
      "Mistral-7B-Instruct-v0.3 & BaseFakepedia & finetune & 0.92 $\\pm$ 0.03 \\\\\n",
      "Mistral-7B-Instruct-v0.3 & MultihopFakepedia & zero shot & 0.14 $\\pm$ 0.00 \\\\\n",
      "Mistral-7B-Instruct-v0.3 & MultihopFakepedia & few_shot & 0.20 $\\pm$ 0.03 \\\\\n",
      "Mistral-7B-Instruct-v0.3 & MultihopFakepedia & finetune & 0.41 $\\pm$ 0.14 \\\\\n",
      "Mistral-7B-Instruct-v0.3 & Arithmetic & zero shot & 0.09 $\\pm$ 0.00 \\\\\n",
      "Mistral-7B-Instruct-v0.3 & Arithmetic & few_shot & 0.15 $\\pm$ 0.02 \\\\\n",
      "Mistral-7B-Instruct-v0.3 & Arithmetic & finetune & 0.30 $\\pm$ 0.05 \\\\\n",
      "gemma-2-9b-it & BaseFakepedia & zero shot & 0.45 $\\pm$ 0.00 \\\\\n",
      "gemma-2-9b-it & BaseFakepedia & few_shot & 0.83 $\\pm$ 0.05 \\\\\n",
      "gemma-2-9b-it & BaseFakepedia & finetune & 0.84 $\\pm$ 0.00 \\\\\n",
      "gemma-2-9b-it & MultihopFakepedia & zero shot & 0.61 $\\pm$ 0.00 \\\\\n",
      "gemma-2-9b-it & MultihopFakepedia & few_shot & 0.83 $\\pm$ 0.03 \\\\\n",
      "gemma-2-9b-it & MultihopFakepedia & finetune & 0.78 $\\pm$ 0.02 \\\\\n",
      "gemma-2-9b-it & Arithmetic & zero shot & 0.55 $\\pm$ 0.00 \\\\\n",
      "gemma-2-9b-it & Arithmetic & few_shot & 0.43 $\\pm$ 0.03 \\\\\n",
      "gemma-2-9b-it & Arithmetic & finetune & 0.63 $\\pm$ 0.03 \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# metrics_df_avg.to_latex()\n",
    "# Convert to LaTeX\n",
    "latex_table = (\n",
    "    metrics_df_avg[\"Pair Acc (\\\\pm SD)\"]\n",
    "    .reset_index()\n",
    "    .to_latex(index=False, escape=False, column_format=\"lllr\")\n",
    ")\n",
    "\n",
    "# Improve the LaTeX output\n",
    "latex_table = latex_table.replace(\"pair\\_acc\\_mean\", \"pair\\\\_acc\\\\_mean\")\n",
    "latex_table = latex_table.replace(\"pair\\_acc\\_std\", \"pair\\\\_acc\\\\_std\")\n",
    "latex_table = latex_table.replace(\"pair\\_acc\\_sem\", \"pair\\\\_acc\\\\_sem\")\n",
    "latex_table = latex_table.replace(\"Train/Eval Setting\", \"Train/Eval Setting\")\n",
    "latex_table = latex_table.replace(\"\\pm\", \"$\\\\pm$\")\n",
    "\n",
    "print(latex_table)\n",
    "\n",
    "# Optionally, save to a file\n",
    "with open(\"table.tex\", \"w\") as f:\n",
    "    f.write(latex_table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SEED</th>\n",
       "      <th>Model</th>\n",
       "      <th>Test Dataset</th>\n",
       "      <th>EVAL_K_DEMONSTRATIONS</th>\n",
       "      <th>Train/Eval Setting</th>\n",
       "      <th>pair_acc</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>BaseFakepedia</td>\n",
       "      <td>0</td>\n",
       "      <td>zero shot</td>\n",
       "      <td>0.448</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>2</td>\n",
       "      <td>Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>BaseFakepedia</td>\n",
       "      <td>0</td>\n",
       "      <td>zero shot</td>\n",
       "      <td>0.448</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>3</td>\n",
       "      <td>Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>BaseFakepedia</td>\n",
       "      <td>0</td>\n",
       "      <td>zero shot</td>\n",
       "      <td>0.448</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>BaseFakepedia</td>\n",
       "      <td>10</td>\n",
       "      <td>few_shot</td>\n",
       "      <td>0.868</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>2</td>\n",
       "      <td>Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>BaseFakepedia</td>\n",
       "      <td>10</td>\n",
       "      <td>few_shot</td>\n",
       "      <td>0.862</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>2</td>\n",
       "      <td>gemma-2-9b-it</td>\n",
       "      <td>Arithmetic</td>\n",
       "      <td>10</td>\n",
       "      <td>few_shot</td>\n",
       "      <td>0.464</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>3</td>\n",
       "      <td>gemma-2-9b-it</td>\n",
       "      <td>Arithmetic</td>\n",
       "      <td>10</td>\n",
       "      <td>few_shot</td>\n",
       "      <td>0.440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>1</td>\n",
       "      <td>gemma-2-9b-it</td>\n",
       "      <td>Arithmetic</td>\n",
       "      <td>0</td>\n",
       "      <td>finetune</td>\n",
       "      <td>0.602</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>2</td>\n",
       "      <td>gemma-2-9b-it</td>\n",
       "      <td>Arithmetic</td>\n",
       "      <td>0</td>\n",
       "      <td>finetune</td>\n",
       "      <td>0.626</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80</th>\n",
       "      <td>3</td>\n",
       "      <td>gemma-2-9b-it</td>\n",
       "      <td>Arithmetic</td>\n",
       "      <td>0</td>\n",
       "      <td>finetune</td>\n",
       "      <td>0.654</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>81 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    SEED                       Model   Test Dataset  EVAL_K_DEMONSTRATIONS  \\\n",
       "0      1  Meta-Llama-3.1-8B-Instruct  BaseFakepedia                      0   \n",
       "27     2  Meta-Llama-3.1-8B-Instruct  BaseFakepedia                      0   \n",
       "54     3  Meta-Llama-3.1-8B-Instruct  BaseFakepedia                      0   \n",
       "3      1  Meta-Llama-3.1-8B-Instruct  BaseFakepedia                     10   \n",
       "30     2  Meta-Llama-3.1-8B-Instruct  BaseFakepedia                     10   \n",
       "..   ...                         ...            ...                    ...   \n",
       "44     2               gemma-2-9b-it     Arithmetic                     10   \n",
       "71     3               gemma-2-9b-it     Arithmetic                     10   \n",
       "26     1               gemma-2-9b-it     Arithmetic                      0   \n",
       "53     2               gemma-2-9b-it     Arithmetic                      0   \n",
       "80     3               gemma-2-9b-it     Arithmetic                      0   \n",
       "\n",
       "   Train/Eval Setting  pair_acc  \n",
       "0           zero shot     0.448  \n",
       "27          zero shot     0.448  \n",
       "54          zero shot     0.448  \n",
       "3            few_shot     0.868  \n",
       "30           few_shot     0.862  \n",
       "..                ...       ...  \n",
       "44           few_shot     0.464  \n",
       "71           few_shot     0.440  \n",
       "26           finetune     0.602  \n",
       "53           finetune     0.626  \n",
       "80           finetune     0.654  \n",
       "\n",
       "[81 rows x 6 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "metrics_df_short"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: nbformat in /home/kevin/mambaforge/envs/sftcontext/lib/python3.10/site-packages (5.10.4)\n",
      "Requirement already satisfied: fastjsonschema>=2.15 in /home/kevin/mambaforge/envs/sftcontext/lib/python3.10/site-packages (from nbformat) (2.20.0)\n",
      "Requirement already satisfied: jsonschema>=2.6 in /home/kevin/mambaforge/envs/sftcontext/lib/python3.10/site-packages (from nbformat) (4.23.0)\n",
      "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /home/kevin/mambaforge/envs/sftcontext/lib/python3.10/site-packages (from nbformat) (5.7.2)\n",
      "Requirement already satisfied: traitlets>=5.1 in /home/kevin/mambaforge/envs/sftcontext/lib/python3.10/site-packages (from nbformat) (5.14.3)\n",
      "Requirement already satisfied: attrs>=22.2.0 in /home/kevin/mambaforge/envs/sftcontext/lib/python3.10/site-packages (from jsonschema>=2.6->nbformat) (23.2.0)\n",
      "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /home/kevin/mambaforge/envs/sftcontext/lib/python3.10/site-packages (from jsonschema>=2.6->nbformat) (2023.12.1)\n",
      "Requirement already satisfied: referencing>=0.28.4 in /home/kevin/mambaforge/envs/sftcontext/lib/python3.10/site-packages (from jsonschema>=2.6->nbformat) (0.35.1)\n",
      "Requirement already satisfied: rpds-py>=0.7.1 in /home/kevin/mambaforge/envs/sftcontext/lib/python3.10/site-packages (from jsonschema>=2.6->nbformat) (0.20.0)\n",
      "Requirement already satisfied: platformdirs>=2.5 in /home/kevin/mambaforge/envs/sftcontext/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->nbformat) (4.2.2)\n",
      "\u001b[33mDEPRECATION: nb-black 1.0.7 has a non-standard dependency specifier black>='19.3'; python_version >= \"3.6\". pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of nb-black or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n",
      "\u001b[0m"
     ]
    }
   ],
   "source": [
    "!pip install --upgrade nbformat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip install git+https://github.com/jkminder/pycolors.git\n",
    "from pycolors import TailwindColorPalette\n",
    "\n",
    "COLORS = TailwindColorPalette()\n",
    "\n",
    "\n",
    "def get_label_color(label, COLORS):\n",
    "    if \"FT\" in label:\n",
    "        return COLORS.get_shade(0, 700)\n",
    "    elif \"FS\" in label or \"ICL\" in label:\n",
    "        return COLORS.get_shade(9, 600)\n",
    "    elif \"ZS\" in label:\n",
    "        return COLORS.get_shade(6, 600)\n",
    "    else:\n",
    "        return \"black\"  # Default color"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_57298/4037465814.py:18: FutureWarning:\n",
      "\n",
      "The behavior of Series.replace (and DataFrame.replace) with CategoricalDtype is deprecated. In a future version, replace will only be used for cases that preserve the categories. To change the categories, use ser.cat.rename_categories instead.\n",
      "\n",
      "/tmp/ipykernel_57298/4037465814.py:21: FutureWarning:\n",
      "\n",
      "The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['#fd850d', '#cf2e33', '#009ddb']\n"
     ]
    },
    {
     "data": {
      "application/vnd.plotly.v1+json": {
       "config": {
        "plotlyServerURL": "https://plot.ly"
       },
       "data": [
        {
         "error_y": {
          "array": [
           0,
           0,
           0
          ],
          "thickness": 6,
          "type": "data",
          "visible": true,
          "width": 9
         },
         "marker": {
          "color": "#fd850d"
         },
         "name": "ZS",
         "type": "bar",
         "x": [
          "BaseFakepedia",
          "MultihopFakepedia",
          "Arithmetic"
         ],
         "y": [
          0.448,
          0.4000000000000001,
          0.312
         ]
        },
        {
         "error_y": {
          "array": [
           0.034195516275285776,
           0.11143308904151104,
           0.024110855093366818
          ],
          "thickness": 6,
          "type": "data",
          "visible": true,
          "width": 9
         },
         "marker": {
          "color": "#cf2e33"
         },
         "name": "ICL",
         "type": "bar",
         "x": [
          "BaseFakepedia",
          "MultihopFakepedia",
          "Arithmetic"
         ],
         "y": [
          0.8846666666666666,
          0.6506666666666666,
          0.4106666666666667
         ]
        },
        {
         "error_y": {
          "array": [
           0.016041612554021242,
           0.032020826556060915,
           0.006429100507328626
          ],
          "thickness": 6,
          "type": "data",
          "visible": true,
          "width": 9
         },
         "marker": {
          "color": "#009ddb"
         },
         "name": "FT",
         "type": "bar",
         "x": [
          "BaseFakepedia",
          "MultihopFakepedia",
          "Arithmetic"
         ],
         "y": [
          0.9313333333333333,
          0.8813333333333334,
          0.5393333333333333
         ]
        }
       ],
       "layout": {
        "barmode": "group",
        "font": {
         "family": "Computer Modern",
         "size": 24
        },
        "legend": {
         "font": {
          "family": "Computer Modern",
          "size": 60
         },
         "traceorder": "normal",
         "x": 0.785,
         "y": 0.98
        },
        "margin": {
         "b": 50,
         "l": 50,
         "r": 50,
         "t": 50
        },
        "showlegend": true,
        "template": {
         "data": {
          "bar": [
           {
            "error_x": {
             "color": "#2a3f5f"
            },
            "error_y": {
             "color": "#2a3f5f"
            },
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "bar"
           }
          ],
          "barpolar": [
           {
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "barpolar"
           }
          ],
          "carpet": [
           {
            "aaxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "baxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "type": "carpet"
           }
          ],
          "choropleth": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "choropleth"
           }
          ],
          "contour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "contour"
           }
          ],
          "contourcarpet": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "contourcarpet"
           }
          ],
          "heatmap": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmap"
           }
          ],
          "heatmapgl": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmapgl"
           }
          ],
          "histogram": [
           {
            "marker": {
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "histogram"
           }
          ],
          "histogram2d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2d"
           }
          ],
          "histogram2dcontour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2dcontour"
           }
          ],
          "mesh3d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "mesh3d"
           }
          ],
          "parcoords": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "parcoords"
           }
          ],
          "pie": [
           {
            "automargin": true,
            "type": "pie"
           }
          ],
          "scatter": [
           {
            "fillpattern": {
             "fillmode": "overlay",
             "size": 10,
             "solidity": 0.2
            },
            "type": "scatter"
           }
          ],
          "scatter3d": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatter3d"
           }
          ],
          "scattercarpet": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattercarpet"
           }
          ],
          "scattergeo": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergeo"
           }
          ],
          "scattergl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergl"
           }
          ],
          "scattermapbox": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattermapbox"
           }
          ],
          "scatterpolar": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolar"
           }
          ],
          "scatterpolargl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolargl"
           }
          ],
          "scatterternary": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterternary"
           }
          ],
          "surface": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "surface"
           }
          ],
          "table": [
           {
            "cells": {
             "fill": {
              "color": "#EBF0F8"
             },
             "line": {
              "color": "white"
             }
            },
            "header": {
             "fill": {
              "color": "#C8D4E3"
             },
             "line": {
              "color": "white"
             }
            },
            "type": "table"
           }
          ]
         },
         "layout": {
          "annotationdefaults": {
           "arrowcolor": "#2a3f5f",
           "arrowhead": 0,
           "arrowwidth": 1
          },
          "autotypenumbers": "strict",
          "coloraxis": {
           "colorbar": {
            "outlinewidth": 0,
            "ticks": ""
           }
          },
          "colorscale": {
           "diverging": [
            [
             0,
             "#8e0152"
            ],
            [
             0.1,
             "#c51b7d"
            ],
            [
             0.2,
             "#de77ae"
            ],
            [
             0.3,
             "#f1b6da"
            ],
            [
             0.4,
             "#fde0ef"
            ],
            [
             0.5,
             "#f7f7f7"
            ],
            [
             0.6,
             "#e6f5d0"
            ],
            [
             0.7,
             "#b8e186"
            ],
            [
             0.8,
             "#7fbc41"
            ],
            [
             0.9,
             "#4d9221"
            ],
            [
             1,
             "#276419"
            ]
           ],
           "sequential": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ],
           "sequentialminus": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ]
          },
          "colorway": [
           "#636efa",
           "#EF553B",
           "#00cc96",
           "#ab63fa",
           "#FFA15A",
           "#19d3f3",
           "#FF6692",
           "#B6E880",
           "#FF97FF",
           "#FECB52"
          ],
          "font": {
           "color": "#2a3f5f"
          },
          "geo": {
           "bgcolor": "white",
           "lakecolor": "white",
           "landcolor": "#E5ECF6",
           "showlakes": true,
           "showland": true,
           "subunitcolor": "white"
          },
          "hoverlabel": {
           "align": "left"
          },
          "hovermode": "closest",
          "mapbox": {
           "style": "light"
          },
          "paper_bgcolor": "white",
          "plot_bgcolor": "#E5ECF6",
          "polar": {
           "angularaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "radialaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "scene": {
           "xaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "yaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "zaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           }
          },
          "shapedefaults": {
           "line": {
            "color": "#2a3f5f"
           }
          },
          "ternary": {
           "aaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "baxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "caxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "title": {
           "x": 0.05
          },
          "xaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          },
          "yaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          }
         }
        },
        "uniformtext": {
         "minsize": 25,
         "mode": "show"
        },
        "xaxis": {
         "tickfont": {
          "family": "Computer Modern",
          "size": 50
         },
         "title": {
          "font": {
           "family": "Computer Modern",
           "size": 72
          },
          "text": "Test Dataset"
         }
        },
        "yaxis": {
         "range": [
          0,
          1
         ],
         "tickfont": {
          "family": "Computer Modern",
          "size": 50
         },
         "title": {
          "font": {
           "family": "Computer Modern",
           "size": 72
          },
          "text": "PairAcc"
         }
        }
       }
      }
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_57298/4037465814.py:18: FutureWarning:\n",
      "\n",
      "The behavior of Series.replace (and DataFrame.replace) with CategoricalDtype is deprecated. In a future version, replace will only be used for cases that preserve the categories. To change the categories, use ser.cat.rename_categories instead.\n",
      "\n",
      "/tmp/ipykernel_57298/4037465814.py:21: FutureWarning:\n",
      "\n",
      "The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['#fd850d', '#cf2e33', '#009ddb']\n"
     ]
    },
    {
     "data": {
      "application/vnd.plotly.v1+json": {
       "config": {
        "plotlyServerURL": "https://plot.ly"
       },
       "data": [
        {
         "error_y": {
          "array": [
           0,
           0,
           0
          ],
          "thickness": 6,
          "type": "data",
          "visible": true,
          "width": 9
         },
         "marker": {
          "color": "#fd850d"
         },
         "name": "ZS",
         "type": "bar",
         "x": [
          "BaseFakepedia",
          "MultihopFakepedia",
          "Arithmetic"
         ],
         "y": [
          0.114,
          0.142,
          0.08800000000000001
         ]
        },
        {
         "error_y": {
          "array": [
           0.024110855093366825,
           0.02705549851693737,
           0.02138535324312726
          ],
          "thickness": 6,
          "type": "data",
          "visible": true,
          "width": 9
         },
         "marker": {
          "color": "#cf2e33"
         },
         "name": "ICL",
         "type": "bar",
         "x": [
          "BaseFakepedia",
          "MultihopFakepedia",
          "Arithmetic"
         ],
         "y": [
          0.33866666666666667,
          0.20199999999999999,
          0.14866666666666667
         ]
        },
        {
         "error_y": {
          "array": [
           0.025534290669607374,
           0.13687950905814938,
           0.045489925624618635
          ],
          "thickness": 6,
          "type": "data",
          "visible": true,
          "width": 9
         },
         "marker": {
          "color": "#009ddb"
         },
         "name": "FT",
         "type": "bar",
         "x": [
          "BaseFakepedia",
          "MultihopFakepedia",
          "Arithmetic"
         ],
         "y": [
          0.9199999999999999,
          0.414,
          0.3026666666666667
         ]
        }
       ],
       "layout": {
        "barmode": "group",
        "font": {
         "family": "Computer Modern",
         "size": 24
        },
        "legend": {
         "font": {
          "family": "Computer Modern",
          "size": 60
         },
         "traceorder": "normal",
         "x": 0.785,
         "y": 0.98
        },
        "margin": {
         "b": 50,
         "l": 50,
         "r": 50,
         "t": 50
        },
        "showlegend": true,
        "template": {
         "data": {
          "bar": [
           {
            "error_x": {
             "color": "#2a3f5f"
            },
            "error_y": {
             "color": "#2a3f5f"
            },
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "bar"
           }
          ],
          "barpolar": [
           {
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "barpolar"
           }
          ],
          "carpet": [
           {
            "aaxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "baxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "type": "carpet"
           }
          ],
          "choropleth": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "choropleth"
           }
          ],
          "contour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "contour"
           }
          ],
          "contourcarpet": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "contourcarpet"
           }
          ],
          "heatmap": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmap"
           }
          ],
          "heatmapgl": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmapgl"
           }
          ],
          "histogram": [
           {
            "marker": {
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "histogram"
           }
          ],
          "histogram2d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2d"
           }
          ],
          "histogram2dcontour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2dcontour"
           }
          ],
          "mesh3d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "mesh3d"
           }
          ],
          "parcoords": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "parcoords"
           }
          ],
          "pie": [
           {
            "automargin": true,
            "type": "pie"
           }
          ],
          "scatter": [
           {
            "fillpattern": {
             "fillmode": "overlay",
             "size": 10,
             "solidity": 0.2
            },
            "type": "scatter"
           }
          ],
          "scatter3d": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatter3d"
           }
          ],
          "scattercarpet": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattercarpet"
           }
          ],
          "scattergeo": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergeo"
           }
          ],
          "scattergl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergl"
           }
          ],
          "scattermapbox": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattermapbox"
           }
          ],
          "scatterpolar": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolar"
           }
          ],
          "scatterpolargl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolargl"
           }
          ],
          "scatterternary": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterternary"
           }
          ],
          "surface": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "surface"
           }
          ],
          "table": [
           {
            "cells": {
             "fill": {
              "color": "#EBF0F8"
             },
             "line": {
              "color": "white"
             }
            },
            "header": {
             "fill": {
              "color": "#C8D4E3"
             },
             "line": {
              "color": "white"
             }
            },
            "type": "table"
           }
          ]
         },
         "layout": {
          "annotationdefaults": {
           "arrowcolor": "#2a3f5f",
           "arrowhead": 0,
           "arrowwidth": 1
          },
          "autotypenumbers": "strict",
          "coloraxis": {
           "colorbar": {
            "outlinewidth": 0,
            "ticks": ""
           }
          },
          "colorscale": {
           "diverging": [
            [
             0,
             "#8e0152"
            ],
            [
             0.1,
             "#c51b7d"
            ],
            [
             0.2,
             "#de77ae"
            ],
            [
             0.3,
             "#f1b6da"
            ],
            [
             0.4,
             "#fde0ef"
            ],
            [
             0.5,
             "#f7f7f7"
            ],
            [
             0.6,
             "#e6f5d0"
            ],
            [
             0.7,
             "#b8e186"
            ],
            [
             0.8,
             "#7fbc41"
            ],
            [
             0.9,
             "#4d9221"
            ],
            [
             1,
             "#276419"
            ]
           ],
           "sequential": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ],
           "sequentialminus": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ]
          },
          "colorway": [
           "#636efa",
           "#EF553B",
           "#00cc96",
           "#ab63fa",
           "#FFA15A",
           "#19d3f3",
           "#FF6692",
           "#B6E880",
           "#FF97FF",
           "#FECB52"
          ],
          "font": {
           "color": "#2a3f5f"
          },
          "geo": {
           "bgcolor": "white",
           "lakecolor": "white",
           "landcolor": "#E5ECF6",
           "showlakes": true,
           "showland": true,
           "subunitcolor": "white"
          },
          "hoverlabel": {
           "align": "left"
          },
          "hovermode": "closest",
          "mapbox": {
           "style": "light"
          },
          "paper_bgcolor": "white",
          "plot_bgcolor": "#E5ECF6",
          "polar": {
           "angularaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "radialaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "scene": {
           "xaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "yaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "zaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           }
          },
          "shapedefaults": {
           "line": {
            "color": "#2a3f5f"
           }
          },
          "ternary": {
           "aaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "baxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "caxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "title": {
           "x": 0.05
          },
          "xaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          },
          "yaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          }
         }
        },
        "uniformtext": {
         "minsize": 25,
         "mode": "show"
        },
        "xaxis": {
         "tickfont": {
          "family": "Computer Modern",
          "size": 50
         },
         "title": {
          "font": {
           "family": "Computer Modern",
           "size": 72
          },
          "text": "Test Dataset"
         }
        },
        "yaxis": {
         "range": [
          0,
          1
         ],
         "tickfont": {
          "family": "Computer Modern",
          "size": 50
         },
         "title": {
          "font": {
           "family": "Computer Modern",
           "size": 72
          },
          "text": "PairAcc"
         }
        }
       }
      }
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_57298/4037465814.py:18: FutureWarning:\n",
      "\n",
      "The behavior of Series.replace (and DataFrame.replace) with CategoricalDtype is deprecated. In a future version, replace will only be used for cases that preserve the categories. To change the categories, use ser.cat.rename_categories instead.\n",
      "\n",
      "/tmp/ipykernel_57298/4037465814.py:21: FutureWarning:\n",
      "\n",
      "The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['#fd850d', '#cf2e33', '#009ddb']\n"
     ]
    },
    {
     "data": {
      "application/vnd.plotly.v1+json": {
       "config": {
        "plotlyServerURL": "https://plot.ly"
       },
       "data": [
        {
         "error_y": {
          "array": [
           0.0011547005383792605,
           0.004618802153516994,
           0.0011547005383792605
          ],
          "thickness": 6,
          "type": "data",
          "visible": true,
          "width": 9
         },
         "marker": {
          "color": "#fd850d"
         },
         "name": "ZS",
         "type": "bar",
         "x": [
          "BaseFakepedia",
          "MultihopFakepedia",
          "Arithmetic"
         ],
         "y": [
          0.4466666666666667,
          0.6066666666666666,
          0.5486666666666667
         ]
        },
        {
         "error_y": {
          "array": [
           0.04657610259922282,
           0.025716402029314522,
           0.03233161507461903
          ],
          "thickness": 6,
          "type": "data",
          "visible": true,
          "width": 9
         },
         "marker": {
          "color": "#cf2e33"
         },
         "name": "ICL",
         "type": "bar",
         "x": [
          "BaseFakepedia",
          "MultihopFakepedia",
          "Arithmetic"
         ],
         "y": [
          0.8253333333333334,
          0.8293333333333334,
          0.4346666666666667
         ]
        },
        {
         "error_y": {
          "array": [
           0.0020000000000000018,
           0.018583146486355166,
           0.026025628394590883
          ],
          "thickness": 6,
          "type": "data",
          "visible": true,
          "width": 9
         },
         "marker": {
          "color": "#009ddb"
         },
         "name": "FT",
         "type": "bar",
         "x": [
          "BaseFakepedia",
          "MultihopFakepedia",
          "Arithmetic"
         ],
         "y": [
          0.844,
          0.7833333333333333,
          0.6273333333333334
         ]
        }
       ],
       "layout": {
        "barmode": "group",
        "font": {
         "family": "Computer Modern",
         "size": 24
        },
        "legend": {
         "font": {
          "family": "Computer Modern",
          "size": 60
         },
         "traceorder": "normal",
         "x": 0.785,
         "y": 0.98
        },
        "margin": {
         "b": 50,
         "l": 50,
         "r": 50,
         "t": 50
        },
        "showlegend": true,
        "template": {
         "data": {
          "bar": [
           {
            "error_x": {
             "color": "#2a3f5f"
            },
            "error_y": {
             "color": "#2a3f5f"
            },
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "bar"
           }
          ],
          "barpolar": [
           {
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "barpolar"
           }
          ],
          "carpet": [
           {
            "aaxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "baxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "type": "carpet"
           }
          ],
          "choropleth": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "choropleth"
           }
          ],
          "contour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "contour"
           }
          ],
          "contourcarpet": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "contourcarpet"
           }
          ],
          "heatmap": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmap"
           }
          ],
          "heatmapgl": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmapgl"
           }
          ],
          "histogram": [
           {
            "marker": {
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "histogram"
           }
          ],
          "histogram2d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2d"
           }
          ],
          "histogram2dcontour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2dcontour"
           }
          ],
          "mesh3d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "mesh3d"
           }
          ],
          "parcoords": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "parcoords"
           }
          ],
          "pie": [
           {
            "automargin": true,
            "type": "pie"
           }
          ],
          "scatter": [
           {
            "fillpattern": {
             "fillmode": "overlay",
             "size": 10,
             "solidity": 0.2
            },
            "type": "scatter"
           }
          ],
          "scatter3d": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatter3d"
           }
          ],
          "scattercarpet": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattercarpet"
           }
          ],
          "scattergeo": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergeo"
           }
          ],
          "scattergl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergl"
           }
          ],
          "scattermapbox": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattermapbox"
           }
          ],
          "scatterpolar": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolar"
           }
          ],
          "scatterpolargl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolargl"
           }
          ],
          "scatterternary": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterternary"
           }
          ],
          "surface": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "surface"
           }
          ],
          "table": [
           {
            "cells": {
             "fill": {
              "color": "#EBF0F8"
             },
             "line": {
              "color": "white"
             }
            },
            "header": {
             "fill": {
              "color": "#C8D4E3"
             },
             "line": {
              "color": "white"
             }
            },
            "type": "table"
           }
          ]
         },
         "layout": {
          "annotationdefaults": {
           "arrowcolor": "#2a3f5f",
           "arrowhead": 0,
           "arrowwidth": 1
          },
          "autotypenumbers": "strict",
          "coloraxis": {
           "colorbar": {
            "outlinewidth": 0,
            "ticks": ""
           }
          },
          "colorscale": {
           "diverging": [
            [
             0,
             "#8e0152"
            ],
            [
             0.1,
             "#c51b7d"
            ],
            [
             0.2,
             "#de77ae"
            ],
            [
             0.3,
             "#f1b6da"
            ],
            [
             0.4,
             "#fde0ef"
            ],
            [
             0.5,
             "#f7f7f7"
            ],
            [
             0.6,
             "#e6f5d0"
            ],
            [
             0.7,
             "#b8e186"
            ],
            [
             0.8,
             "#7fbc41"
            ],
            [
             0.9,
             "#4d9221"
            ],
            [
             1,
             "#276419"
            ]
           ],
           "sequential": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ],
           "sequentialminus": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ]
          },
          "colorway": [
           "#636efa",
           "#EF553B",
           "#00cc96",
           "#ab63fa",
           "#FFA15A",
           "#19d3f3",
           "#FF6692",
           "#B6E880",
           "#FF97FF",
           "#FECB52"
          ],
          "font": {
           "color": "#2a3f5f"
          },
          "geo": {
           "bgcolor": "white",
           "lakecolor": "white",
           "landcolor": "#E5ECF6",
           "showlakes": true,
           "showland": true,
           "subunitcolor": "white"
          },
          "hoverlabel": {
           "align": "left"
          },
          "hovermode": "closest",
          "mapbox": {
           "style": "light"
          },
          "paper_bgcolor": "white",
          "plot_bgcolor": "#E5ECF6",
          "polar": {
           "angularaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "radialaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "scene": {
           "xaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "yaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "zaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           }
          },
          "shapedefaults": {
           "line": {
            "color": "#2a3f5f"
           }
          },
          "ternary": {
           "aaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "baxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "caxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "title": {
           "x": 0.05
          },
          "xaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          },
          "yaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          }
         }
        },
        "uniformtext": {
         "minsize": 25,
         "mode": "show"
        },
        "xaxis": {
         "tickfont": {
          "family": "Computer Modern",
          "size": 50
         },
         "title": {
          "font": {
           "family": "Computer Modern",
           "size": 72
          },
          "text": "Test Dataset"
         }
        },
        "yaxis": {
         "range": [
          0,
          1
         ],
         "tickfont": {
          "family": "Computer Modern",
          "size": 50
         },
         "title": {
          "font": {
           "family": "Computer Modern",
           "size": 72
          },
          "text": "PairAcc"
         }
        }
       }
      }
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "for model, metrics_df_short_i_only in metrics_df_short.groupby(\"Model\"):\n",
    "    # metrics_df_short_i_only = metrics_df_short[\n",
    "    #     (metrics_df_short[\"Model\"] == \"Meta-Llama-3.1-8B-Instruct\")\n",
    "    # ]\n",
    "    # metrics_df_short_i_only = metrics_df_short[\n",
    "    #     (metrics_df_short[\"Model\"] == \"gemma-2-9b-it\")\n",
    "    # ]\n",
    "    # metrics_df_short_i_only = metrics_df_short_i_only[\n",
    "    #     (metrics_df_short_i_only[\"Model\"] == \"Mistral-7B-Instruct-v0.3\")\n",
    "    # ]\n",
    "    import plotly.graph_objects as go\n",
    "    import pandas as pd\n",
    "\n",
    "    rename_dict = {\"few_shot\": \"ICL\", \"zero shot\": \"ZS\", \"finetune\": \"FT\"}\n",
    "\n",
    "    metrics_df_short_i_only[\"Train Method\"] = metrics_df_short_i_only[\n",
    "        \"Train/Eval Setting\"\n",
    "    ].replace(rename_dict)\n",
    "\n",
    "    aggregated_data = (\n",
    "        metrics_df_short_i_only.groupby([\"Test Dataset\", \"Train Method\"])[\"pair_acc\"]\n",
    "        .agg([\"mean\", \"std\"])\n",
    "        .reset_index()\n",
    "    )\n",
    "\n",
    "    fig = go.Figure()\n",
    "\n",
    "    order = [\"ZS\", \"ICL\", \"FT\"]\n",
    "\n",
    "    def get_custom_color(setting):\n",
    "        return get_label_color(setting, COLORS)\n",
    "\n",
    "    colors = [get_custom_color(setting) for setting in order]\n",
    "    print(colors)\n",
    "\n",
    "    for setting, color in zip(order, colors):\n",
    "        data = aggregated_data[aggregated_data[\"Train Method\"] == setting]\n",
    "        fig.add_trace(\n",
    "            go.Bar(\n",
    "                x=data[\"Test Dataset\"],\n",
    "                y=data[\"mean\"],\n",
    "                name=setting,\n",
    "                error_y=dict(\n",
    "                    type=\"data\",\n",
    "                    array=data[\"std\"],\n",
    "                    visible=True,\n",
    "                    thickness=6,\n",
    "                    width=9,\n",
    "                ),\n",
    "                marker_color=color,\n",
    "            )\n",
    "        )\n",
    "\n",
    "    fig.update_layout(\n",
    "        xaxis_title=\"Test Dataset\",\n",
    "        yaxis_title=\"PairAcc\",\n",
    "        barmode=\"group\",\n",
    "        font=dict(family=\"Computer Modern\", size=24),\n",
    "        margin=dict(l=50, r=50, t=50, b=50),\n",
    "        showlegend=True,\n",
    "    )\n",
    "\n",
    "    fig.update_xaxes(\n",
    "        title_font=dict(family=\"Computer Modern\", size=72),\n",
    "        tickfont=dict(family=\"Computer Modern\", size=50),\n",
    "    )\n",
    "    fig.update_yaxes(\n",
    "        title_font=dict(family=\"Computer Modern\", size=72),\n",
    "        tickfont=dict(family=\"Computer Modern\", size=50),\n",
    "    )\n",
    "\n",
    "    fig.update_layout(\n",
    "        legend=dict(\n",
    "            x=0.785,\n",
    "            y=0.98,\n",
    "            traceorder=\"normal\",\n",
    "            font=dict(\n",
    "                family=\"Computer Modern\",\n",
    "                size=60,\n",
    "                # color=\"black\"\n",
    "            ),\n",
    "        )\n",
    "    )\n",
    "    # legend=dict(font=dict(family=\"Computer Modern\", size=60)))\n",
    "\n",
    "    fig.update_layout(\n",
    "        uniformtext_minsize=25,\n",
    "        uniformtext_mode=\"show\",\n",
    "        yaxis_range=[0, 1],\n",
    "    )\n",
    "    fig.write_image(f\"{model}_generalization_dataset.pdf\", format=\"pdf\")\n",
    "    fig.write_image(\n",
    "        f\"{model}_generalization_dataset.png\",\n",
    "        format=\"png\",\n",
    "        scale=2,\n",
    "        width=1800,\n",
    "        height=900,\n",
    "    )\n",
    "    fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "sftcontext",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
