{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de369d3c",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8af77d29",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from pathlib import Path\n",
    "import matplotlib.pyplot as plt\n",
    "from experiments.summarize import main"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "29d68944",
   "metadata": {},
   "outputs": [],
   "source": [
    "COLS = [\n",
    "    \"post_score\",\n",
    "    \"post_rewrite_success\",\n",
    "    \"post_rewrite_diff\",\n",
    "    \"post_paraphrase_success\",\n",
    "    \"post_paraphrase_diff\",\n",
    "    \"post_neighborhood_success\",\n",
    "    \"post_neighborhood_diff\",\n",
    "    \"post_ngram_entropy\",\n",
    "    \"post_reference_score\",\n",
    "]\n",
    "OPTIM = [1, 1, 1, 1, 1, 1, 1, 1, 1]\n",
    "LIM = [50, 75, 20, 60, 10, 45, -5, 600, 31]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fba58b26",
   "metadata": {},
   "outputs": [],
   "source": [
    "BASELINE_NAMES = [\"GPT-2 XL\", \"GPT-2 M\", \"GPT-2 L\", \"GPT-J\"]\n",
    "\n",
    "\n",
    "def execute(RUN_DIR, RUN_DATA, FIRST_N):\n",
    "    data = {}\n",
    "    for k, (d, alt_para) in RUN_DATA.items():\n",
    "        cur = main(\n",
    "            dir_name=RUN_DIR / d, runs=[\"run_000\"], first_n_cases=FIRST_N, abs_path=True\n",
    "        )\n",
    "        assert len(cur) == 1\n",
    "        data[k] = cur[0]\n",
    "\n",
    "        # We replaced the paraphrase metrics last minute\n",
    "        # run_000 should be the original, run_001 with transplanted paraphrases\n",
    "        if alt_para:\n",
    "            cur = main(\n",
    "                dir_name=RUN_DIR / d,\n",
    "                runs=[\"run_001\"],\n",
    "                first_n_cases=FIRST_N,\n",
    "                abs_path=True,\n",
    "            )\n",
    "            if len(cur) == 1:\n",
    "                data[k][\"pre_paraphrase_success\"] = cur[0][\"pre_paraphrase_success\"]\n",
    "                data[k][\"post_paraphrase_success\"] = cur[0][\"post_paraphrase_success\"]\n",
    "                data[k][\"pre_score\"] = cur[0][\"pre_score\"]\n",
    "                data[k][\"post_score\"] = cur[0][\"post_score\"]\n",
    "            else:\n",
    "                raise\n",
    "    m = []\n",
    "    for k, v in data.items():\n",
    "        m.append(\n",
    "            [k]\n",
    "            + [\n",
    "                v[\n",
    "                    z\n",
    "                    if all(k != z for z in BASELINE_NAMES) or z == \"time\"\n",
    "                    else \"pre_\" + z[len(\"post_\") :]\n",
    "                ]\n",
    "                for z in COLS\n",
    "            ]\n",
    "        )\n",
    "\n",
    "    m_np = np.array([[col[0] for col in row[1:]] for row in m[1:]])\n",
    "    m_amax = np.argmax(m_np, axis=0)\n",
    "    m_amin = np.argmin(m_np, axis=0)\n",
    "\n",
    "    res = []\n",
    "\n",
    "    for i, row in enumerate(m):\n",
    "        lstr = [row[0]]\n",
    "        for j, el in enumerate(row[1:]):\n",
    "            mean, std = np.round(el[0], 1), el[1]\n",
    "            interval = 1.96 * std / np.sqrt(FIRST_N)\n",
    "\n",
    "            mean, interval = str(mean), f\"{np.round(interval, 1)}\"\n",
    "            bmark = m_amax if OPTIM[j] == 1 else m_amin\n",
    "            res_str = f\"{mean} ({interval})\" if not np.isnan(std) else f\"{mean}\"\n",
    "            if bmark[j] + 1 == i:\n",
    "                lstr.append(\"\\\\goodmetric{\" + res_str + \"}\")\n",
    "            elif not any(lstr[0] == z for z in BASELINE_NAMES) and (\n",
    "                (OPTIM[j] == 1 and float(mean) < LIM[j])\n",
    "                or (OPTIM[j] == 0 and float(mean) > LIM[j])\n",
    "            ):\n",
    "                lstr.append(\"\\\\badmetric{\" + res_str + \"}\")\n",
    "            else:\n",
    "                lstr.append(res_str)\n",
    "\n",
    "        res.append(\n",
    "            \" & \".join(lstr)\n",
    "            + \"\\\\\\\\\"\n",
    "            + (\"\\\\midrule\" if any(lstr[0] == z for z in BASELINE_NAMES) else \"\")\n",
    "        )\n",
    "\n",
    "    return \"\\n\".join(res)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f25701f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "gap = \"\\n\\\\midrule\\\\midrule\\n\"\n",
    "\n",
    "dir2medium = Path(\"/share/projects/rewriting-knowledge/OFFICIAL_DATA/cf/gpt2-medium\")\n",
    "data2medium = {\n",
    "    \"GPT-2 M\": (\"ROME\", False),\n",
    "    \"FT+L\": (\"FT_L\", False),\n",
    "    \"ROME\": (\"ROME\", False),\n",
    "}\n",
    "first2medium = 7500\n",
    "\n",
    "dir2large = Path(\"/share/projects/rewriting-knowledge/OFFICIAL_DATA/cf/gpt2-large\")\n",
    "data2large = {\n",
    "    \"GPT-2 L\": (\"ROME\", False),\n",
    "    \"FT+L\": (\"FT_L\", False),\n",
    "    \"ROME\": (\"ROME\", False),\n",
    "}\n",
    "first2large = 7500\n",
    "\n",
    "print(\n",
    "    execute(dir2medium, data2medium, first2medium)\n",
    "    + gap\n",
    "    + execute(dir2large, data2large, first2large)\n",
    ")\n",
    "\n",
    "dir2xl = Path(\"/share/projects/rewriting-knowledge/OFFICIAL_DATA/cf/gpt2-xl\")\n",
    "data2xl = {\n",
    "    \"GPT-2 XL\": (\"FT\", True),\n",
    "    \"FT\": (\"FT\", True),\n",
    "    \"FT+L\": (\"FT_L\", True),\n",
    "    \"KN\": (\"KN\", False),\n",
    "    \"KE\": (\"KE\", False),\n",
    "    \"KE-CF\": (\"KE_CF\", False),\n",
    "    \"MEND\": (\"MEND\", False),\n",
    "    \"MEND-CF\": (\"MEND_CF\", False),\n",
    "    \"ROME\": (\"ROME\", False),\n",
    "}\n",
    "first2xl = 7500\n",
    "\n",
    "dirj = Path(\"/share/projects/rewriting-knowledge/OFFICIAL_DATA/cf/gptj\")\n",
    "dataj = {\n",
    "    \"GPT-J\": (\"FT\", True),\n",
    "    \"FT\": (\"FT\", True),\n",
    "    \"FT+L\": (\"FT_L\", True),\n",
    "    \"MEND\": (\"MEND\", False),\n",
    "    \"ROME\": (\"ROME\", False),\n",
    "}\n",
    "firstj = 2000\n",
    "\n",
    "print(execute(dir2xl, data2xl, first2xl) + gap + execute(dirj, dataj, firstj))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bdef5b45",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
