{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c01b7039",
   "metadata": {},
   "source": [
    "# Table Aggregation\n",
    "\n",
    "In this notebook we aggregate the raw data into tables as used in our figures."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "announced-sample",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "def aggregate(df):\n",
    "    best_of_10 = df.groupby([\"sample_id\"]).max()\n",
    "    best_of_10[\"num_full_match\"] = (best_of_10[\"consistency\"] == 1.0)\n",
    "    res = best_of_10.groupby([\"num_nodes\", \"num_networks\"]).mean()\n",
    "    res[\"num_full_match\"] = best_of_10.groupby([\"num_nodes\",\"num_networks\"]).sum()[\"num_full_match\"]\n",
    "    return res\n",
    "\n",
    "def aggregate_3pred(df, consistency_column=\"overall\"):\n",
    "    best_of_10 = df.groupby([\"sample_id\"]).max()\n",
    "    best_of_10[\"num_full_match\"] = (best_of_10[\"overall\"] == 1.0)\n",
    "    best_of_10[\"num_close_match\"] = (best_of_10[\"overall\"] > 0.95)\n",
    "    best_of_10[\"num_close_match90\"] = (best_of_10[\"overall\"] > 0.9)\n",
    "    best_of_10[\"num_full_fwd\"] = (best_of_10[\"fwd\"] == 1.0)\n",
    "    best_of_10[\"num_reachable\"] = (best_of_10[\"reachable\"] == 1.0) # np.logical_and((best_of_10[\"fwd\"] == 1.0).values, (best_of_10[\"reachable\"] == 1.0).values)\n",
    "    best_of_10[\"num_trafficIsolation\"] = (best_of_10[\"trafficIsolation\"] == 1.0) # np.logical_and((best_of_10[\"fwd\"] == 1.0).values, (best_of_10[\"reachable\"] == 1.0).values)\n",
    "    res = best_of_10.groupby([\"num_nodes\", \"num_networks\"]).mean()\n",
    "    res[\"num_full_match\"] = best_of_10.groupby([\"num_nodes\",\"num_networks\"]).sum()[\"num_full_match\"]\n",
    "    res[\"num_full_fwd\"] = best_of_10.groupby([\"num_nodes\",\"num_networks\"]).sum()[\"num_full_fwd\"]\n",
    "    res[\"num_reachable\"] = best_of_10.groupby([\"num_nodes\",\"num_networks\"]).sum()[\"num_reachable\"]\n",
    "    res[\"num_trafficIsolation\"] = best_of_10.groupby([\"num_nodes\",\"num_networks\"]).sum()[\"num_trafficIsolation\"]\n",
    "    res[\"num_close_match\"] = best_of_10.groupby([\"num_nodes\",\"num_networks\"]).sum()[\"num_close_match\"]\n",
    "    res[\"num_close_match90\"] = best_of_10.groupby([\"num_nodes\",\"num_networks\"]).sum()[\"num_close_match90\"]\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "c6b2e41e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def aggregate_paper(df, consistency_column=\"overall\", aggregate_by_n=False):\n",
    "    best_of_10 = df.groupby([\"sample_id\"]).max()\n",
    "    best_of_10[\"num_full_match\"] = (best_of_10[\"overall\"] == 1.0)\n",
    "    best_of_10[\"num_close_match\"] = (best_of_10[\"overall\"] > 0.95)\n",
    "    best_of_10[\"num_close_match90\"] = (best_of_10[\"overall\"] > 0.9)\n",
    "    best_of_10[\"num_full_fwd\"] = (best_of_10[\"fwd\"] == 1.0)\n",
    "    best_of_10[\"num_reachable\"] = (best_of_10[\"reachable\"] == 1.0) # np.logical_and((best_of_10[\"fwd\"] == 1.0).values, (best_of_10[\"reachable\"] == 1.0).values)\n",
    "    best_of_10[\"num_trafficIsolation\"] = (best_of_10[\"trafficIsolation\"] == 1.0) # np.logical_and((best_of_10[\"fwd\"] == 1.0).values, (best_of_10[\"reachable\"] == 1.0).values)\n",
    "    \n",
    "    if aggregate_by_n:\n",
    "        def group(sample_id):\n",
    "            if sample_id < 33: return 0\n",
    "            elif sample_id < 67: return 1\n",
    "            else: return 2\n",
    "        best_of_10[\"num_nodes\"] = [group(s) for s in best_of_10.reset_index()[\"num_nodes\"].values]\n",
    "    else:\n",
    "        def group(sample_id):\n",
    "            if sample_id < 8: return 0\n",
    "            elif sample_id < 16: return 1\n",
    "            else: return 2\n",
    "        best_of_10[\"num_nodes\"] = [group(s) for s in best_of_10.reset_index()[\"sample_id\"].values]\n",
    "        \n",
    "#     print(best_of_10.groupby([\"num_nodes\", \"num_networks\"]).count())\n",
    "    res = best_of_10.groupby([\"num_nodes\", \"num_networks\"]).mean()\n",
    "    res[\"num_full_match\"] = best_of_10.groupby([\"num_nodes\",\"num_networks\"]).sum()[\"num_full_match\"]\n",
    "    res[\"num_full_fwd\"] = best_of_10.groupby([\"num_nodes\",\"num_networks\"]).sum()[\"num_full_fwd\"]\n",
    "    res[\"num_reachable\"] = best_of_10.groupby([\"num_nodes\",\"num_networks\"]).sum()[\"num_reachable\"]\n",
    "    res[\"num_trafficIsolation\"] = best_of_10.groupby([\"num_nodes\",\"num_networks\"]).sum()[\"num_trafficIsolation\"]\n",
    "    res[\"num_close_match\"] = best_of_10.groupby([\"num_nodes\",\"num_networks\"]).sum()[\"num_close_match\"]\n",
    "    res[\"num_close_match90\"] = best_of_10.groupby([\"num_nodes\",\"num_networks\"]).sum()[\"num_close_match90\"]\n",
    "    return res\n",
    "def to_latex_paper(df):\n",
    "    #print(to_latex(df))\n",
    "    lines = []\n",
    "    for row in df.iloc:\n",
    "        def group_name(g): return \"Small\" if g == 0 else (\"Medium\" if g == 1 else \"Large\")\n",
    "        group = group_name(row[\"num_nodes\"])\n",
    "        columns = [\n",
    "            group,\n",
    "            \"%.2f\" % row[\"fwd\"],\n",
    "            \"%.2f\" % row[\"reachable\"],\n",
    "            \"%.2f\" % row[\"trafficIsolation\"],\n",
    "            \"\\\\textbf{%.2f}\" % row[\"overall\"],\n",
    "            \"%.2f\" % (row[\"num_full_match\"] / 8),\n",
    "            \"%.2f\" % (row[\"num_close_match90\"] / 8),\n",
    "        ]\n",
    "        lines.append(\"& \" + \" & \".join(columns) + \"\\\\\\\\\")\n",
    "    return \"\\n\".join(lines)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "da27aae8",
   "metadata": {},
   "outputs": [],
   "source": [
    "!ls -lisah ../"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6bfd9398",
   "metadata": {},
   "source": [
    "### Unsat Using Random Sampling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "6e933ab4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 25  28  31  71  54  56  29  16  61  39  17  19 153  23  18  36]\n",
      "                        fwd  reachable  trafficIsolation  overall  time  \\\n",
      "num_nodes num_networks                                                    \n",
      "0         4               9          9                 9        9     9   \n",
      "1         4               5          5                 5        5     5   \n",
      "2         4               2          2                 2        2     2   \n",
      "\n",
      "                        num_full_match  num_close_match  num_close_match90  \\\n",
      "num_nodes num_networks                                                       \n",
      "0         4                          9                9                  9   \n",
      "1         4                          5                5                  5   \n",
      "2         4                          2                2                  2   \n",
      "\n",
      "                        num_full_fwd  num_reachable  num_trafficIsolation  \n",
      "num_nodes num_networks                                                     \n",
      "0         4                        9              9                     9  \n",
      "1         4                        5              5                     5  \n",
      "2         4                        2              2                     2  \n",
      "$3\\times 16$ requirements & Small & 0.81 & 1.00 & 1.00 & \\textbf{0.81} & 0.00 & 0.38\\\\\n",
      "& Medium & 0.89 & 1.00 & 1.00 & \\textbf{0.89} & 0.00 & 0.25\\\\\n",
      "& Large & 0.87 & 1.00 & 1.00 & \\textbf{0.87} & 0.00 & 0.12\\\\\n",
      "\\midrule\n"
     ]
    }
   ],
   "source": [
    "f = \"../results-UNSAT_DATASET_LESS-rnd-15473.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f), aggregate_by_n=True).reset_index()\n",
    "print(\"$3\\\\times 16$ requirements \" + to_latex_paper(df_2))\n",
    "print(\"\\\\midrule\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b0cdb1bf",
   "metadata": {},
   "source": [
    "### Unsat using model (5 Samples)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "0105a427",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                        fwd  reachable  trafficIsolation  overall  time  \\\n",
      "num_nodes num_networks                                                    \n",
      "0         4               8          8                 8        8     8   \n",
      "1         4               8          8                 8        8     8   \n",
      "\n",
      "                        num_full_match  num_close_match  num_close_match90  \\\n",
      "num_nodes num_networks                                                       \n",
      "0         4                          8                8                  8   \n",
      "1         4                          8                8                  8   \n",
      "\n",
      "                        num_full_fwd  num_reachable  num_trafficIsolation  \n",
      "num_nodes num_networks                                                     \n",
      "0         4                        8              8                     8  \n",
      "1         4                        8              8                     8  \n",
      "$3\\times 16$ requirements & Small & 0.97 & 1.00 & 1.00 & \\textbf{0.97} & 0.00 & 1.00\\\\\n",
      "& Medium & 0.94 & 1.00 & 1.00 & \\textbf{0.94} & 0.00 & 0.75\\\\\n",
      "\\midrule\n"
     ]
    }
   ],
   "source": [
    "f = \"../results-UNSAT_DATASET_LESS-14709.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(\"$3\\\\times 16$ requirements \" + to_latex_paper(df_2))\n",
    "print(\"\\\\midrule\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "497dca6d",
   "metadata": {},
   "source": [
    "### Unsat using model (10 samples)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "5f503f50",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "$3\\times 16$ requirements & Small & 0.86 & 1.00 & 1.00 & \\textbf{0.86} & 0.00 & 0.38\\\\\n",
      "& Medium & 0.87 & 1.00 & 1.00 & \\textbf{0.87} & 0.00 & 0.38\\\\\n",
      "& Large & 0.83 & 1.00 & 1.00 & \\textbf{0.83} & 0.00 & 0.12\\\\\n",
      "\\midrule\n"
     ]
    }
   ],
   "source": [
    "f = \"../results-UNSAT_DATASET-samples10-17480.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(\"$3\\\\times 16$ requirements \" + to_latex_paper(df_2))\n",
    "print(\"\\\\midrule\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "49f97868",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "$3\\times 16$ requirements & Small & 0.97 & 0.91 & 0.95 & \\textbf{0.95} & 0.38 & 0.88\\\\\n",
      "& Medium & 0.97 & 0.95 & 0.97 & \\textbf{0.96} & 0.38 & 0.88\\\\\n",
      "& Large & 0.93 & 0.93 & 0.97 & \\textbf{0.93} & 0.12 & 0.88\\\\\n",
      "\\midrule\n"
     ]
    }
   ],
   "source": [
    "f = \"../results-PAPER_DATASET-18885.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(\"$3\\\\times 16$ requirements \" + to_latex_paper(df_2))\n",
    "print(\"\\\\midrule\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "ff19d7c0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "$3\\times 16$ requirements & Small & 0.97 & 0.91 & 0.95 & \\textbf{0.95} & 0.38 & 0.88\\\\\n",
      "& Medium & 0.97 & 0.95 & 0.97 & \\textbf{0.96} & 0.38 & 0.88\\\\\n",
      "& Large & 0.93 & 0.93 & 0.97 & \\textbf{0.93} & 0.12 & 0.88\\\\\n",
      "\\midrule\n"
     ]
    }
   ],
   "source": [
    "f = \"../results-PAPER_DATASET-18885.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(\"$3\\\\times 16$ requirements \" + to_latex_paper(df_2))\n",
    "print(\"\\\\midrule\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "2c68d25d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "$3\\times 2$ requirements & Small & 0.97 & 0.94 & 0.97 & \\textbf{0.96} & 0.25 & 0.88\\\\\n",
      "& Medium & 0.95 & 0.98 & 1.00 & \\textbf{0.97} & 0.12 & 1.00\\\\\n",
      "& Large & 0.93 & 0.92 & 0.98 & \\textbf{0.93} & 0.00 & 0.75\\\\\n",
      "\\midrule\n"
     ]
    }
   ],
   "source": [
    "f = \"../results-PAPER_DATASET-17428.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(\"$3\\\\times 2$ requirements \" + to_latex_paper(df_2))\n",
    "print(\"\\\\midrule\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "e8b17ff4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "$3\\times 2$ requirements & Small & 0.96 & 0.93 & 0.97 & \\textbf{0.95} & 0.25 & 0.88\\\\\n",
      "& Medium & 0.96 & 0.97 & 1.00 & \\textbf{0.97} & 0.25 & 1.00\\\\\n",
      "& Large & 0.91 & 0.91 & 0.98 & \\textbf{0.92} & 0.00 & 0.75\\\\\n",
      "\\midrule\n"
     ]
    }
   ],
   "source": [
    "f = \"../results-PAPER_DATASET-19068.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(\"$3\\\\times 2$ requirements \" + to_latex_paper(df_2))\n",
    "print(\"\\\\midrule\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "fb3b5bbf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "$3\\times 16$ requirements & Small & 0.95 & 0.93 & 0.89 & \\textbf{0.91} & 0.12 & 0.50\\\\\n",
      "& Medium & 0.97 & 0.97 & 0.94 & \\textbf{0.95} & 0.25 & 0.88\\\\\n",
      "& Large & 0.92 & 0.91 & 0.94 & \\textbf{0.90} & 0.00 & 0.75\\\\\n"
     ]
    }
   ],
   "source": [
    "f = \"../results-PAPER_DATASET-topk-12558.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(\"$3\\\\times 16$ requirements \" + to_latex_paper(df_2))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b97a2f60",
   "metadata": {},
   "source": [
    "### python3 eval_consistency.py --dataset ./dataset-ported/bgp-qlty-reqs-16 ../../models/bgp-train-long-model-epoch5160.pt --random 1 --sampling-mode topk --num-samples 20 --num-shots 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "078241a8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "$3\\times 16$ requirements & Small & 0.97 & 0.96 & 0.99 & \\textbf{0.96} & 0.25 & 0.88\\\\\n",
      "& Medium & 0.99 & 0.98 & 0.99 & \\textbf{0.98} & 0.38 & 1.00\\\\\n",
      "& Large & 0.96 & 0.96 & 1.00 & \\textbf{0.96} & 0.25 & 0.88\\\\\n"
     ]
    }
   ],
   "source": [
    "f = \"../results-PAPER_DATASET-topk-11423.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(\"$3\\\\times 16$ requirements \" + to_latex_paper(df_2))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "66b3831d",
   "metadata": {},
   "source": [
    "### python3 eval_consistency.py --dataset ./dataset-ported/bgp-qlty-reqs-8 ../../models/bgp-train-long-model-epoch5160.pt --random 1 --sampling-mode topk --num-samples 20 --num-shots 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "270f895f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "$3\\times 16$ requirements & Small & 0.99 & 1.00 & 0.98 & \\textbf{0.99} & 0.62 & 1.00\\\\\n",
      "& Medium & 0.99 & 0.91 & 1.00 & \\textbf{0.96} & 0.38 & 0.75\\\\\n",
      "& Large & 0.96 & 0.97 & 0.98 & \\textbf{0.94} & 0.38 & 0.62\\\\\n"
     ]
    }
   ],
   "source": [
    "f = \"../results-PAPER_DATASET-topk-req8-14551.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(\"$3\\\\times 16$ requirements \" + to_latex_paper(df_2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "53f7a43f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "$3\\times 16$ requirements & Small & 0.97 & 0.89 & 0.95 & \\textbf{0.94} & 0.38 & 0.75\\\\\n",
      "& Medium & 0.98 & 0.97 & 0.97 & \\textbf{0.98} & 0.38 & 1.00\\\\\n",
      "& Large & 0.92 & 0.88 & 0.95 & \\textbf{0.91} & 0.12 & 0.50\\\\\n"
     ]
    }
   ],
   "source": [
    "f = \"specification-consistency/results-eval-64-bgp-qlty-reqs-16-4shot-11562.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(\"$3\\\\times 16$ requirements \" + to_latex_paper(df_2))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eab89bea",
   "metadata": {},
   "source": [
    "## Figure on Specification Consistency Across Increasingly Large Topologies and Specifications"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "1e95bac9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "$3\\times 2$ requirements & Small & 0.95 & 0.94 & 1.00 & \\textbf{0.95} & 0.62 & 0.62\\\\\n",
      "& Medium & 0.96 & 1.00 & 0.94 & \\textbf{0.96} & 0.88 & 0.88\\\\\n",
      "& Large & 0.94 & 0.94 & 1.00 & \\textbf{0.93} & 0.62 & 0.62\\\\\n",
      "\\midrule\n",
      "$3\\times 8$ requirements & Small & 0.97 & 0.98 & 0.97 & \\textbf{0.98} & 0.50 & 1.00\\\\\n",
      "& Medium & 0.99 & 0.95 & 1.00 & \\textbf{0.98} & 0.50 & 1.00\\\\\n",
      "& Large & 0.96 & 0.95 & 0.91 & \\textbf{0.94} & 0.38 & 0.88\\\\\n",
      "\\midrule\n",
      "$3\\times 16$ requirements & Small & 0.97 & 0.89 & 0.95 & \\textbf{0.94} & 0.38 & 0.75\\\\\n",
      "& Medium & 0.98 & 0.97 & 0.97 & \\textbf{0.98} & 0.38 & 1.00\\\\\n",
      "& Large & 0.92 & 0.88 & 0.95 & \\textbf{0.91} & 0.12 & 0.50\\\\\n"
     ]
    }
   ],
   "source": [
    "f = \"specification-consistency/results-eval-64-bgp-qlty-reqs-2-4shot-15649.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(\"$3\\\\times 2$ requirements \" + to_latex_paper(df_2))\n",
    "print(\"\\\\midrule\")\n",
    "\n",
    "f = \"specification-consistency/results-eval-64-bgp-qlty-reqs-8-4shot-14793.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(\"$3\\\\times 8$ requirements \" + to_latex_paper(df_2))\n",
    "print(\"\\\\midrule\")\n",
    "\n",
    "f = \"specification-consistency/results-eval-64-bgp-qlty-reqs-16-4shot-11562.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(\"$3\\\\times 16$ requirements \" + to_latex_paper(df_2))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c6fb0096",
   "metadata": {},
   "source": [
    "## Figure on Baseline/Oneshot/Multi-Shot Comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "e6f4e108",
   "metadata": {},
   "outputs": [],
   "source": [
    "def to_latex_paper_baseline(df):\n",
    "    #print(to_latex(df))\n",
    "    lines = []\n",
    "    for row in df.iloc:\n",
    "        def group_name(g): return \"Small\" if g == 0 else (\"Medium\" if g == 1 else \"Large\")\n",
    "        group = group_name(row[\"num_nodes\"])\n",
    "        columns = [\n",
    "            group,\n",
    "            \"%.2f\" % row[\"overall\"],\n",
    "        ]\n",
    "        lines.append(\"& \" + \" & \".join(columns) + \"\\\\\\\\\")\n",
    "    return \"\\n\".join(lines)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "d415a08e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Random\n",
      "& Small & 0.83\\\\\n",
      "& Medium & 0.88\\\\\n",
      "& Large & 0.78\\\\\n",
      "1-Shot\n",
      "& Small & 0.92\\\\\n",
      "& Medium & 0.95\\\\\n",
      "& Large & 0.88\\\\\n",
      "4-Shot\n",
      "& Small & 0.94\\\\\n",
      "& Medium & 0.98\\\\\n",
      "& Large & 0.91\\\\\n"
     ]
    }
   ],
   "source": [
    "f = \"multishot-sampling/results-eval-64-bgp-qlty-reqs-16-random-13819.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(\"Random\")\n",
    "print(to_latex_paper_baseline(df_2))\n",
    "\n",
    "print(\"1-Shot\")\n",
    "f = \"multishot-sampling/results-eval-64-bgp-qlty-reqs-16-oneshot-14551.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(to_latex_paper_baseline(df_2))\n",
    "\n",
    "print(\"4-Shot\")\n",
    "f = \"specification-consistency/results-eval-64-bgp-qlty-reqs-16-4shot-11562.pkl-results.pkl\"\n",
    "df_2 = aggregate_paper(pd.read_pickle(f)).reset_index()\n",
    "print(to_latex_paper_baseline(df_2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "56443fec",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
