{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: '../outputs/mnist_lenet_calib_eval_full_passive_learning_eval_full/tbal_mnist_lenet_calib_eval_full_tbal_eval_full__01-16-2024__06-09-16.xlsx'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[2], line 14\u001b[0m\n\u001b[1;32m      6\u001b[0m dataset, method, training_time_method \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m      7\u001b[0m     search\u001b[38;5;241m.\u001b[39mgroup(\u001b[38;5;241m1\u001b[39m),\n\u001b[1;32m      8\u001b[0m     search\u001b[38;5;241m.\u001b[39mgroup(\u001b[38;5;241m2\u001b[39m),\n\u001b[1;32m      9\u001b[0m     search\u001b[38;5;241m.\u001b[39mgroup(\u001b[38;5;241m3\u001b[39m),\n\u001b[1;32m     10\u001b[0m )\n\u001b[1;32m     12\u001b[0m training_time_method \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvanilla\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m training_time_method \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstd_xent_only\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m training_time_method\n\u001b[0;32m---> 14\u001b[0m src \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_excel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m../outputs/mnist_lenet_calib_eval_full_passive_learning_eval_full/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mfilename\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex_col\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m     15\u001b[0m src\u001b[38;5;241m.\u001b[39mcalib_conf\u001b[38;5;241m.\u001b[39mfillna(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNone\u001b[39m\u001b[38;5;124m\"\u001b[39m, inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m     17\u001b[0m caption \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAuto-labeling error and coverage of post-hoc methods for 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtraining_time_method\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m training-time methods via \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmethod\u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;250m \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m on \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdataset\u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;250m \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m dataset.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n",
      "File \u001b[0;32m~/code/TBAL/env/lib/python3.10/site-packages/pandas/io/excel/_base.py:504\u001b[0m, in \u001b[0;36mread_excel\u001b[0;34m(io, sheet_name, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, date_format, thousands, decimal, comment, skipfooter, storage_options, dtype_backend, engine_kwargs)\u001b[0m\n\u001b[1;32m    502\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(io, ExcelFile):\n\u001b[1;32m    503\u001b[0m     should_close \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 504\u001b[0m     io \u001b[38;5;241m=\u001b[39m \u001b[43mExcelFile\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    505\u001b[0m \u001b[43m        \u001b[49m\u001b[43mio\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    506\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    507\u001b[0m \u001b[43m        \u001b[49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    508\u001b[0m \u001b[43m        \u001b[49m\u001b[43mengine_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mengine_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    509\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    510\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m engine \u001b[38;5;129;01mand\u001b[39;00m engine \u001b[38;5;241m!=\u001b[39m io\u001b[38;5;241m.\u001b[39mengine:\n\u001b[1;32m    511\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    512\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEngine should not be specified when passing 
\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    513\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124man ExcelFile - ExcelFile already has the engine set\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    514\u001b[0m     )\n",
      "File \u001b[0;32m~/code/TBAL/env/lib/python3.10/site-packages/pandas/io/excel/_base.py:1563\u001b[0m, in \u001b[0;36mExcelFile.__init__\u001b[0;34m(self, path_or_buffer, engine, storage_options, engine_kwargs)\u001b[0m\n\u001b[1;32m   1561\u001b[0m     ext \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mxls\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1562\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1563\u001b[0m     ext \u001b[38;5;241m=\u001b[39m \u001b[43minspect_excel_format\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1564\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcontent_or_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\n\u001b[1;32m   1565\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1566\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m ext \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m   1567\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m   1568\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExcel file format cannot be determined, you must specify \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1569\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124man engine manually.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1570\u001b[0m         )\n",
      "File \u001b[0;32m~/code/TBAL/env/lib/python3.10/site-packages/pandas/io/excel/_base.py:1419\u001b[0m, in \u001b[0;36minspect_excel_format\u001b[0;34m(content_or_path, storage_options)\u001b[0m\n\u001b[1;32m   1416\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(content_or_path, \u001b[38;5;28mbytes\u001b[39m):\n\u001b[1;32m   1417\u001b[0m     content_or_path \u001b[38;5;241m=\u001b[39m BytesIO(content_or_path)\n\u001b[0;32m-> 1419\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1420\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcontent_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m   1421\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m handle:\n\u001b[1;32m   1422\u001b[0m     stream \u001b[38;5;241m=\u001b[39m handle\u001b[38;5;241m.\u001b[39mhandle\n\u001b[1;32m   1423\u001b[0m     stream\u001b[38;5;241m.\u001b[39mseek(\u001b[38;5;241m0\u001b[39m)\n",
      "File \u001b[0;32m~/code/TBAL/env/lib/python3.10/site-packages/pandas/io/common.py:872\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m    863\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(\n\u001b[1;32m    864\u001b[0m             handle,\n\u001b[1;32m    865\u001b[0m             ioargs\u001b[38;5;241m.\u001b[39mmode,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    868\u001b[0m             newline\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m    869\u001b[0m         )\n\u001b[1;32m    870\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    871\u001b[0m         \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[0;32m--> 872\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    873\u001b[0m     handles\u001b[38;5;241m.\u001b[39mappend(handle)\n\u001b[1;32m    875\u001b[0m \u001b[38;5;66;03m# Convert BytesIO or file objects passed with an encoding\u001b[39;00m\n",
      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../outputs/mnist_lenet_calib_eval_full_passive_learning_eval_full/tbal_mnist_lenet_calib_eval_full_tbal_eval_full__01-16-2024__06-09-16.xlsx'"
     ]
    }
   ],
   "source": [
    "# Workbook to render; the commented-out line below is another filename left in place.\n",
    "# filename = \"twenty_newsgroups_calib_0_passive_learning_eval_full_std_xent_only.xlsx\"\n",
    "filename = \"tbal_mnist_lenet_calib_eval_full_tbal_eval_full__01-16-2024__06-09-16.xlsx\"\n",
    "results_dir = \"../outputs/mnist_lenet_calib_eval_full_tbal_eval_full\"\n",
    "\n",
    "# Split the filename into dataset, method and training-time method. The extension dot is\n",
    "# escaped and the pattern is anchored so that only a literal \".xlsx\" suffix matches.\n",
    "search = re.search(r\"(.*)_calib_eval_full_(.*)_eval_full_(.*)\\.xlsx$\", filename)\n",
    "if search is None:\n",
    "    # Report the offending name instead of failing later with AttributeError on None.\n",
    "    raise ValueError(f\"Filename does not match the expected naming pattern: {filename!r}\")\n",
    "dataset, method, training_time_method = search.groups()\n",
    "\n",
    "# NOTE(review): for timestamped names such as the one above, the third group is the timestamp\n",
    "# itself, which ends up in the caption and label below -- confirm what should appear there.\n",
    "# \"std_xent_only\" is displayed as \"vanilla\".\n",
    "if training_time_method == \"std_xent_only\":\n",
    "    training_time_method = \"vanilla\"\n",
    "\n",
    "src = pd.read_excel(f\"{results_dir}/{filename}\", index_col=0)\n",
    "# Assign the filled column back to the frame: fillna(inplace=True) on an attribute-accessed\n",
    "# column is a chained in-place update that is not guaranteed to write through to `src`.\n",
    "src[\"calib_conf\"] = src[\"calib_conf\"].fillna(\"None\")\n",
    "\n",
    "caption = f\"Auto-labeling error and coverage of post-hoc methods for {training_time_method} training-time methods via {method.replace('_', ' ')} on {dataset.replace('_', ' ')} dataset.\"\n",
    "label = f\"table:{dataset}_{method}_{training_time_method}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One left-aligned LaTeX column specifier per table column.\n",
    "column_alignments = ['l'] * 4\n",
    "\n",
    "# Spreadsheet column name (or name stem) -> header text printed in the table.\n",
    "excel_to_text_header_mapping = dict([\n",
    "    ('calib_conf', 'Post-hoc Method'),\n",
    "    ('training_conf', 'Training-time Method'),\n",
    "    ('Auto-Labeling-Err', 'Auto-labeling Error'),\n",
    "    ('Coverage', 'Coverage'),\n",
    "])\n",
    "\n",
    "# Raw configuration value found in the spreadsheet -> text printed in the table cell.\n",
    "conf_to_text_mapping = dict([\n",
    "    ('None', ' - '),\n",
    "    ('auto_label_opt_v0', 'Ours'),\n",
    "    ('dirichlet', 'Dirichlet'),\n",
    "    ('histogram_binning_top_label', 'Histogram binning top label'),\n",
    "    ('scaling_binning', 'Scaling binning'),\n",
    "    ('temp_scaling', 'Temperature scaling'),\n",
    "    ('std_cross_entropy', 'Vanilla'),\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_theader(values):\n",
    "    \"\"\"Return the LaTeX header row for the given column titles.\n",
    "\n",
    "    Each title is left-justified to 30 characters, the titles are joined with\n",
    "    ' & ', and the row is closed with a LaTeX line break.\n",
    "    \"\"\"\n",
    "    padded_titles = [title.ljust(30) for title in values]\n",
    "    header_row = ' & '.join(padded_titles)\n",
    "    return header_row + ' \\\\\\\\'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_tbody(df, header_mapping=None, conf_mapping=None):\n",
    "    \"\"\"Render the rows of ``df`` as the body of a LaTeX tabular.\n",
    "\n",
    "    One table row is produced per row of ``df``, with cells in the key order of\n",
    "    ``header_mapping``. The 'Auto-Labeling-Err' and 'Coverage' entries are read\n",
    "    from their '-Mean' and '-Std' columns and rendered as mean, plus-minus sign,\n",
    "    standard deviation (three decimals each). The row with the lowest mean error\n",
    "    and the row with the highest mean coverage are set in bold. Every other\n",
    "    entry is looked up in ``conf_mapping``. Cells are left-justified to 30\n",
    "    characters and joined with ' & '.\n",
    "\n",
    "    Args:\n",
    "        df: Frame holding the columns named above.\n",
    "        header_mapping: Mapping whose keys select the columns; defaults to the\n",
    "            notebook-level ``excel_to_text_header_mapping``.\n",
    "        conf_mapping: Mapping from raw cell values to display text; defaults to\n",
    "            the notebook-level ``conf_to_text_mapping``.\n",
    "\n",
    "    Returns:\n",
    "        The rows as one string, each terminated by a LaTeX line break; an empty\n",
    "        string when ``df`` has no rows.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: If a required column is missing or a value has no entry in\n",
    "            ``conf_mapping``.\n",
    "    \"\"\"\n",
    "    if header_mapping is None:\n",
    "        header_mapping = excel_to_text_header_mapping\n",
    "    if conf_mapping is None:\n",
    "        conf_mapping = conf_to_text_mapping\n",
    "    if df.empty:\n",
    "        # argmax/argmin raise on an empty column; there is nothing to render anyway.\n",
    "        return ''\n",
    "\n",
    "    # The best rows are located by position on the frame that was passed in and matched\n",
    "    # against the enumeration position below, so the result does not depend on the index\n",
    "    # labels of df or on any notebook-level frame. Ties resolve to the first occurrence.\n",
    "    coverage_max_pos = df['Coverage-Mean'].argmax()\n",
    "    auto_labeling_err_min_pos = df['Auto-Labeling-Err-Mean'].argmin()\n",
    "    best_pos_by_col = {\n",
    "        \"Auto-Labeling-Err\": auto_labeling_err_min_pos,\n",
    "        \"Coverage\": coverage_max_pos,\n",
    "    }\n",
    "\n",
    "    table_rows = []\n",
    "    for pos, (_, row) in enumerate(df.iterrows()):\n",
    "        table_row = []\n",
    "        for col in header_mapping:\n",
    "            if col in best_pos_by_col:\n",
    "                mean = row[col + \"-Mean\"]\n",
    "                sd = row[col + \"-Std\"]\n",
    "                str_val = rf\"{mean:.3f} $\\pm$ {sd:.3f}\"\n",
    "                if pos == best_pos_by_col[col]:\n",
    "                    str_val = rf\"\\textbf{{{str_val}}}\"\n",
    "                table_row.append(str_val)\n",
    "            else:\n",
    "                table_row.append(f\"{conf_mapping[row[col]]}\")\n",
    "        table_rows.append(' & '.join(cell.ljust(30) for cell in table_row))\n",
    "    # Rows are separated by a LaTeX line break plus the indentation used inside the tabular.\n",
    "    return ' \\\\\\\\\\n        '.join(table_rows) + ' \\\\\\\\'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assemble the complete LaTeX table environment line by line; the empty last element\n",
    "# makes the joined text end with a newline.\n",
    "prefix = \"\\n\".join([\n",
    "    r\"\\begin{table*}[t]\",\n",
    "    r\"    \\centering\",\n",
    "    r\"    \\begin{tabular}{\" + ''.join(column_alignments) + \"}\",\n",
    "    r\"        \\toprule\",\n",
    "    \"        \" + generate_theader(excel_to_text_header_mapping.values()),\n",
    "    r\"        \\midrule\",\n",
    "    \"        \" + generate_tbody(src),\n",
    "    r\"        \\bottomrule\",\n",
    "    r\"    \\end{tabular}\",\n",
    "    r\"    \\caption{\" + caption + \"}\",\n",
    "    r\"    \\label{\" + label + \"}\",\n",
    "    r\"\\end{table*}\",\n",
    "    \"\",\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{table*}[t]\n",
      "    \\centering\n",
      "    \\begin{tabular}{llll}\n",
      "        \\toprule\n",
      "        Post-hoc Method                & Training-time Method           & Auto-labeling Error            & Coverage                       \\\\\n",
      "        \\midrule\n",
      "         -                             & Vanilla                        & \\textbf{2.111 $\\pm$ 0.379}     & 77.692 $\\pm$ 2.786             \\\\\n",
      "        Ours                           & Vanilla                        & 3.777 $\\pm$ 0.597              & \\textbf{94.638 $\\pm$ 1.266}    \\\\\n",
      "        Ours                           & Vanilla                        & 3.075 $\\pm$ 0.450              & 94.171 $\\pm$ 1.497             \\\\\n",
      "        Ours                           & Vanilla                        & 3.777 $\\pm$ 0.597              & 94.638 $\\pm$ 1.266             \\\\\n",
      "        Ours                           & Vanilla                        & 3.075 $\\pm$ 0.450              & 94.171 $\\pm$ 1.497             \\\\\n",
      "        Ours                           & Vanilla                        & 3.200 $\\pm$ 0.974              & 94.009 $\\pm$ 1.722             \\\\\n",
      "        Ours                           & Vanilla                        & 3.352 $\\pm$ 0.481              & 94.437 $\\pm$ 1.582             \\\\\n",
      "        Ours                           & Vanilla                        & 3.200 $\\pm$ 0.974              & 94.009 $\\pm$ 1.722             \\\\\n",
      "        Ours                           & Vanilla                        & 3.352 $\\pm$ 0.481              & 94.437 $\\pm$ 1.582             \\\\\n",
      "        Dirichlet                      & Vanilla                        & 3.632 $\\pm$ 0.689              & 81.704 $\\pm$ 0.081             \\\\\n",
      "        Histogram binning top label    & Vanilla                        & 3.745 $\\pm$ 0.886              & 84.616 $\\pm$ 5.500             \\\\\n",
      "        Scaling binning                & Vanilla                        & 3.569 $\\pm$ 0.791              & 83.512 $\\pm$ 5.671             \\\\\n",
      "        Temperature scaling            & Vanilla                        & 3.789 $\\pm$ 0.885              & 84.548 $\\pm$ 5.743             \\\\\n",
      "        \\bottomrule\n",
      "    \\end{tabular}\n",
      "    \\caption{Auto-labeling error and coverage of post-hoc methods for _01-15-2024__16-38-47 training-time methods via passive learning on mnist lenet dataset.}\n",
      "    \\label{table:mnist_lenet_passive_learning__01-15-2024__16-38-47}\n",
      "\\end{table*}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Write the assembled LaTeX table source to the cell output.\n",
    "print(prefix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.10 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  },
  "vscode": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
