{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Results workbook to render; swap the active line to build the table for another run.\n",
    "# filename = \"twenty_newsgroups_calib_0_passive_learning_eval_full_std_xent_only.xlsx\"\n",
    "filename = \"tiny_imagenet_CLIP_calib_0_passive_learning_eval_full_std_xent_only.xlsx\"\n",
    "\n",
    "# Split the filename into dataset, learning method and training-time method, e.g.\n",
    "# 'twenty_newsgroups_calib_0_passive_learning_eval_full_std_xent_only.xlsx'\n",
    "# -> ('twenty_newsgroups', 'passive_learning', 'std_xent_only').\n",
    "# The extension dot is escaped and the whole name must match, so a filename of a\n",
    "# different shape raises a clear error instead of an AttributeError on None.\n",
    "search = re.fullmatch(r\"(.*)_calib_0_(.*)_eval_full_(.*)\\.xlsx\", filename)\n",
    "if search is None:\n",
    "    raise ValueError(f\"Unexpected results filename: {filename!r}\")\n",
    "dataset, method, training_time_method = search.groups()\n",
    "\n",
    "# The 'std_xent_only' suffix is reported as the 'vanilla' training-time method.\n",
    "training_time_method = \"vanilla\" if training_time_method == \"std_xent_only\" else training_time_method\n",
    "\n",
    "src = pd.read_excel(f\"outputs/{filename}\", index_col=0)\n",
    "# Assign the filled column back: an in-place fillna on the attribute-accessed Series is a\n",
    "# chained operation that warns on recent pandas and leaves `src` untouched under copy-on-write.\n",
    "src[\"calib_conf\"] = src[\"calib_conf\"].fillna(\"None\")\n",
    "\n",
    "caption = f\"Auto-labeling error and coverage of post-hoc methods for {training_time_method} training-time methods via {method.replace('_', ' ')} on {dataset.replace('_', ' ')} dataset.\"\n",
    "label = f\"table:{dataset}_{method}_{training_time_method}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Table columns as (spreadsheet column, printed header) pairs, in display order.\n",
    "excel_to_text_header_mapping = dict([\n",
    "    ('calib_conf', 'Post-hoc Method'),\n",
    "    ('training_conf', 'Training-time Method'),\n",
    "    ('Auto-Labeling-Err', 'Auto-labeling Error'),\n",
    "    ('Coverage', 'Coverage'),\n",
    "])\n",
    "\n",
    "# Every table column is left-aligned in the LaTeX tabular spec.\n",
    "column_alignments = ['l'] * len(excel_to_text_header_mapping)\n",
    "\n",
    "# Configuration identifier -> display name printed in the table cells.\n",
    "conf_to_text_mapping = dict([\n",
    "    ('None', ' - '),\n",
    "    ('auto_label_opt_v0', 'Ours'),\n",
    "    ('dirichlet', 'Dirichlet'),\n",
    "    ('histogram_binning_top_label', 'Histogram binning top label'),\n",
    "    ('scaling_binning', 'Scaling binning'),\n",
    "    ('temp_scaling', 'Temperature scaling'),\n",
    "    ('std_cross_entropy', 'Vanilla'),\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_theader(values):\n",
    "    \"\"\"Return the LaTeX header row for the given column titles.\n",
    "\n",
    "    Each title is left-justified to 30 characters, the cells are joined with\n",
    "    ' & ', and a space plus two backslashes (the LaTeX row break) is appended.\n",
    "    \"\"\"\n",
    "    padded_titles = [title.ljust(30) for title in values]\n",
    "    header_row = ' & '.join(padded_titles)\n",
    "    return header_row + ' \\\\\\\\'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _format_mean_std(row, col, highlight):\n",
    "    \"\"\"Format the `col`-Mean / `col`-Std pair of `row` as 'mean +/- std' LaTeX text, bold when `highlight` is true.\"\"\"\n",
    "    mean = row[col + \"-Mean\"]\n",
    "    sd = row[col + \"-Std\"]\n",
    "    str_val = rf\"{mean:.3f} $\\pm$ {sd:.3f}\"\n",
    "    if highlight:\n",
    "        str_val = rf\"\\textbf{{{str_val}}}\"\n",
    "    return str_val\n",
    "\n",
    "\n",
    "def generate_tbody(df):\n",
    "    \"\"\"Build the LaTeX body rows of the results table.\n",
    "\n",
    "    Args:\n",
    "        df: DataFrame holding the `calib_conf` and `training_conf` columns plus the\n",
    "            `-Mean` / `-Std` columns for `Auto-Labeling-Err` and `Coverage`.\n",
    "\n",
    "    Returns:\n",
    "        One text line per row of `df`: cells padded to 30 characters, joined with\n",
    "        ' & ', and terminated with a LaTeX row break. The row with the lowest mean\n",
    "        auto-labeling error and the row with the highest mean coverage are bold.\n",
    "    \"\"\"\n",
    "    # Rank the frame that was passed in (not the notebook-level `src`) and keep the\n",
    "    # winners as row positions: argmin/argmax return positions, while iterrows() yields\n",
    "    # index labels, and the two only coincide for a default 0..n-1 index.\n",
    "    best_position = {\n",
    "        \"Auto-Labeling-Err\": df[\"Auto-Labeling-Err-Mean\"].argmin(),\n",
    "        \"Coverage\": df[\"Coverage-Mean\"].argmax(),\n",
    "    }\n",
    "\n",
    "    table_rows = []\n",
    "    for position, (_, row) in enumerate(df.iterrows()):\n",
    "        table_row = []\n",
    "        for col in excel_to_text_header_mapping.keys():\n",
    "            if col in best_position:\n",
    "                table_row.append(_format_mean_std(row, col, position == best_position[col]))\n",
    "            else:\n",
    "                table_row.append(f\"{conf_to_text_mapping[row[col]]}\")\n",
    "        table_rows.append(' & '.join(cell.ljust(30) for cell in table_row))\n",
    "    return ' \\\\\\\\\\n        '.join(table_rows) + ' \\\\\\\\'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assemble the complete LaTeX table* environment line by line; the trailing empty\n",
    "# entry makes the joined text end with a newline.\n",
    "prefix = '\\n'.join([\n",
    "    r'\\begin{table*}[t]',\n",
    "    r'    \\centering',\n",
    "    rf\"    \\begin{{tabular}}{{{''.join(column_alignments)}}}\",\n",
    "    r'        \\toprule',\n",
    "    f'        {generate_theader(excel_to_text_header_mapping.values())}',\n",
    "    r'        \\midrule',\n",
    "    f'        {generate_tbody(src)}',\n",
    "    r'        \\bottomrule',\n",
    "    r'    \\end{tabular}',\n",
    "    rf'    \\caption{{{caption}}}',\n",
    "    rf'    \\label{{{label}}}',\n",
    "    r'\\end{table*}',\n",
    "    '',\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{table*}[t]\n",
      "    \\centering\n",
      "    \\begin{tabular}{llll}\n",
      "        \\toprule\n",
      "        Post-hoc Method                & Training-time Method           & Auto-labeling Error            & Coverage                       \\\\\n",
      "        \\midrule\n",
      "         -                             & Vanilla                        & 6.505 $\\pm$ 0.417              & 20.521 $\\pm$ 1.279             \\\\\n",
      "        Ours                           & Vanilla                        & \\textbf{2.154 $\\pm$ 3.127}     & \\textbf{38.189 $\\pm$ 2.369}    \\\\\n",
      "        Dirichlet                      & Vanilla                        & 12.275 $\\pm$ 1.279             & 11.903 $\\pm$ 0.695             \\\\\n",
      "        Histogram binning top label    & Vanilla                        & 10.659 $\\pm$ 0.622             & 13.634 $\\pm$ 0.437             \\\\\n",
      "        Scaling binning                & Vanilla                        & 10.326 $\\pm$ 0.209             & 12.879 $\\pm$ 1.126             \\\\\n",
      "        Temperature scaling            & Vanilla                        & 10.902 $\\pm$ 0.105             & 14.085 $\\pm$ 1.133             \\\\\n",
      "        \\bottomrule\n",
      "    \\end{tabular}\n",
      "    \\caption{Auto-labeling error and coverage of post-hoc methods for vanilla training-time methods via passive learning on tiny imagenet CLIP dataset.}\n",
      "    \\label{table:tiny_imagenet_CLIP_passive_learning_vanilla}\n",
      "\\end{table*}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(prefix)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
