{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f0146668",
   "metadata": {},
   "outputs": [],
   "source": [
    "from metagen.benchmarks import *\n",
    "import pandas as pd\n",
    "import numpy as np\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "bc62c58b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_pivot(df):\n",
    "    df = df[df.metric.isin(['Valid', 'IoU', 'Chamfer Distance', 'Average Normalized Error'])].copy()\n",
    "\n",
    "    # Define mappings\n",
    "    category_mapping = {\n",
    "        'inverse_design': 'Inverse Design',\n",
    "        'material_understanding': 'Material Understanding',\n",
    "        'reconstruction': 'Reconstruction'\n",
    "    }\n",
    "\n",
    "    metric_mapping = {\n",
    "        'Average Normalized Error': 'Error',\n",
    "        'Chamfer Distance': 'CD'\n",
    "    }\n",
    "\n",
    "    # Apply mappings\n",
    "    df['Category'] = df['Category'].replace(category_mapping)\n",
    "    df['metric'] = df['metric'].replace(metric_mapping)\n",
    "\n",
    "    # Rename the column 'metric' to 'Metric'\n",
    "    df = df.rename(columns={'metric': 'Metric'})\n",
    "\n",
    "    pivot_table = pd.pivot_table(\n",
    "    df,\n",
    "    index='Model',\n",
    "    columns=['Category', 'Metric'],\n",
    "    values='value',\n",
    "    aggfunc='mean'\n",
    "    )\n",
    "    return pivot_table\n",
    "\n",
    "# Define which metrics should be minimized or maximized\n",
    "minimize_metrics = {'Error', 'CD'}\n",
    "maximize_metrics = {'IoU', 'Valid'}\n",
    "\n",
    "# Function to bold best values\n",
    "def highlight_best(df):\n",
    "    formatted_df = df.copy()\n",
    "\n",
    "    for col in df.columns:\n",
    "        values = df[col]\n",
    "        metric_name = col[1]  # assuming MultiIndex columns: (Category, Metric)\n",
    "\n",
    "        # Determine best value (ignoring NaNs)\n",
    "        if metric_name in minimize_metrics:\n",
    "            best_val = values.min(skipna=True)\n",
    "        elif metric_name in maximize_metrics:\n",
    "            best_val = values.max(skipna=True)\n",
    "        else:\n",
    "            continue\n",
    "\n",
    "        def format_value(x):\n",
    "            if pd.isna(x):\n",
    "                return r'\\textemdash{}'  # Represent NaNs as a dash\n",
    "            if metric_name == 'Valid':\n",
    "                percent = x * 100\n",
    "                formatted = \"100\\\\%\" if percent == 100 else f\"{percent:.1f}\\\\%\"\n",
    "            else:\n",
    "                formatted = f\"{x:.3f}\"\n",
    "            return f\"\\\\textbf{{{formatted}}}\" if x == best_val else formatted\n",
    "\n",
    "        formatted_df[col] = values.apply(format_value)\n",
    "\n",
    "    return formatted_df\n",
    "\n",
    "def to_latex(formatted_pivot):\n",
    "    n_cols = formatted_pivot.shape[1] + 1\n",
    "    col_fmt = 'c' * n_cols\n",
    "\n",
    "    latex = formatted_pivot.to_latex(\n",
    "        escape=False,\n",
    "        multicolumn=True,\n",
    "        multicolumn_format='c',\n",
    "        column_format=col_fmt\n",
    "    )\n",
    "\n",
    "    return latex\n",
    "\n",
    "def make_category_pivot(df, category):\n",
    "    df = df[df.metric.isin(['Valid', 'IoU', 'Chamfer Distance', 'Average Normalized Error'])].copy()\n",
    "\n",
    "    # Define mappings\n",
    "    category_mapping = {\n",
    "        'inverse_design': 'Inverse Design',\n",
    "        'material_understanding': 'Material Understanding',\n",
    "        'reconstruction': 'Reconstruction'\n",
    "    }\n",
    "\n",
    "    metric_mapping = {\n",
    "        'Average Normalized Error': 'Error',\n",
    "        'Chamfer Distance': 'CD'\n",
    "    }\n",
    "\n",
    "    task_mapping = {\n",
    "        'multiview_and_code_material_understanding': '4 View + Code',\n",
    "        'single_view_material_understanding': '1 View',\n",
    "        '4_target_inverse_design': '4 Target',\n",
    "        '2_view_reconstruction': '2 View',\n",
    "        '6_target_inverse_design': '6 Target',\n",
    "        '3_view_reconstruction': '3 View',\n",
    "        '1_view_reconstruction': '1 View',\n",
    "        '5_target_inverse_design': '5 Target',\n",
    "        '3_target_inverse_design': '3 Target',\n",
    "        '2_target_inverse_design': '2 Target',\n",
    "        '4_view_reconstruction': '4 View',\n",
    "        '1_target_inverse_design': '1 Target'\n",
    "    }\n",
    "\n",
    "    df = df[df.Category == category].copy()\n",
    "\n",
    "    # Apply mappings\n",
    "    df['Category'] = df['Category'].replace(category_mapping)\n",
    "    df['metric'] = df['metric'].replace(metric_mapping)\n",
    "    df['Task'] = df['Task'].replace(task_mapping)\n",
    "\n",
    "    # Rename the column 'metric' to 'Metric'\n",
    "    df = df.rename(columns={'metric': 'Metric'})\n",
    "\n",
    "    pivot_table = pd.pivot_table(\n",
    "    df,\n",
    "    index='Model',\n",
    "    columns=['Task', 'Metric'],\n",
    "    values='value',\n",
    "    aggfunc='mean'\n",
    "    )\n",
    "    return pivot_table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b9274574",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the benchmark results used by every table in this notebook; the pivot helpers\n",
    "# read the 'Model', 'Category', 'Task', 'metric' and 'value' columns of this frame.\n",
    "# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.\n",
    "df = pd.read_parquet('/data/metagen-data/v3/workspace/benchmark_results.parquet', engine='fastparquet')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f24f036",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Makes the main-paper table (means only, no confidence intervals).\n",
    "# NOTE: this uses the single-DataFrame make_pivot / highlight_best defined in the cell\n",
    "# above. Later cells redefine both names with a (mean_df, ci_df) interface, so this\n",
    "# cell only works when it is run before those redefinitions.\n",
    "\n",
    "main_table_unformatted = make_pivot(df) # Makes the table itself\n",
    "main_table_formatted = highlight_best(main_table_unformatted) # Converts to strings and does highlighting, etc\n",
    "main_table_latex = to_latex(main_table_formatted) # Converts to latex for insertion into the paper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "29bbe720",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>Category</th>\n",
       "      <th colspan=\"2\" halign=\"left\">Inverse Design</th>\n",
       "      <th colspan=\"2\" halign=\"left\">Material Understanding</th>\n",
       "      <th colspan=\"3\" halign=\"left\">Reconstruction</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Metric</th>\n",
       "      <th>Error</th>\n",
       "      <th>Valid</th>\n",
       "      <th>Error</th>\n",
       "      <th>Valid</th>\n",
       "      <th>CD</th>\n",
       "      <th>IoU</th>\n",
       "      <th>Valid</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Model</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>LLaVAOmniTask</th>\n",
       "      <td>0.011240</td>\n",
       "      <td>0.919167</td>\n",
       "      <td>0.024208</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.033738</td>\n",
       "      <td>0.489803</td>\n",
       "      <td>0.828933</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LLaVASingleTask</th>\n",
       "      <td>0.036247</td>\n",
       "      <td>0.818505</td>\n",
       "      <td>0.017629</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.028892</td>\n",
       "      <td>0.524159</td>\n",
       "      <td>0.838000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NovaLite</th>\n",
       "      <td>0.059632</td>\n",
       "      <td>0.026638</td>\n",
       "      <td>0.199985</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.118643</td>\n",
       "      <td>0.050810</td>\n",
       "      <td>0.193333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NovaOmniTask</th>\n",
       "      <td>0.025935</td>\n",
       "      <td>0.914268</td>\n",
       "      <td>0.031570</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.044518</td>\n",
       "      <td>0.333645</td>\n",
       "      <td>0.871733</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NovaSingleTask</th>\n",
       "      <td>0.032133</td>\n",
       "      <td>0.791815</td>\n",
       "      <td>0.152805</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.058993</td>\n",
       "      <td>0.205483</td>\n",
       "      <td>0.848000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OpenAIO3</th>\n",
       "      <td>0.038314</td>\n",
       "      <td>0.247308</td>\n",
       "      <td>0.077307</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.053406</td>\n",
       "      <td>0.146535</td>\n",
       "      <td>0.546285</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Category        Inverse Design           Material Understanding        \\\n",
       "Metric                   Error     Valid                  Error Valid   \n",
       "Model                                                                   \n",
       "LLaVAOmniTask         0.011240  0.919167               0.024208   1.0   \n",
       "LLaVASingleTask       0.036247  0.818505               0.017629   1.0   \n",
       "NovaLite              0.059632  0.026638               0.199985   1.0   \n",
       "NovaOmniTask          0.025935  0.914268               0.031570   1.0   \n",
       "NovaSingleTask        0.032133  0.791815               0.152805   1.0   \n",
       "OpenAIO3              0.038314  0.247308               0.077307   1.0   \n",
       "\n",
       "Category        Reconstruction                      \n",
       "Metric                      CD       IoU     Valid  \n",
       "Model                                               \n",
       "LLaVAOmniTask         0.033738  0.489803  0.828933  \n",
       "LLaVASingleTask       0.028892  0.524159  0.838000  \n",
       "NovaLite              0.118643  0.050810  0.193333  \n",
       "NovaOmniTask          0.044518  0.333645  0.871733  \n",
       "NovaSingleTask        0.058993  0.205483  0.848000  \n",
       "OpenAIO3              0.053406  0.146535  0.546285  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "main_table_unformatted"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "486594e1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>Category</th>\n",
       "      <th colspan=\"2\" halign=\"left\">Inverse Design</th>\n",
       "      <th colspan=\"2\" halign=\"left\">Material Understanding</th>\n",
       "      <th colspan=\"3\" halign=\"left\">Reconstruction</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Metric</th>\n",
       "      <th>Error</th>\n",
       "      <th>Valid</th>\n",
       "      <th>Error</th>\n",
       "      <th>Valid</th>\n",
       "      <th>CD</th>\n",
       "      <th>IoU</th>\n",
       "      <th>Valid</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Model</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>LLaVAOmniTask</th>\n",
       "      <td>\\textbf{0.011}</td>\n",
       "      <td>\\textbf{91.9\\%}</td>\n",
       "      <td>0.024</td>\n",
       "      <td>\\textbf{100\\%}</td>\n",
       "      <td>0.034</td>\n",
       "      <td>0.490</td>\n",
       "      <td>82.9\\%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LLaVASingleTask</th>\n",
       "      <td>0.036</td>\n",
       "      <td>81.9\\%</td>\n",
       "      <td>\\textbf{0.018}</td>\n",
       "      <td>\\textbf{100\\%}</td>\n",
       "      <td>\\textbf{0.029}</td>\n",
       "      <td>\\textbf{0.524}</td>\n",
       "      <td>83.8\\%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NovaLite</th>\n",
       "      <td>0.060</td>\n",
       "      <td>2.7\\%</td>\n",
       "      <td>0.200</td>\n",
       "      <td>\\textbf{100\\%}</td>\n",
       "      <td>0.119</td>\n",
       "      <td>0.051</td>\n",
       "      <td>19.3\\%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NovaOmniTask</th>\n",
       "      <td>0.026</td>\n",
       "      <td>91.4\\%</td>\n",
       "      <td>0.032</td>\n",
       "      <td>\\textbf{100\\%}</td>\n",
       "      <td>0.045</td>\n",
       "      <td>0.334</td>\n",
       "      <td>\\textbf{87.2\\%}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NovaSingleTask</th>\n",
       "      <td>0.032</td>\n",
       "      <td>79.2\\%</td>\n",
       "      <td>0.153</td>\n",
       "      <td>\\textbf{100\\%}</td>\n",
       "      <td>0.059</td>\n",
       "      <td>0.205</td>\n",
       "      <td>84.8\\%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OpenAIO3</th>\n",
       "      <td>0.038</td>\n",
       "      <td>24.7\\%</td>\n",
       "      <td>0.077</td>\n",
       "      <td>\\textbf{100\\%}</td>\n",
       "      <td>0.053</td>\n",
       "      <td>0.147</td>\n",
       "      <td>54.6\\%</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Category         Inverse Design                  Material Understanding  \\\n",
       "Metric                    Error            Valid                  Error   \n",
       "Model                                                                     \n",
       "LLaVAOmniTask    \\textbf{0.011}  \\textbf{91.9\\%}                  0.024   \n",
       "LLaVASingleTask           0.036           81.9\\%         \\textbf{0.018}   \n",
       "NovaLite                  0.060            2.7\\%                  0.200   \n",
       "NovaOmniTask              0.026           91.4\\%                  0.032   \n",
       "NovaSingleTask            0.032           79.2\\%                  0.153   \n",
       "OpenAIO3                  0.038           24.7\\%                  0.077   \n",
       "\n",
       "Category                         Reconstruction                  \\\n",
       "Metric                    Valid              CD             IoU   \n",
       "Model                                                             \n",
       "LLaVAOmniTask    \\textbf{100\\%}           0.034           0.490   \n",
       "LLaVASingleTask  \\textbf{100\\%}  \\textbf{0.029}  \\textbf{0.524}   \n",
       "NovaLite         \\textbf{100\\%}           0.119           0.051   \n",
       "NovaOmniTask     \\textbf{100\\%}           0.045           0.334   \n",
       "NovaSingleTask   \\textbf{100\\%}           0.059           0.205   \n",
       "OpenAIO3         \\textbf{100\\%}           0.053           0.147   \n",
       "\n",
       "Category                          \n",
       "Metric                     Valid  \n",
       "Model                             \n",
       "LLaVAOmniTask             82.9\\%  \n",
       "LLaVASingleTask           83.8\\%  \n",
       "NovaLite                  19.3\\%  \n",
       "NovaOmniTask     \\textbf{87.2\\%}  \n",
       "NovaSingleTask            84.8\\%  \n",
       "OpenAIO3                  54.6\\%  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "main_table_formatted"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "374204db",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{cccccccc}\n",
      "\\toprule\n",
      "Category & \\multicolumn{2}{c}{Inverse Design} & \\multicolumn{2}{c}{Material Understanding} & \\multicolumn{3}{c}{Reconstruction} \\\\\n",
      "Metric & Error & Valid & Error & Valid & CD & IoU & Valid \\\\\n",
      "Model &  &  &  &  &  &  &  \\\\\n",
      "\\midrule\n",
      "LLaVAOmniTask & \\textbf{0.011} & \\textbf{91.9\\%} & 0.024 & \\textbf{100\\%} & 0.034 & 0.490 & 82.9\\% \\\\\n",
      "LLaVASingleTask & 0.036 & 81.9\\% & \\textbf{0.018} & \\textbf{100\\%} & \\textbf{0.029} & \\textbf{0.524} & 83.8\\% \\\\\n",
      "NovaLite & 0.060 & 2.7\\% & 0.200 & \\textbf{100\\%} & 0.119 & 0.051 & 19.3\\% \\\\\n",
      "NovaOmniTask & 0.026 & 91.4\\% & 0.032 & \\textbf{100\\%} & 0.045 & 0.334 & \\textbf{87.2\\%} \\\\\n",
      "NovaSingleTask & 0.032 & 79.2\\% & 0.153 & \\textbf{100\\%} & 0.059 & 0.205 & 84.8\\% \\\\\n",
      "OpenAIO3 & 0.038 & 24.7\\% & 0.077 & \\textbf{100\\%} & 0.053 & 0.147 & 54.6\\% \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(main_table_latex)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "13022a4a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "Z = 1.96           # two–sided 95 % normal quantile\n",
    "USE_METRICS = ['Valid', 'IoU', 'Chamfer Distance', 'Average Normalized Error']\n",
    "\n",
    "category_mapping = {\n",
    "    'inverse_design':          'Inverse Design',\n",
    "    'material_understanding':  'Material Understanding',\n",
    "    'reconstruction':          'Reconstruction'\n",
    "}\n",
    "metric_mapping = {\n",
    "    'Average Normalized Error': 'Error',\n",
    "    'Chamfer Distance':         'CD'\n",
    "}\n",
    "task_mapping = {\n",
    "    'multiview_and_code_material_understanding': '4 View + Code',\n",
    "    'single_view_material_understanding':        '1 View',\n",
    "    '4_target_inverse_design':                   '4 Target',\n",
    "    '2_view_reconstruction':                     '2 View',\n",
    "    '6_target_inverse_design':                   '6 Target',\n",
    "    '3_view_reconstruction':                     '3 View',\n",
    "    '1_view_reconstruction':                     '1 View',\n",
    "    '5_target_inverse_design':                   '5 Target',\n",
    "    '3_target_inverse_design':                   '3 Target',\n",
    "    '2_target_inverse_design':                   '2 Target',\n",
    "    '4_view_reconstruction':                     '4 View',\n",
    "    '1_target_inverse_design':                   '1 Target',\n",
    "}\n",
    "\n",
    "def _prep(df):\n",
    "    df = df[df.metric.isin(USE_METRICS)].copy()\n",
    "    df['Category'] = df['Category'].replace(category_mapping)\n",
    "    df['metric']   = df['metric'].replace(metric_mapping)\n",
    "    df = df.rename(columns={'metric': 'Metric'})\n",
    "    return df\n",
    "\n",
    "def make_pivot(df):\n",
    "    \"\"\"\n",
    "    Summarise `value` per (Model, Category, Metric) and return a pair of\n",
    "    identically shaped pivots, indexed by Model with (Category, Metric) columns:\n",
    "    * mean_df : the group means\n",
    "    * ci_df   : the 95 % confidence half-width, Z * std / sqrt(count)\n",
    "    \"\"\"\n",
    "    group_keys = ['Model', 'Category', 'Metric']\n",
    "    summary = (\n",
    "        _prep(df)\n",
    "        .groupby(group_keys)['value']\n",
    "        .agg(['mean', 'std', 'count'])\n",
    "        .reset_index()\n",
    "    )\n",
    "    summary['ci95'] = Z * summary['std'] / np.sqrt(summary['count'])\n",
    "\n",
    "    # Same pivot twice, once per statistic, so both results share one layout\n",
    "    mean_df, ci_df = (\n",
    "        summary.pivot(index='Model', columns=['Category', 'Metric'], values=field)\n",
    "        for field in ('mean', 'ci95')\n",
    "    )\n",
    "    return mean_df, ci_df\n",
    "\n",
    "def make_category_pivot(df, category):\n",
    "    \"\"\"\n",
    "    Per-task variant of make_pivot for a single high-level category.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : DataFrame\n",
    "        Benchmark rows; needs the columns _prep() reads plus 'Model', 'Task'\n",
    "        and 'value'.\n",
    "    category : str\n",
    "        Either the raw identifier (a key of category_mapping, e.g.\n",
    "        'inverse_design') or its display label (e.g. 'Inverse Design').\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    (mean_df, ci_df)\n",
    "        Two pivots indexed by Model with (Task, Metric) columns: the mean of\n",
    "        'value' and the 95 % confidence half-width Z * std / sqrt(count).\n",
    "    \"\"\"\n",
    "    df = _prep(df)\n",
    "    # _prep() has already replaced raw Category values with display labels, so a\n",
    "    # raw identifier must be translated too; otherwise the filter below would\n",
    "    # silently match no rows.\n",
    "    category = category_mapping.get(category, category)\n",
    "    df = df[df.Category == category].copy()\n",
    "    df['Task'] = df['Task'].replace(task_mapping)\n",
    "\n",
    "    grp = df.groupby(['Model', 'Task', 'Metric'])['value']\n",
    "    stats = grp.agg(['mean', 'std', 'count']).reset_index()\n",
    "    stats['ci95'] = Z * stats['std'] / np.sqrt(stats['count'])\n",
    "\n",
    "    mean_df = stats.pivot(index='Model',\n",
    "                          columns=['Task', 'Metric'],\n",
    "                          values='mean')\n",
    "    ci_df   = stats.pivot(index='Model',\n",
    "                          columns=['Task', 'Metric'],\n",
    "                          values='ci95')\n",
    "    return mean_df, ci_df\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2d7397d4",
   "metadata": {},
   "outputs": [],
   "source": [
    "minimize_metrics = {'Error', 'CD'}\n",
    "maximize_metrics = {'IoU', 'Valid'}\n",
    "\n",
    "def highlight_best(mean_df, ci_df):\n",
    "    \"\"\"\n",
    "    Combine mean and ci into one string per cell, choose the optimum\n",
    "    mean for each metric, and bold it.\n",
    "    \"\"\"\n",
    "    out = pd.DataFrame(index=mean_df.index, columns=mean_df.columns)\n",
    "\n",
    "    for col in mean_df.columns:\n",
    "        metric = col[1]         # (Category|Task, Metric)\n",
    "        means  = mean_df[col]\n",
    "        cis    = ci_df[col]\n",
    "\n",
    "        # which mean is the \"best\"?\n",
    "        if metric in minimize_metrics:\n",
    "            best_val = means.min(skipna=True)\n",
    "        elif metric in maximize_metrics:\n",
    "            best_val = means.max(skipna=True)\n",
    "        else:\n",
    "            best_val = np.nan\n",
    "\n",
    "        def fmt(m, c):\n",
    "            if pd.isna(m):\n",
    "                return r'\\textemdash{}'\n",
    "            # Assemble the \"mean ± ci\" part\n",
    "            if metric == 'Valid':\n",
    "                m_p = m * 100\n",
    "                c_p = c * 100\n",
    "                base = f\"{m_p:.1f}\\\\% ± {c_p:.1f}\\\\%\"\n",
    "            else:\n",
    "                base = f\"{m:.3f} ± {c:.3f}\"\n",
    "            return rf\"\\textbf{{{base}}}\" if m == best_val else base\n",
    "\n",
    "        out[col] = [fmt(m, c) for m, c in zip(means, cis)]\n",
    "\n",
    "    return out\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "4fbc8381",
   "metadata": {},
   "outputs": [],
   "source": [
    "def to_latex(formatted):\n",
    "    col_fmt = 'c' * (formatted.shape[1] + 1)        # one extra for the row index\n",
    "    return formatted.to_latex(escape=False,\n",
    "                              multicolumn=True,\n",
    "                              multicolumn_format='c',\n",
    "                              column_format=col_fmt)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "7fb65184",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{cccccccc}\n",
      "\\toprule\n",
      "Category & \\multicolumn{2}{c}{Inverse Design} & \\multicolumn{2}{c}{Material Understanding} & \\multicolumn{3}{c}{Reconstruction} \\\\\n",
      "Metric & Error & Valid & Error & Valid & CD & IoU & Valid \\\\\n",
      "Model &  &  &  &  &  &  &  \\\\\n",
      "\\midrule\n",
      "LLaVAOmniTask & \\textbf{0.011 ± 0.002} & \\textbf{91.9\\% ± 0.9\\%} & 0.024 ± 0.004 & \\textbf{100.0\\% ± 0.0\\%} & 0.034 ± 0.001 & 0.490 ± 0.008 & 82.9\\% ± 0.9\\% \\\\\n",
      "LLaVASingleTask & 0.036 ± 0.007 & 81.9\\% ± 3.2\\% & \\textbf{0.018 ± 0.004} & \\textbf{100.0\\% ± 0.0\\%} & \\textbf{0.029 ± 0.003} & \\textbf{0.524 ± 0.030} & 83.8\\% ± 3.2\\% \\\\\n",
      "NovaLite & 0.060 ± 0.023 & 2.7\\% ± 0.6\\% & 0.200 ± 0.005 & \\textbf{100.0\\% ± 0.0\\%} & 0.119 ± 0.003 & 0.051 ± 0.003 & 19.3\\% ± 0.9\\% \\\\\n",
      "NovaOmniTask & 0.026 ± 0.002 & 91.4\\% ± 1.0\\% & 0.032 ± 0.005 & \\textbf{100.0\\% ± 0.0\\%} & 0.045 ± 0.001 & 0.334 ± 0.007 & \\textbf{87.2\\% ± 0.8\\%} \\\\\n",
      "NovaSingleTask & 0.032 ± 0.007 & 79.2\\% ± 3.4\\% & 0.153 ± 0.006 & \\textbf{100.0\\% ± 0.0\\%} & 0.059 ± 0.003 & 0.205 ± 0.020 & 84.8\\% ± 3.2\\% \\\\\n",
      "OpenAIO3 & 0.038 ± 0.006 & 24.7\\% ± 1.5\\% & 0.077 ± 0.005 & \\textbf{100.0\\% ± 0.0\\%} & 0.053 ± 0.001 & 0.147 ± 0.004 & 54.6\\% ± 1.1\\% \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Main table with 95 % confidence half-widths: build the mean/CI pivots, merge them into\n",
    "# 'mean ± ci' strings with the best mean per column in bold, then emit the LaTeX source.\n",
    "mean_piv, ci_piv = make_pivot(df)                 # or make_category_pivot(...)\n",
    "formatted        = highlight_best(mean_piv, ci_piv)\n",
    "latex_code       = to_latex(formatted)\n",
    "# print() rather than a bare expression so backslashes show unescaped and can be pasted as-is\n",
    "print(latex_code)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "89fa184b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>Category</th>\n",
       "      <th colspan=\"2\" halign=\"left\">Inverse Design</th>\n",
       "      <th colspan=\"2\" halign=\"left\">Material Understanding</th>\n",
       "      <th colspan=\"3\" halign=\"left\">Reconstruction</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Metric</th>\n",
       "      <th>Error</th>\n",
       "      <th>Valid</th>\n",
       "      <th>Error</th>\n",
       "      <th>Valid</th>\n",
       "      <th>CD</th>\n",
       "      <th>IoU</th>\n",
       "      <th>Valid</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Model</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>LLaVAOmniTask</th>\n",
       "      <td>\\textbf{0.011 ± 0.002}</td>\n",
       "      <td>\\textbf{91.9\\% ± 0.9\\%}</td>\n",
       "      <td>0.024 ± 0.004</td>\n",
       "      <td>\\textbf{100.0\\% ± 0.0\\%}</td>\n",
       "      <td>0.034 ± 0.001</td>\n",
       "      <td>0.490 ± 0.008</td>\n",
       "      <td>82.9\\% ± 0.9\\%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LLaVASingleTask</th>\n",
       "      <td>0.036 ± 0.007</td>\n",
       "      <td>81.9\\% ± 3.2\\%</td>\n",
       "      <td>\\textbf{0.018 ± 0.004}</td>\n",
       "      <td>\\textbf{100.0\\% ± 0.0\\%}</td>\n",
       "      <td>\\textbf{0.029 ± 0.003}</td>\n",
       "      <td>\\textbf{0.524 ± 0.030}</td>\n",
       "      <td>83.8\\% ± 3.2\\%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NovaLite</th>\n",
       "      <td>0.060 ± 0.023</td>\n",
       "      <td>2.7\\% ± 0.6\\%</td>\n",
       "      <td>0.200 ± 0.005</td>\n",
       "      <td>\\textbf{100.0\\% ± 0.0\\%}</td>\n",
       "      <td>0.119 ± 0.003</td>\n",
       "      <td>0.051 ± 0.003</td>\n",
       "      <td>19.3\\% ± 0.9\\%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NovaOmniTask</th>\n",
       "      <td>0.026 ± 0.002</td>\n",
       "      <td>91.4\\% ± 1.0\\%</td>\n",
       "      <td>0.032 ± 0.005</td>\n",
       "      <td>\\textbf{100.0\\% ± 0.0\\%}</td>\n",
       "      <td>0.045 ± 0.001</td>\n",
       "      <td>0.334 ± 0.007</td>\n",
       "      <td>\\textbf{87.2\\% ± 0.8\\%}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NovaSingleTask</th>\n",
       "      <td>0.032 ± 0.007</td>\n",
       "      <td>79.2\\% ± 3.4\\%</td>\n",
       "      <td>0.153 ± 0.006</td>\n",
       "      <td>\\textbf{100.0\\% ± 0.0\\%}</td>\n",
       "      <td>0.059 ± 0.003</td>\n",
       "      <td>0.205 ± 0.020</td>\n",
       "      <td>84.8\\% ± 3.2\\%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OpenAIO3</th>\n",
       "      <td>0.038 ± 0.006</td>\n",
       "      <td>24.7\\% ± 1.5\\%</td>\n",
       "      <td>0.077 ± 0.005</td>\n",
       "      <td>\\textbf{100.0\\% ± 0.0\\%}</td>\n",
       "      <td>0.053 ± 0.001</td>\n",
       "      <td>0.147 ± 0.004</td>\n",
       "      <td>54.6\\% ± 1.1\\%</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Category                 Inverse Design                           \\\n",
       "Metric                            Error                    Valid   \n",
       "Model                                                              \n",
       "LLaVAOmniTask    \\textbf{0.011 ± 0.002}  \\textbf{91.9\\% ± 0.9\\%}   \n",
       "LLaVASingleTask           0.036 ± 0.007           81.9\\% ± 3.2\\%   \n",
       "NovaLite                  0.060 ± 0.023            2.7\\% ± 0.6\\%   \n",
       "NovaOmniTask              0.026 ± 0.002           91.4\\% ± 1.0\\%   \n",
       "NovaSingleTask            0.032 ± 0.007           79.2\\% ± 3.4\\%   \n",
       "OpenAIO3                  0.038 ± 0.006           24.7\\% ± 1.5\\%   \n",
       "\n",
       "Category         Material Understanding                            \\\n",
       "Metric                            Error                     Valid   \n",
       "Model                                                               \n",
       "LLaVAOmniTask             0.024 ± 0.004  \\textbf{100.0\\% ± 0.0\\%}   \n",
       "LLaVASingleTask  \\textbf{0.018 ± 0.004}  \\textbf{100.0\\% ± 0.0\\%}   \n",
       "NovaLite                  0.200 ± 0.005  \\textbf{100.0\\% ± 0.0\\%}   \n",
       "NovaOmniTask              0.032 ± 0.005  \\textbf{100.0\\% ± 0.0\\%}   \n",
       "NovaSingleTask            0.153 ± 0.006  \\textbf{100.0\\% ± 0.0\\%}   \n",
       "OpenAIO3                  0.077 ± 0.005  \\textbf{100.0\\% ± 0.0\\%}   \n",
       "\n",
       "Category                 Reconstruction                          \\\n",
       "Metric                               CD                     IoU   \n",
       "Model                                                             \n",
       "LLaVAOmniTask             0.034 ± 0.001           0.490 ± 0.008   \n",
       "LLaVASingleTask  \\textbf{0.029 ± 0.003}  \\textbf{0.524 ± 0.030}   \n",
       "NovaLite                  0.119 ± 0.003           0.051 ± 0.003   \n",
       "NovaOmniTask              0.045 ± 0.001           0.334 ± 0.007   \n",
       "NovaSingleTask            0.059 ± 0.003           0.205 ± 0.020   \n",
       "OpenAIO3                  0.053 ± 0.001           0.147 ± 0.004   \n",
       "\n",
       "Category                                  \n",
       "Metric                             Valid  \n",
       "Model                                     \n",
       "LLaVAOmniTask             82.9\\% ± 0.9\\%  \n",
       "LLaVASingleTask           83.8\\% ± 3.2\\%  \n",
       "NovaLite                  19.3\\% ± 0.9\\%  \n",
       "NovaOmniTask     \\textbf{87.2\\% ± 0.8\\%}  \n",
       "NovaSingleTask            84.8\\% ± 3.2\\%  \n",
       "OpenAIO3                  54.6\\% ± 1.1\\%  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "formatted"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ba041aa",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
