{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8ecd8f35",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing 80 files...\n",
      "\n",
      "% -----------------------------------------------------------------------\n",
      "% TABLE: Best performance\n",
      "% -----------------------------------------------------------------------\n",
      "\\begin{table}[ht]\n",
      "    \\centering\n",
      "    \\caption{Performance comparison on Lorenz-96 ($d=50$). Strategy: Best performance}\n",
      "    \\label{tab:lorenz96_50_Best_performance}\n",
      "    \n",
      "    \\newcommand{\\res}[2]{\\shortstack{#1 \\\\ {\\scriptsize (#2)}}}\n",
      "    \n",
      "    \\resizebox{\\columnwidth}{!}{%\n",
      "    \\begin{tabular}{llcccc}\n",
      "        \\toprule\n",
      "        &  & \\multicolumn{2}{c}{F = 10} & \\multicolumn{2}{c}{F = 40}  \\\\\n",
      "        \\cmidrule(lr){3-4} \\cmidrule(lr){5-6} \n",
      "        Penalty & Metric & T=500 & T=1000 & T=500 & T=1000 \\\\\n",
      "        \\midrule\n",
      "        \\multirow{2}{*}{Jacob-L1}\n",
      "          & AUROC & \\res{0.694}{0.007} & \\res{0.774}{0.031} & \\res{0.746}{0.032} & \\res{0.941}{0.122} \\\\\n",
      "          \\addlinespace[0.5em]\n",
      "          & AUPRC & \\res{0.428}{0.009} & \\res{0.551}{0.043} & \\res{0.500}{0.041} & \\res{0.889}{0.217} \\\\\n",
      "        \\midrule\n",
      "                \\multirow{2}{*}{Jacob-F}\n",
      "          & AUROC & \\res{0.652}{0.015} & \\res{0.686}{0.014} & \\res{0.727}{0.035} & \\res{0.747}{0.024} \\\\\n",
      "          \\addlinespace[0.5em]\n",
      "          & AUPRC & \\res{0.363}{0.009} & \\res{0.394}{0.005} & \\res{0.444}{0.028} & \\res{0.477}{0.028} \\\\\n",
      "        \\midrule\n",
      "                \\multirow{2}{*}{Shapley}\n",
      "          & AUROC & \\res{0.717}{0.011} & \\res{0.802}{0.010} & \\res{0.745}{0.022} & \\res{0.984}{0.015} \\\\\n",
      "          \\addlinespace[0.5em]\n",
      "          & AUPRC & \\res{0.439}{0.016} & \\res{0.555}{0.015} & \\res{0.489}{0.035} & \\res{0.956}{0.023} \\\\\n",
      "        \\midrule\n",
      "                \\multirow{2}{*}{F-Shap}\n",
      "          & AUROC & \\res{0.667}{0.018} & \\res{0.697}{0.017} & \\res{0.726}{0.017} & \\res{0.775}{0.025} \\\\\n",
      "          \\addlinespace[0.5em]\n",
      "          & AUPRC & \\res{0.374}{0.013} & \\res{0.394}{0.011} & \\res{0.436}{0.025} & \\res{0.495}{0.032} \\\\\n",
      "        \\bottomrule\n",
      "    \\end{tabular}%\n",
      "    }\n",
      "\\end{table}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import glob\n",
    "import os\n",
    "import re\n",
    "\n",
    "# --- Configuration ---\n",
    "DATA_DIR = './simulation_Lorenz96'       # Directory containing your result files\n",
    "FILE_PATTERN = '*.csv'        # File pattern to match\n",
    "TARGET_DIMENSION = 50         # Set to 10 or 50\n",
    "\n",
    "# Output filename for the best configurations\n",
    "OUTPUT_CSV_MIN_LOSS = 'best_performance.csv' \n",
    "\n",
    "# The Master List of Series (Order matters!)\n",
    "SERIES_NAMES = [\n",
    "    'D10_T500_F10', 'D10_T500_F40', \n",
    "    'D10_T1000_F10', 'D10_T1000_F40', \n",
    "    'D10_T2000_F10', 'D10_T2000_F40', \n",
    "    'D50_T500_F10', 'D50_T500_F40', \n",
    "    'D50_T1000_F10', 'D50_T1000_F40', \n",
    "    'D50_T2000_F10', 'D50_T2000_F40'\n",
    "]\n",
    "\n",
    "# Method Display Names for the Table\n",
    "METHOD_DISPLAY_NAMES = {\n",
    "    'Jacob_L1': 'Jacob-L1',\n",
    "    'Jacob_F': 'Jacob-F',\n",
    "    'Shapley': 'Shapley',\n",
    "    'Fast_Shap': 'F-Shap'\n",
    "}\n",
    "METHOD_ORDER = ['Jacob_L1', 'Jacob_F', 'Shapley', 'Fast_Shap']\n",
    "\n",
    "def parse_filename_and_params(filepath):\n",
    "    basename = os.path.basename(filepath)\n",
    "    name_no_ext = os.path.splitext(basename)[0]\n",
    "    parts = name_no_ext.split('_')\n",
    "    \n",
    "    if len(parts) < 6: return None\n",
    "    \n",
    "    try:\n",
    "        series_id = int(parts[2])\n",
    "    except ValueError:\n",
    "        return None \n",
    "\n",
    "    list_index = series_id - 1\n",
    "    if list_index < 0 or list_index >= len(SERIES_NAMES):\n",
    "        return None \n",
    "\n",
    "    series_str = SERIES_NAMES[list_index]\n",
    "    match = re.search(r'D(\\d+)_T(\\d+)_F(\\d+)', series_str)\n",
    "    if not match: return None\n",
    "    \n",
    "    params = {'D': int(match.group(1)), 'T': int(match.group(2)), 'F': int(match.group(3))}\n",
    "\n",
    "    return {\n",
    "        'series_str': series_str,\n",
    "        'subject': parts[3],\n",
    "        'model': parts[4],\n",
    "        'method': '_'.join(parts[5:]),\n",
    "        'filename': basename,\n",
    "        **params \n",
    "    }\n",
    "\n",
    "def process_files(data_dir, pattern):\n",
    "    files = glob.glob(os.path.join(data_dir, pattern))\n",
    "    rows_min_loss = []   \n",
    "    \n",
    "    print(f\"Processing {len(files)} files...\")\n",
    "\n",
    "    for f in files:\n",
    "        meta = parse_filename_and_params(f)\n",
    "        if not meta: continue\n",
    "        \n",
    "        try:\n",
    "            df = pd.read_csv(f)\n",
    "            \n",
    "            # Ensure metrics are numeric\n",
    "            cols_to_check = ['val_loss', 'AUROC', 'AUPRC']\n",
    "            for c in cols_to_check:\n",
    "                if c in df.columns:\n",
    "                    df[c] = pd.to_numeric(df[c], errors='coerce')\n",
    "            \n",
    "            if df.empty or 'val_loss' not in df.columns: \n",
    "                continue\n",
    "\n",
    "            # --- STRICT SELECTION: MIN VAL LOSS ---\n",
    "            # Sort by val_loss ascending and pick the very first row.\n",
    "            best_candidate = df.sort_values(by='val_loss', ascending=True).iloc[0]\n",
    "            \n",
    "            # Store results\n",
    "            row_data = best_candidate.to_dict()\n",
    "            row_data.update(meta) \n",
    "            rows_min_loss.append(row_data)\n",
    "\n",
    "        except Exception as e:\n",
    "            print(f\"Error processing {os.path.basename(f)}: {e}\")\n",
    "            continue\n",
    "\n",
    "    return pd.DataFrame(rows_min_loss)\n",
    "\n",
    "def save_best_to_csv(df, filename):\n",
    "    \"\"\"\n",
    "    Saves the dataframe of selected best runs to CSV.\n",
    "    Organizes columns so important identifiers come first.\n",
    "    \"\"\"\n",
    "    if df.empty:\n",
    "        print(f\"No data to save for {filename}\")\n",
    "        return\n",
    "\n",
    "    # 1. Define column order priorities\n",
    "    meta_cols = ['method', 'D', 'T', 'F', 'subject', 'series_str', 'filename']\n",
    "    metric_cols = ['AUROC', 'AUPRC', 'val_loss', 'epoch']\n",
    "    \n",
    "    # 2. Filter for columns that actually exist\n",
    "    meta_cols = [c for c in meta_cols if c in df.columns]\n",
    "    metric_cols = [c for c in metric_cols if c in df.columns]\n",
    "    \n",
    "    # 3. Get everything else (hyperparams etc.)\n",
    "    other_cols = [c for c in df.columns if c not in meta_cols and c not in metric_cols]\n",
    "    \n",
    "    # 4. Construct final column list\n",
    "    final_cols = meta_cols + metric_cols + other_cols\n",
    "    \n",
    "    # 5. Reorder and Save\n",
    "    df_sorted = df[final_cols]\n",
    "    # print(f\"Saving {len(df)} 'Best Config' rows to {filename}...\")\n",
    "    df_sorted.to_csv(filename, index=False)\n",
    "\n",
    "def get_aggregated_stats(df_raw):\n",
    "    \"\"\"Aggregates for the LaTeX table\"\"\"\n",
    "    if df_raw.empty: return pd.DataFrame()\n",
    "    return df_raw.groupby(['D', 'F', 'T', 'method']).agg(\n",
    "        auroc_mean=('AUROC', 'mean'),\n",
    "        auroc_std=('AUROC', 'std'),\n",
    "        auprc_mean=('AUPRC', 'mean'),\n",
    "        auprc_std=('AUPRC', 'std')\n",
    "    ).reset_index()\n",
    "\n",
    "def generate_latex_table(df, target_d, strategy_name):\n",
    "    df_sub = df[df['D'] == target_d].copy()\n",
    "    if df_sub.empty:\n",
    "        print(f\"No data found for Dimension D={target_d}\")\n",
    "        return\n",
    "\n",
    "    unique_Fs = sorted(df_sub['F'].unique())\n",
    "    unique_Ts = sorted(df_sub['T'].unique())\n",
    "    if not unique_Fs or not unique_Ts: return\n",
    "\n",
    "    # Header\n",
    "    header_top = \"        &  \"\n",
    "    header_mid = \"\"\n",
    "    col_counter = 3\n",
    "    for f_val in unique_Fs:\n",
    "        header_top += f\"& \\\\multicolumn{{{len(unique_Ts)}}}{{c}}{{F = {f_val}}} \"\n",
    "        header_mid += f\"\\\\cmidrule(lr){{{col_counter}-{col_counter + len(unique_Ts) - 1}}} \"\n",
    "        col_counter += len(unique_Ts)\n",
    "        \n",
    "    header_bot = \"        Penalty & Metric\"\n",
    "    for _ in unique_Fs:\n",
    "        for t_val in unique_Ts:\n",
    "            header_bot += f\" & T={t_val}\"\n",
    "\n",
    "    latex_header = f\"{header_top} \\\\\\\\\\n        {header_mid}\\n{header_bot} \\\\\\\\\"\n",
    "\n",
    "    # Body\n",
    "    rows = []\n",
    "    for method in METHOD_ORDER:\n",
    "        disp = METHOD_DISPLAY_NAMES.get(method, method)\n",
    "        line_roc = f\"        \\\\multirow{{2}}{{*}}{{{disp}}}\\n          & AUROC\"\n",
    "        line_prc = \"          & AUPRC\"\n",
    "        \n",
    "        for f_val in unique_Fs:\n",
    "            for t_val in unique_Ts:\n",
    "                item = df_sub[(df_sub['method'] == method) & (df_sub['F'] == f_val) & (df_sub['T'] == t_val)]\n",
    "                \n",
    "                if not item.empty:\n",
    "                    m_roc = item.iloc[0]['auroc_mean']\n",
    "                    s_roc = item.iloc[0]['auroc_std']\n",
    "                    m_prc = item.iloc[0]['auprc_mean']\n",
    "                    s_prc = item.iloc[0]['auprc_std']\n",
    "                    line_roc += f\" & \\\\res{{{m_roc:.3f}}}{{{s_roc:.3f}}}\"\n",
    "                    line_prc += f\" & \\\\res{{{m_prc:.3f}}}{{{s_prc:.3f}}}\"\n",
    "                else:\n",
    "                    line_roc += \" & --\"\n",
    "                    line_prc += \" & --\"\n",
    "        rows.append(f\"{line_roc} \\\\\\\\\\n          \\\\addlinespace[0.5em]\\n{line_prc} \\\\\\\\\")\n",
    "\n",
    "    latex_body = \"\\n        \\\\midrule\\n        \".join(rows)\n",
    "    total_cols = 2 + (len(unique_Fs) * len(unique_Ts))\n",
    "    col_def = \"ll\" + \"c\" * (total_cols - 2)\n",
    "\n",
    "    print(r\"\"\"\n",
    "% -----------------------------------------------------------------------\n",
    "% TABLE: \"\"\" + strategy_name + r\"\"\"\n",
    "% -----------------------------------------------------------------------\n",
    "\\begin{table}[ht]\n",
    "    \\centering\n",
    "    \\caption{Performance comparison on Lorenz-96 ($d=\"\"\" + str(target_d) + r\"\"\"$). Strategy: \"\"\" + strategy_name + r\"\"\"}\n",
    "    \\label{tab:lorenz96_\"\"\" + str(target_d) + r\"\"\"_\"\"\" + strategy_name.replace(\" \", \"_\") + r\"\"\"}\n",
    "    \n",
    "    \\newcommand{\\res}[2]{\\shortstack{#1 \\\\ {\\scriptsize (#2)}}}\n",
    "    \n",
    "    \\resizebox{\\columnwidth}{!}{%\n",
    "    \\begin{tabular}{\"\"\" + col_def + r\"\"\"}\n",
    "        \\toprule\n",
    "\"\"\" + latex_header + r\"\"\"\n",
    "        \\midrule\n",
    "\"\"\" + latex_body + r\"\"\"\n",
    "        \\bottomrule\n",
    "    \\end{tabular}%\n",
    "    }\n",
    "\\end{table}\n",
    "\"\"\")\n",
    "\n",
    "# --- Main Execution ---\n",
    "\n",
    "df_results = process_files(DATA_DIR, FILE_PATTERN)\n",
    "\n",
    "# 2. SAVE the best configs to CSV\n",
    "# save_best_to_csv(df_results, OUTPUT_CSV_MIN_LOSS)\n",
    "\n",
    "# 3. Aggregate stats\n",
    "stats_agg = get_aggregated_stats(df_results)\n",
    "\n",
    "# 4. Print Table\n",
    "if not stats_agg.empty:\n",
    "    generate_latex_table(stats_agg, target_d=TARGET_DIMENSION, strategy_name=\"Best performance\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "MPS",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
