{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "23f83e6f",
   "metadata": {},
   "source": [
    "# Compile Results (Clean)\n",
    "\n",
    "Este notebook arma tablas de **worst-group accuracy** por *dataset × correlación × método* de forma limpia y extensible.\n",
    "- Selecciona automáticamente la mejor **época por validación** (max `worst_group_acc` en `val.csv`) y usa la fila correspondiente de `test.csv`.\n",
    "- Promedia sobre *seeds* (y opcionalmente añade `± std (n)`).\n",
    "- Exporta CSV y Markdown de la tabla final."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "5b3a2ef9",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CUB/erm_50/model_outputs_111/test.csv\n",
      "CUB/erm_50/model_outputs_333/test.csv\n",
      "CUB/erm_50/model_outputs_222/test.csv\n",
      "CUB/erm_625/model_outputs_111/test.csv\n",
      "CUB/erm_625/model_outputs_333/test.csv\n",
      "CUB/erm_625/model_outputs_222/test.csv\n",
      "CUB/erm_75/model_outputs_111/test.csv\n",
      "CUB/erm_75/model_outputs_333/test.csv\n",
      "CUB/erm_75/model_outputs_222/test.csv\n",
      "CUB/erm_875/model_outputs_111/test.csv\n",
      "CUB/erm_875/model_outputs_333/test.csv\n",
      "CUB/erm_875/model_outputs_222/test.csv\n",
      "CUB/erm_95/model_outputs_111/test.csv\n",
      "CUB/erm_95/model_outputs_333/test.csv\n",
      "CUB/erm_95/model_outputs_222/test.csv\n",
      "CUB/erm_dfr_625/model_outputs_111/test.csv\n",
      "CUB/erm_dfr_625/model_outputs_333/test.csv\n",
      "CUB/erm_dfr_625/model_outputs_222/test.csv\n",
      "CUB/erm_dfr_75/model_outputs_111/test.csv\n",
      "CUB/erm_dfr_75/model_outputs_333/test.csv\n",
      "CUB/erm_dfr_75/model_outputs_222/test.csv\n",
      "CUB/erm_dfr_875/model_outputs_111/test.csv\n",
      "CUB/erm_dfr_875/model_outputs_333/test.csv\n",
      "CUB/erm_dfr_875/model_outputs_222/test.csv\n",
      "CUB/erm_dfr_95/model_outputs_111/test.csv\n",
      "CUB/erm_dfr_95/model_outputs_333/test.csv\n",
      "CUB/erm_dfr_95/model_outputs_222/test.csv\n",
      "CUB/erm_dfr_gdro_625/model_outputs_111/test.csv\n",
      "CUB/erm_dfr_gdro_625/model_outputs_333/test.csv\n",
      "CUB/erm_dfr_gdro_75/model_outputs_111/test.csv\n",
      "CUB/erm_dfr_gdro_75/model_outputs_333/test.csv\n",
      "CUB/erm_dfr_gdro_75/model_outputs_222/test.csv\n",
      "CUB/erm_dfr_gdro_875/model_outputs_111/test.csv\n",
      "CUB/erm_dfr_gdro_875/model_outputs_333/test.csv\n",
      "CUB/erm_dfr_gdro_875/model_outputs_222/test.csv\n",
      "CUB/erm_dfr_gdro_95/model_outputs_111/test.csv\n",
      "CUB/erm_dfr_gdro_95/model_outputs_333/test.csv\n",
      "CUB/erm_dfr_gdro_95/model_outputs_222/test.csv\n",
      "CUB/erm_dfr_rw_50/model_outputs_111/test.csv\n",
      "CUB/erm_dfr_rw_50/model_outputs_333/test.csv\n",
      "CUB/erm_dfr_rw_50/model_outputs_222/test.csv\n",
      "CUB/erm_dfr_rw_625/model_outputs_111/test.csv\n",
      "CUB/erm_dfr_rw_625/model_outputs_333/test.csv\n",
      "CUB/erm_dfr_rw_625/model_outputs_222/test.csv\n",
      "CUB/erm_dfr_rw_75/model_outputs_111/test.csv\n",
      "CUB/erm_dfr_rw_75/model_outputs_333/test.csv\n",
      "CUB/erm_dfr_rw_75/model_outputs_222/test.csv\n",
      "CUB/erm_dfr_rw_875/model_outputs_111/test.csv\n",
      "CUB/erm_dfr_rw_875/model_outputs_333/test.csv\n",
      "CUB/erm_dfr_rw_875/model_outputs_222/test.csv\n",
      "CUB/erm_dfr_rw_95/model_outputs_111/test.csv\n",
      "CUB/erm_dfr_rw_95/model_outputs_333/test.csv\n",
      "CUB/erm_dfr_rw_95/model_outputs_222/test.csv\n",
      "CUB/erm_gdro_50/model_outputs_111/test.csv\n",
      "CUB/erm_gdro_50/model_outputs_333/test.csv\n",
      "CUB/erm_gdro_50/model_outputs_222/test.csv\n",
      "CUB/erm_gdro_625/model_outputs_111/test.csv\n",
      "CUB/erm_gdro_625/model_outputs_333/test.csv\n",
      "CUB/erm_gdro_625/model_outputs_222/test.csv\n",
      "CUB/erm_gdro_75/model_outputs_111/test.csv\n",
      "CUB/erm_gdro_75/model_outputs_333/test.csv\n",
      "CUB/erm_gdro_75/model_outputs_222/test.csv\n",
      "CUB/erm_gdro_875/model_outputs_111/test.csv\n",
      "CUB/erm_gdro_875/model_outputs_333/test.csv\n",
      "CUB/erm_gdro_875/model_outputs_222/test.csv\n",
      "CUB/erm_gdro_95/model_outputs_111/test.csv\n",
      "CUB/erm_gdro_95/model_outputs_333/test.csv\n",
      "CUB/erm_gdro_95/model_outputs_222/test.csv\n",
      "CUB/erm_rw_50/model_outputs_111/test.csv\n",
      "CUB/erm_rw_50/model_outputs_333/test.csv\n",
      "CUB/erm_rw_50/model_outputs_222/test.csv\n",
      "CUB/erm_rw_625/model_outputs_111/test.csv\n",
      "CUB/erm_rw_625/model_outputs_333/test.csv\n",
      "CUB/erm_rw_625/model_outputs_222/test.csv\n",
      "CUB/erm_rw_75/model_outputs_111/test.csv\n",
      "CUB/erm_rw_75/model_outputs_333/test.csv\n",
      "CUB/erm_rw_75/model_outputs_222/test.csv\n",
      "CUB/erm_rw_875/model_outputs_111/test.csv\n",
      "CUB/erm_rw_875/model_outputs_333/test.csv\n",
      "CUB/erm_rw_875/model_outputs_222/test.csv\n",
      "CUB/erm_rw_95/model_outputs_111/test.csv\n",
      "CUB/erm_rw_95/model_outputs_333/test.csv\n",
      "CUB/erm_rw_95/model_outputs_222/test.csv\n",
      "CUB/gdro_50/model_outputs_111/test.csv\n",
      "CUB/gdro_50/model_outputs_333/test.csv\n",
      "CUB/gdro_50/model_outputs_222/test.csv\n",
      "CUB/gdro_625/model_outputs_111/test.csv\n",
      "CUB/gdro_625/model_outputs_333/test.csv\n",
      "CUB/gdro_625/model_outputs_222/test.csv\n",
      "CUB/gdro_75/model_outputs_111/test.csv\n",
      "CUB/gdro_75/model_outputs_333/test.csv\n",
      "CUB/gdro_75/model_outputs_222/test.csv\n",
      "CUB/gdro_875/model_outputs_111/test.csv\n",
      "CUB/gdro_875/model_outputs_333/test.csv\n",
      "CUB/gdro_875/model_outputs_222/test.csv\n",
      "CUB/gdro_95/model_outputs_111/test.csv\n",
      "CUB/gdro_95/model_outputs_333/test.csv\n",
      "CUB/gdro_95/model_outputs_222/test.csv\n",
      "CUB/rw_erm_50/model_outputs_111/test.csv\n",
      "CUB/rw_erm_50/model_outputs_333/test.csv\n",
      "CUB/rw_erm_50/model_outputs_222/test.csv\n",
      "CUB/rw_erm_625/model_outputs_111/test.csv\n",
      "CUB/rw_erm_625/model_outputs_333/test.csv\n",
      "CUB/rw_erm_625/model_outputs_222/test.csv\n",
      "CUB/rw_erm_75/model_outputs_111/test.csv\n",
      "CUB/rw_erm_75/model_outputs_333/test.csv\n",
      "CUB/rw_erm_75/model_outputs_222/test.csv\n",
      "CUB/rw_erm_875/model_outputs_111/test.csv\n",
      "CUB/rw_erm_875/model_outputs_333/test.csv\n",
      "CUB/rw_erm_875/model_outputs_222/test.csv\n",
      "CUB/rw_erm_95/model_outputs_111/test.csv\n",
      "CUB/rw_erm_95/model_outputs_333/test.csv\n",
      "CUB/rw_erm_95/model_outputs_222/test.csv\n",
      "CelebA/erm/model_outputs_111/test.csv\n",
      "CelebA/erm/model_outputs_333/test.csv\n",
      "CelebA/erm/model_outputs_222/test.csv\n",
      "CelebA/erm_dfr/model_outputs_111/test.csv\n",
      "CelebA/erm_dfr/model_outputs_333/test.csv\n",
      "CelebA/erm_dfr/model_outputs_222/test.csv\n",
      "CelebA/erm_dfr_gdro/model_outputs_111/test.csv\n",
      "CelebA/erm_dfr_gdro/model_outputs_333/test.csv\n",
      "CelebA/erm_dfr_gdro/model_outputs_222/test.csv\n",
      "CelebA/erm_dfr_rw/model_outputs_111/test.csv\n",
      "CelebA/erm_dfr_rw/model_outputs_333/test.csv\n",
      "CelebA/erm_dfr_rw/model_outputs_222/test.csv\n",
      "CelebA/erm_gdro/model_outputs_111/test.csv\n",
      "CelebA/erm_gdro/model_outputs_333/test.csv\n",
      "CelebA/erm_gdro/model_outputs_222/test.csv\n",
      "CelebA/erm_rw/model_outputs_111/test.csv\n",
      "CelebA/erm_rw/model_outputs_333/test.csv\n",
      "CelebA/erm_rw/model_outputs_222/test.csv\n",
      "CelebA/gdro/model_outputs_111/test.csv\n",
      "CelebA/gdro/model_outputs_333/test.csv\n",
      "CelebA/gdro/model_outputs_222/test.csv\n",
      "CelebA/rw_erm/model_outputs_111/test.csv\n",
      "CelebA/rw_erm/model_outputs_333/test.csv\n",
      "CelebA/rw_erm/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_0.0/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_0.0/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_0.0/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_0.25/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_0.25/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_0.25/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_0.5/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_0.5/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_0.5/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_0.75/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_0.75/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_0.75/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_0.9/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_0.9/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_0.9/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_dfr_0.25/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_dfr_0.25/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_dfr_0.25/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_dfr_0.5/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_dfr_0.5/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_dfr_0.5/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_dfr_0.75/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_dfr_0.75/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_dfr_0.75/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_dfr_0.9/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_dfr_0.9/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_dfr_0.9/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_dfr_gdro_0.25/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_dfr_gdro_0.25/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_dfr_gdro_0.25/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_dfr_gdro_0.5/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_dfr_gdro_0.5/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_dfr_gdro_0.5/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_dfr_gdro_0.75/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_dfr_gdro_0.75/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_dfr_gdro_0.75/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_dfr_gdro_0.9/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_dfr_gdro_0.9/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_dfr_gdro_0.9/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_dfr_rw_0.25/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_dfr_rw_0.25/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_dfr_rw_0.25/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_dfr_rw_0.5/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_dfr_rw_0.5/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_dfr_rw_0.5/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_dfr_rw_0.75/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_dfr_rw_0.75/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_dfr_rw_0.75/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_dfr_rw_0.9/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_dfr_rw_0.9/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_dfr_rw_0.9/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_gdro_0.0/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_gdro_0.0/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_gdro_0.0/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_gdro_0.25/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_gdro_0.25/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_gdro_0.25/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_gdro_0.5/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_gdro_0.5/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_gdro_0.5/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_gdro_0.75/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_gdro_0.75/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_gdro_0.75/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_gdro_0.9/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_gdro_0.9/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_gdro_0.9/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_rw_0.0/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_rw_0.0/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_rw_0.0/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_rw_0.25/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_rw_0.25/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_rw_0.25/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_rw_0.5/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_rw_0.5/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_rw_0.5/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_rw_0.75/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_rw_0.75/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_rw_0.75/model_outputs_222/test.csv\n",
      "MNISTCIFAR/erm_rw_0.9/model_outputs_111/test.csv\n",
      "MNISTCIFAR/erm_rw_0.9/model_outputs_333/test.csv\n",
      "MNISTCIFAR/erm_rw_0.9/model_outputs_222/test.csv\n",
      "MNISTCIFAR/gdro_0.0/model_outputs_111/test.csv\n",
      "MNISTCIFAR/gdro_0.0/model_outputs_333/test.csv\n",
      "MNISTCIFAR/gdro_0.0/model_outputs_222/test.csv\n",
      "MNISTCIFAR/gdro_0.25/model_outputs_111/test.csv\n",
      "MNISTCIFAR/gdro_0.25/model_outputs_333/test.csv\n",
      "MNISTCIFAR/gdro_0.25/model_outputs_222/test.csv\n",
      "MNISTCIFAR/gdro_0.5/model_outputs_111/test.csv\n",
      "MNISTCIFAR/gdro_0.5/model_outputs_333/test.csv\n",
      "MNISTCIFAR/gdro_0.5/model_outputs_222/test.csv\n",
      "MNISTCIFAR/gdro_0.75/model_outputs_111/test.csv\n",
      "MNISTCIFAR/gdro_0.75/model_outputs_333/test.csv\n",
      "MNISTCIFAR/gdro_0.75/model_outputs_222/test.csv\n",
      "MNISTCIFAR/gdro_0.9/model_outputs_111/test.csv\n",
      "MNISTCIFAR/gdro_0.9/model_outputs_333/test.csv\n",
      "MNISTCIFAR/gdro_0.9/model_outputs_222/test.csv\n",
      "MNISTCIFAR/rw_erm_0.0/model_outputs_111/test.csv\n",
      "MNISTCIFAR/rw_erm_0.0/model_outputs_333/test.csv\n",
      "MNISTCIFAR/rw_erm_0.0/model_outputs_222/test.csv\n",
      "MNISTCIFAR/rw_erm_0.25/model_outputs_111/test.csv\n",
      "MNISTCIFAR/rw_erm_0.25/model_outputs_333/test.csv\n",
      "MNISTCIFAR/rw_erm_0.25/model_outputs_222/test.csv\n",
      "MNISTCIFAR/rw_erm_0.5/model_outputs_111/test.csv\n",
      "MNISTCIFAR/rw_erm_0.5/model_outputs_333/test.csv\n",
      "MNISTCIFAR/rw_erm_0.5/model_outputs_222/test.csv\n",
      "MNISTCIFAR/rw_erm_0.75/model_outputs_111/test.csv\n",
      "MNISTCIFAR/rw_erm_0.75/model_outputs_333/test.csv\n",
      "MNISTCIFAR/rw_erm_0.75/model_outputs_222/test.csv\n",
      "MNISTCIFAR/rw_erm_0.9/model_outputs_111/test.csv\n",
      "MNISTCIFAR/rw_erm_0.9/model_outputs_333/test.csv\n",
      "MNISTCIFAR/rw_erm_0.9/model_outputs_222/test.csv\n",
      "MultiNLI/erm_0.9/model_outputs_111/test.csv\n",
      "MultiNLI/erm_0.9/model_outputs_333/test.csv\n",
      "MultiNLI/erm_0.9/model_outputs_222/test.csv\n",
      "MultiNLI/erm_dfr_0.9/model_outputs_111/test.csv\n",
      "MultiNLI/erm_dfr_0.9/model_outputs_333/test.csv\n",
      "MultiNLI/erm_dfr_0.9/model_outputs_222/test.csv\n",
      "MultiNLI/erm_dfr_gdro_0.9/model_outputs_111/test.csv\n",
      "MultiNLI/erm_dfr_gdro_0.9/model_outputs_333/test.csv\n",
      "MultiNLI/erm_dfr_gdro_0.9/model_outputs_222/test.csv\n",
      "MultiNLI/erm_dfr_rw_0.9/model_outputs_111/test.csv\n",
      "MultiNLI/erm_dfr_rw_0.9/model_outputs_333/test.csv\n",
      "MultiNLI/erm_dfr_rw_0.9/model_outputs_222/test.csv\n",
      "MultiNLI/erm_gdro_0.9/model_outputs_111/test.csv\n",
      "MultiNLI/erm_gdro_0.9/model_outputs_333/test.csv\n",
      "MultiNLI/erm_rw_0.9/model_outputs_111/test.csv\n",
      "MultiNLI/erm_rw_0.9/model_outputs_222/test.csv\n",
      "MultiNLI/gdro_0.9/model_outputs_111/test.csv\n",
      "MultiNLI/gdro_0.9/model_outputs_333/test.csv\n",
      "MultiNLI/gdro_0.9/model_outputs_222/test.csv\n",
      "MultiNLI/rw_erm_0.9/model_outputs_111/test.csv\n",
      "MultiNLI/rw_erm_0.9/model_outputs_333/test.csv\n",
      "MultiNLI/rw_erm_0.9/model_outputs_222/test.csv\n",
      "civilcomments/erm_0.9/model_outputs_111/test.csv\n",
      "civilcomments/erm_0.9/model_outputs_333/test.csv\n",
      "civilcomments/erm_0.9/model_outputs_222/test.csv\n",
      "civilcomments/erm_dfr_0.9/model_outputs_111/test.csv\n",
      "civilcomments/erm_dfr_0.9/model_outputs_333/test.csv\n",
      "civilcomments/erm_dfr_0.9/model_outputs_222/test.csv\n",
      "civilcomments/erm_dfr_gdro_0.9/model_outputs_111/test.csv\n",
      "civilcomments/erm_dfr_gdro_0.9/model_outputs_333/test.csv\n",
      "civilcomments/erm_dfr_gdro_0.9/model_outputs_222/test.csv\n",
      "civilcomments/erm_dfr_rw_0.9/model_outputs_111/test.csv\n",
      "civilcomments/erm_dfr_rw_0.9/model_outputs_333/test.csv\n",
      "civilcomments/erm_dfr_rw_0.9/model_outputs_222/test.csv\n",
      "civilcomments/erm_gdro_0.9/model_outputs_111/test.csv\n",
      "civilcomments/erm_gdro_0.9/model_outputs_333/test.csv\n",
      "civilcomments/erm_gdro_0.9/model_outputs_222/test.csv\n",
      "civilcomments/erm_rw_0.9/model_outputs_111/test.csv\n",
      "civilcomments/erm_rw_0.9/model_outputs_333/test.csv\n",
      "civilcomments/erm_rw_0.9/model_outputs_222/test.csv\n",
      "civilcomments/gdro_0.9/model_outputs_111/test.csv\n",
      "civilcomments/gdro_0.9/model_outputs_333/test.csv\n",
      "civilcomments/gdro_0.9/model_outputs_222/test.csv\n",
      "civilcomments/rw_erm_0.9/model_outputs_111/test.csv\n",
      "civilcomments/rw_erm_0.9/model_outputs_333/test.csv\n",
      "civilcomments/rw_erm_0.9/model_outputs_222/test.csv\n",
      "method        dataset  correlacion               erm            rw_erm  \\\n",
      "0                 CUB         0.00  89.20 ± 0.39 (3)  89.20 ± 0.50 (3)   \n",
      "1                 CUB         0.25  89.56 ± 0.68 (3)  89.87 ± 1.53 (3)   \n",
      "2                 CUB         0.50  87.44 ± 1.04 (3)  88.84 ± 0.99 (3)   \n",
      "3                 CUB         0.75  80.74 ± 0.80 (3)  88.06 ± 1.64 (3)   \n",
      "4                 CUB         0.90  71.96 ± 0.82 (3)  86.31 ± 0.30 (3)   \n",
      "5              CelebA         0.90  46.48 ± 1.16 (3)  86.48 ± 2.25 (3)   \n",
      "6          MNISTCIFAR         0.00  88.22 ± 1.29 (3)  88.35 ± 1.45 (3)   \n",
      "7          MNISTCIFAR         0.25  82.11 ± 0.41 (3)  86.72 ± 2.24 (3)   \n",
      "8          MNISTCIFAR         0.50  78.08 ± 0.67 (3)  88.50 ± 0.69 (3)   \n",
      "9          MNISTCIFAR         0.75  54.86 ± 5.04 (3)  84.26 ± 1.13 (3)   \n",
      "10         MNISTCIFAR         0.90  29.91 ± 3.22 (3)  84.72 ± 1.14 (3)   \n",
      "11           MultiNLI         0.90  69.45 ± 2.18 (3)  67.42 ± 3.62 (3)   \n",
      "12      civilcomments         0.90  66.14 ± 2.42 (3)  80.00 ± 2.35 (3)   \n",
      "\n",
      "method              gdro           erm_dfr          erm_gdro  \\\n",
      "0       90.50 ± 0.41 (3)               NaN  86.71 ± 0.50 (3)   \n",
      "1       89.50 ± 3.08 (3)  87.33 ± 0.74 (3)  86.45 ± 0.78 (3)   \n",
      "2       90.13 ± 0.50 (3)  87.80 ± 0.50 (3)  84.48 ± 0.36 (3)   \n",
      "3       87.59 ± 0.39 (3)  84.58 ± 1.63 (3)  78.50 ± 1.22 (3)   \n",
      "4       85.87 ± 0.86 (3)  80.54 ± 1.74 (3)  73.68 ± 3.06 (3)   \n",
      "5       88.52 ± 0.32 (3)  85.00 ± 1.47 (3)  90.37 ± 0.32 (3)   \n",
      "6       88.09 ± 1.23 (3)               NaN  88.62 ± 0.93 (3)   \n",
      "7       86.59 ± 2.03 (3)  87.95 ± 0.70 (3)  86.72 ± 1.17 (3)   \n",
      "8       88.34 ± 0.58 (3)  87.15 ± 1.20 (3)  88.72 ± 0.80 (3)   \n",
      "9       83.52 ± 0.93 (3)  83.00 ± 3.01 (3)  84.13 ± 0.64 (3)   \n",
      "10      82.96 ± 0.73 (3)  79.83 ± 1.55 (3)  77.95 ± 2.04 (3)   \n",
      "11      76.13 ± 1.23 (3)  68.73 ± 1.29 (3)  76.83 ± 1.36 (2)   \n",
      "12      79.89 ± 3.34 (3)  77.46 ± 5.97 (3)  81.87 ± 1.02 (3)   \n",
      "\n",
      "method            erm_rw      erm_dfr_gdro        erm_dfr_rw  \n",
      "0       83.59 ± 1.04 (3)               NaN  80.17 ± 1.36 (3)  \n",
      "1       86.45 ± 0.41 (3)  90.58 ± 0.55 (2)  88.32 ± 1.77 (3)  \n",
      "2       82.71 ± 0.16 (3)  90.13 ± 0.74 (3)  85.15 ± 2.73 (3)  \n",
      "3       74.35 ± 1.06 (3)  88.32 ± 0.56 (3)  81.25 ± 2.21 (3)  \n",
      "4       65.21 ± 1.90 (3)  82.83 ± 1.98 (3)  70.63 ± 1.19 (3)  \n",
      "5       86.48 ± 0.85 (3)  89.71 ± 0.28 (3)  85.56 ± 0.56 (3)  \n",
      "6       88.89 ± 1.01 (3)               NaN               NaN  \n",
      "7       86.86 ± 1.31 (3)  88.22 ± 0.61 (3)  88.22 ± 0.46 (3)  \n",
      "8       88.85 ± 1.02 (3)  87.01 ± 2.06 (3)  87.42 ± 0.93 (3)  \n",
      "9       84.38 ± 0.21 (3)  85.54 ± 0.00 (3)  84.34 ± 0.40 (3)  \n",
      "10      81.70 ± 0.98 (3)  80.59 ± 0.61 (3)  78.71 ± 1.84 (3)  \n",
      "11      71.04 ± 0.31 (2)  70.24 ± 2.31 (3)  68.99 ± 0.99 (3)  \n",
      "12      81.51 ± 2.18 (3)  77.86 ± 5.09 (3)  76.92 ± 5.78 (3)  \n"
     ]
    }
   ],
   "source": [
    "from pathlib import Path\n",
    "import wg_results\n",
    "\n",
    "# Directories that are crawled for experiment outputs.\n",
    "ROOTS = [Path(\".\")]\n",
    "\n",
    "# Exact method names, not collapsed; this list is also the column order of the final table.\n",
    "ONLY_METHODS = [\n",
    "    \"erm\", \"rw_erm\", \"gdro\",\n",
    "    \"erm_dfr\", \"erm_gdro\", \"erm_rw\",\n",
    "    \"erm_dfr_gdro\", \"erm_dfr_rw\",\n",
    "]\n",
    "\n",
    "# Correlation levels requested for each dataset.\n",
    "DATASET_CORRS = {\n",
    "    \"MNISTCIFAR\":    [0.0, 0.25, 0.5, 0.75, 0.9],\n",
    "    \"CUB\":           [0.0, 0.25, 0.5, 0.75, 0.9],\n",
    "    \"CelebA\":        [0.9],\n",
    "    \"MultiNLI\":      [0.9],\n",
    "    \"civilcomments\": [0.9],\n",
    "}\n",
    "\n",
    "# corr_token_maps is optional; CUB already has a default mapping. Example:\n",
    "#   corr_token_maps={\"CUB\":{\"50\":0.0,\"625\":0.25,\"75\":0.5,\"875\":0.75,\"95\":0.9,\"100\":1.0}}\n",
    "raw = wg_results.crawl_experiments(\n",
    "    ROOTS,\n",
    "    split=\"test\",\n",
    "    selection=\"val_worst\",\n",
    "    allowed_methods=ONLY_METHODS,\n",
    "    dataset_corrs=DATASET_CORRS,\n",
    "    # optional allowlist: the same datasets, in the same order, as the DATASET_CORRS keys\n",
    "    dataset_allowlist=list(DATASET_CORRS),\n",
    ")\n",
    "\n",
    "agg = wg_results.aggregate_by_seed(raw)\n",
    "\n",
    "table = wg_results.pivot_table(\n",
    "    agg,\n",
    "    methods_order=ONLY_METHODS,\n",
    "    value=\"worst_acc\",\n",
    "    include_std=True,\n",
    "    as_percent=True,\n",
    ")\n",
    "print(table)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "bcfa3b12-e3ae-47b9-b77a-9707d7efbafd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import pandas as pd\n",
    "from typing import List, Optional, Dict\n",
    "\n",
    "def pivot_to_latex(\n",
    "    piv: pd.DataFrame,\n",
    "    *,\n",
    "    # Tabla / entorno\n",
    "    caption: Optional[str] = None,\n",
    "    label: Optional[str] = None,\n",
    "    table_env: str = \"table*\",\n",
    "    tabcolsep_pt: int = 1,\n",
    "    arraystretch: str = \"1.0\",\n",
    "    booktabs: bool = True,\n",
    "    # Columnas clave del pivote\n",
    "    dataset_col: str = \"dataset\",\n",
    "    corr_col: str = \"correlacion\",\n",
    "    # Selección y orden de columnas (métodos)\n",
    "    columns: Optional[List[str]] = None,            # ← elige qué métodos/columnas mostrar\n",
    "    column_labels: Optional[Dict[str, str]] = None, # ← etiquetas a mostrar por columna (opcional)\n",
    "    # Orden de filas\n",
    "    dataset_order: Optional[List[str]] = None,      # ← orden explícito de datasets\n",
    "    corr_order: Optional[List[float]] = None,       # ← orden explícito de correlaciones (e.g., [0.0,0.25,0.5,0.75,0.9])\n",
    "    # Formato de celdas\n",
    "    percent: bool = True,\n",
    "    small_std_macro: Optional[str] = r\"\\st\",        # define p.ej.: \\newcommand{\\st}[1]{\\textcolor{black}{\\scriptsize(#1)}}\n",
    "    show_n: bool = False,                           # muestra (n) en pequeño\n",
    "    use_multirow: bool = True,                      # dataset una sola vez por bloque\n",
    ") -> str:\n",
    "    \"\"\"\n",
    "    Renderiza a LaTeX el DataFrame 'piv' (salida de tu pivot_table):\n",
    "      filas = (dataset, correlación)  |  columnas = métodos\n",
    "    - Multirow por dataset (opcional).\n",
    "    - booktabs y separación con \\\\midrule entre datasets.\n",
    "    - Celdas aceptan \"mean ± std (n)\" o valores numéricos; std en pequeño con 'small_std_macro'.\n",
    "    - 'columns' permite seleccionar y ordenar métodos/columnas a mostrar; 'column_labels' renombra en header.\n",
    "    - 'dataset_order' y 'corr_order' controlan el orden de bloques/filas.\n",
    "    \"\"\"\n",
    "    assert dataset_col in piv.columns and corr_col in piv.columns, \\\n",
    "        \"piv debe tener columnas 'dataset' y 'correlacion'\"\n",
    "\n",
    "    # --- columnas de método a mostrar ---\n",
    "    all_method_cols = [c for c in piv.columns if c not in (dataset_col, corr_col)]\n",
    "    if columns is None:\n",
    "        method_cols = all_method_cols\n",
    "    else:\n",
    "        method_cols = [c for c in columns if c in all_method_cols]  # respeta orden dado\n",
    "\n",
    "    # --- orden de datasets ---\n",
    "    df = piv.copy()\n",
    "    if dataset_order is None:\n",
    "        dataset_order = list(dict.fromkeys(df[dataset_col].tolist()))\n",
    "    # Categórico para mantener el orden deseado\n",
    "    df[dataset_col] = pd.Categorical(df[dataset_col], categories=dataset_order, ordered=True)\n",
    "\n",
    "    # --- orden de correlaciones ---\n",
    "    if corr_order is not None:\n",
    "        df[corr_col] = pd.Categorical(df[corr_col], categories=corr_order, ordered=True)\n",
    "    else:\n",
    "        # intenta ordenar numéricamente si no se especifica\n",
    "        try:\n",
    "            df[corr_col] = df[corr_col].astype(float)\n",
    "        except Exception:\n",
    "            pass\n",
    "\n",
    "    df = df.sort_values([dataset_col, corr_col], kind=\"mergesort\")\n",
    "\n",
    "    # --- formateo de celdas ---\n",
    "    pm_pat  = re.compile(r\"^\\s*(-?\\d+(?:\\.\\d+)?)\\s*±\\s*(-?\\d+(?:\\.\\d+)?)\\s*(?:\\((\\d+)\\))?\\s*$\")\n",
    "    num_pat = re.compile(r\"^\\s*-?\\d+(?:\\.\\d+)?\\s*$\")\n",
    "\n",
    "    def fmt_cell(x):\n",
    "        if pd.isna(x):\n",
    "            return \"-\"\n",
    "        s = str(x).strip()\n",
    "        # ya trae % -> intenta envolver std si es '±'\n",
    "        if \"%\" in s:\n",
    "            m = pm_pat.match(s.replace(\"%\",\"\").strip())\n",
    "            if m:\n",
    "                mean, std, n = m.groups()\n",
    "                tail = (f\"{small_std_macro}({float(std):.2f})\" if small_std_macro\n",
    "                        else \"{\\\\small (\" + f\"{float(std):.2f}\" + \")}\")\n",
    "                n_part = f\" {small_std_macro}({n})\" if (show_n and n is not None) else \"\"\n",
    "                return f\"{float(mean):.2f}\\\\% {tail}{n_part}\"\n",
    "            return s\n",
    "        # \"mean ± std (n)\"\n",
    "        m = pm_pat.match(s)\n",
    "        if m:\n",
    "            mean, std, n = m.groups()\n",
    "            mean_part = f\"{float(mean):.2f}\\\\%\" if percent else f\"{float(mean):.4f}\"\n",
    "            tail = (f\"{small_std_macro}({float(std):.2f})\" if small_std_macro\n",
    "                    else \"{\\\\small (\" + f\"{float(std):.2f}\" + \")}\")\n",
    "            n_part = f\" {small_std_macro}({n})\" if (show_n and n is not None) else \"\"\n",
    "            return f\"{mean_part} {tail}{n_part}\"\n",
    "        # número simple\n",
    "        if num_pat.match(s):\n",
    "            val = float(s)\n",
    "            return f\"{val:.2f}\\\\%\" if percent else f\"{val:.4f}\"\n",
    "        return s\n",
    "\n",
    "    # --- helpers ---\n",
    "    def esc(name: str) -> str:\n",
    "        return str(name).replace('_', r'\\_')\n",
    "\n",
    "    # --- encabezados ---\n",
    "    col_align = \"l l \" + \" \".join([\"c\"] * len(method_cols))\n",
    "    lines = []\n",
    "    lines.append(f\"\\\\begin{{{table_env}}}[t]\")\n",
    "    if caption: lines.append(f\"    \\\\caption{{{caption}}}\")\n",
    "    if label:   lines.append(f\"    \\\\label{{{label}}}\")\n",
    "    lines.append(\"    \\\\centering\")\n",
    "    lines.append(\"\\\\begin{small}\")\n",
    "    lines.append(f\"\\\\setlength{{\\\\tabcolsep}}{{{tabcolsep_pt}pt}}\")\n",
    "    lines.append(f\"\\\\renewcommand{{\\\\arraystretch}}{{{arraystretch}}}\")\n",
    "    lines.append(f\"\\\\begin{{tabular}}{{{col_align}}}\")\n",
    "    if booktabs: lines.append(\"\\\\toprule\")\n",
    "\n",
    "    header = [dataset_col.capitalize(), \"Correlación\"]\n",
    "    if column_labels:\n",
    "        header += [column_labels.get(c, c) for c in method_cols]\n",
    "    else:\n",
    "        header += method_cols\n",
    "    hdr_line = \" & \".join([(\"\\\\textbf{\"+esc(h)+\"}\") for h in header]) + r\" \\\\\"\n",
    "    lines.append(hdr_line)\n",
    "    if booktabs: lines.append(\"\\\\midrule\")\n",
    "\n",
    "    # --- cuerpo con multirow correcto (sin & extra) ---\n",
    "    groups = list(df.groupby(dataset_col, sort=False))\n",
    "    for gi, (ds, block) in enumerate(groups):\n",
    "        block = block.sort_values(corr_col, kind=\"mergesort\")\n",
    "        n_rows = len(block)\n",
    "        ds_tex = esc(ds)\n",
    "        for ridx, (_, row) in enumerate(block.iterrows()):\n",
    "            # dataset cell\n",
    "            if use_multirow:\n",
    "                ds_cell = f\"\\\\multirow{{{n_rows}}}{{*}}{{{ds_tex}}}\" if ridx == 0 else \"\"\n",
    "            else:\n",
    "                ds_cell = ds_tex\n",
    "            # corr cell (muestra como venga; si quieres un formateo especial, ajusta aquí)\n",
    "            corr_str = str(row[corr_col])\n",
    "            # method cells\n",
    "            meth_cells = [fmt_cell(row.get(c, float('nan'))) for c in method_cols]\n",
    "            # ensamblar la fila (nota: si ds_cell == \"\" la fila empieza directamente con '&', lo cual es correcto)\n",
    "            cells = [ds_cell, corr_str] + meth_cells\n",
    "            line = \" & \".join(cells) + r\" \\\\\"\n",
    "            lines.append(line)\n",
    "        # separador entre datasets (no después del último)\n",
    "        if booktabs and gi < len(groups) - 1:\n",
    "            lines.append(\"\\\\midrule\")\n",
    "\n",
    "    if booktabs: lines.append(\"\\\\bottomrule\")\n",
    "    lines.append(\"\\\\end{tabular}\")\n",
    "    lines.append(\"\\\\end{small}\")\n",
    "    lines.append(f\"\\\\end{{{table_env}}}\")\n",
    "    return \"\\n\".join(lines)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "7c4a3ad3-076e-42aa-a16c-5db7bf0b3a14",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{table*}[t]\n",
      "    \\caption{Worst Group Accuracy por dataset/correlación}\n",
      "    \\label{tab:wg_summary}\n",
      "    \\centering\n",
      "\\begin{small}\n",
      "\\setlength{\\tabcolsep}{1pt}\n",
      "\\renewcommand{\\arraystretch}{1.0}\n",
      "\\begin{tabular}{l l c c c c c c c c}\n",
      "\\toprule\n",
      "\\textbf{Dataset} & \\textbf{Correlación} & \\textbf{ERM} & \\textbf{RW} & \\textbf{GDRO} & \\textbf{RW-FT} & \\textbf{GDRO-FT} & \\textbf{SUBG} & \\textbf{RW+} & \\textbf{GDRO+} \\\\\n",
      "\\midrule\n",
      "\\multirow{5}{*}{MNISTCIFAR} & 0.0 & 88.22\\% \\st(1.29) & 88.35\\% \\st(1.45) & 88.09\\% \\st(1.23) & 88.89\\% \\st(1.01) & 88.62\\% \\st(0.93) & - & - & - \\\\\n",
      " & 0.25 & 82.11\\% \\st(0.41) & 86.72\\% \\st(2.24) & 86.59\\% \\st(2.03) & 86.86\\% \\st(1.31) & 86.72\\% \\st(1.17) & 87.95\\% \\st(0.70) & 88.22\\% \\st(0.46) & 88.22\\% \\st(0.61) \\\\\n",
      " & 0.5 & 78.08\\% \\st(0.67) & 88.50\\% \\st(0.69) & 88.34\\% \\st(0.58) & 88.85\\% \\st(1.02) & 88.72\\% \\st(0.80) & 87.15\\% \\st(1.20) & 87.42\\% \\st(0.93) & 87.01\\% \\st(2.06) \\\\\n",
      " & 0.75 & 54.86\\% \\st(5.04) & 84.26\\% \\st(1.13) & 83.52\\% \\st(0.93) & 84.38\\% \\st(0.21) & 84.13\\% \\st(0.64) & 83.00\\% \\st(3.01) & 84.34\\% \\st(0.40) & 85.54\\% \\st(0.00) \\\\\n",
      " & 0.9 & 29.91\\% \\st(3.22) & 84.72\\% \\st(1.14) & 82.96\\% \\st(0.73) & 81.70\\% \\st(0.98) & 77.95\\% \\st(2.04) & 79.83\\% \\st(1.55) & 78.71\\% \\st(1.84) & 80.59\\% \\st(0.61) \\\\\n",
      "\\midrule\n",
      "\\multirow{5}{*}{CUB} & 0.0 & 89.20\\% \\st(0.39) & 89.20\\% \\st(0.50) & 90.50\\% \\st(0.41) & 83.59\\% \\st(1.04) & 86.71\\% \\st(0.50) & - & 80.17\\% \\st(1.36) & - \\\\\n",
      " & 0.25 & 89.56\\% \\st(0.68) & 89.87\\% \\st(1.53) & 89.50\\% \\st(3.08) & 86.45\\% \\st(0.41) & 86.45\\% \\st(0.78) & 87.33\\% \\st(0.74) & 88.32\\% \\st(1.77) & 90.58\\% \\st(0.55) \\\\\n",
      " & 0.5 & 87.44\\% \\st(1.04) & 88.84\\% \\st(0.99) & 90.13\\% \\st(0.50) & 82.71\\% \\st(0.16) & 84.48\\% \\st(0.36) & 87.80\\% \\st(0.50) & 85.15\\% \\st(2.73) & 90.13\\% \\st(0.74) \\\\\n",
      " & 0.75 & 80.74\\% \\st(0.80) & 88.06\\% \\st(1.64) & 87.59\\% \\st(0.39) & 74.35\\% \\st(1.06) & 78.50\\% \\st(1.22) & 84.58\\% \\st(1.63) & 81.25\\% \\st(2.21) & 88.32\\% \\st(0.56) \\\\\n",
      " & 0.9 & 71.96\\% \\st(0.82) & 86.31\\% \\st(0.30) & 85.87\\% \\st(0.86) & 65.21\\% \\st(1.90) & 73.68\\% \\st(3.06) & 80.54\\% \\st(1.74) & 70.63\\% \\st(1.19) & 82.83\\% \\st(1.98) \\\\\n",
      "\\midrule\n",
      "\\multirow{1}{*}{CelebA} & 0.9 & 46.48\\% \\st(1.16) & 86.48\\% \\st(2.25) & 88.52\\% \\st(0.32) & 86.48\\% \\st(0.85) & 90.37\\% \\st(0.32) & 85.00\\% \\st(1.47) & 85.56\\% \\st(0.56) & 89.71\\% \\st(0.28) \\\\\n",
      "\\midrule\n",
      "\\multirow{1}{*}{MultiNLI} & 0.9 & 69.45\\% \\st(2.18) & 67.42\\% \\st(3.62) & 76.13\\% \\st(1.23) & 71.04\\% \\st(0.31) & 76.83\\% \\st(1.36) & 68.73\\% \\st(1.29) & 68.99\\% \\st(0.99) & 70.24\\% \\st(2.31) \\\\\n",
      "\\midrule\n",
      "\\multirow{1}{*}{civilcomments} & 0.9 & 66.14\\% \\st(2.42) & 80.00\\% \\st(2.35) & 79.89\\% \\st(3.34) & 81.51\\% \\st(2.18) & 81.87\\% \\st(1.02) & 77.46\\% \\st(5.97) & 76.92\\% \\st(5.78) & 77.86\\% \\st(5.09) \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\\end{small}\n",
      "\\end{table*}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_2560593/1970280198.py:128: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
      "  groups = list(df.groupby(dataset_col, sort=False))\n"
     ]
    }
   ],
   "source": [
    "# Only these pivot columns are rendered.\n",
    "cols = [\n",
    "    \"erm\",\n",
    "    \"rw_erm\",\n",
    "    \"gdro\",\n",
    "    \"erm_rw\",\n",
    "    \"erm_gdro\",\n",
    "    \"erm_dfr\",\n",
    "    \"erm_dfr_rw\",\n",
    "    \"erm_dfr_gdro\",\n",
    "]\n",
    "\n",
    "# Optional: header text shown for each pivot column.\n",
    "labels = {\n",
    "    \"erm\": \"ERM\",\n",
    "    \"rw_erm\": \"RW\",\n",
    "    \"gdro\": \"GDRO\",\n",
    "    \"erm_dfr\": \"SUBG\",\n",
    "    \"erm_dfr_rw\": \"RW+\",\n",
    "    \"erm_dfr_gdro\": \"GDRO+\",\n",
    "    \"erm_rw\": \"RW-FT\",\n",
    "    \"erm_gdro\": \"GDRO-FT\",\n",
    "}\n",
    "\n",
    "# NOTE(review): `table` is built by a cell that appears further down in the notebook;\n",
    "# run that cell first (or move it above this one) on a fresh kernel.\n",
    "tex = pivot_to_latex(\n",
    "    table,\n",
    "    caption=\"Worst Group Accuracy por dataset/correlación\",\n",
    "    label=\"tab:wg_summary\",\n",
    "    columns=cols,\n",
    "    column_labels=labels,\n",
    "    # explicit dataset order\n",
    "    dataset_order=[\"MNISTCIFAR\", \"CUB\", \"CelebA\", \"MultiNLI\", \"civilcomments\"],\n",
    "    # explicit correlation order\n",
    "    corr_order=[0.0, 0.25, 0.5, 0.75, 0.9],\n",
    "    small_std_macro=r\"\\st\",\n",
    "    use_multirow=True,\n",
    ")\n",
    "print(tex)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "6ef370b5-5f3e-4b1a-9db5-0adb8fc26e88",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CUB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>method</th>\n",
       "      <th>dataset</th>\n",
       "      <th>correlacion</th>\n",
       "      <th>erm</th>\n",
       "      <th>rw</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>CUB</td>\n",
       "      <td>0.0</td>\n",
       "      <td>87.816336</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>CUB</td>\n",
       "      <td>0.1</td>\n",
       "      <td>64.429654</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>CUB</td>\n",
       "      <td>0.25</td>\n",
       "      <td>71.786357</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>CUB</td>\n",
       "      <td>0.5</td>\n",
       "      <td>76.289394</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>CUB</td>\n",
       "      <td>0.75</td>\n",
       "      <td>79.361373</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>CUB</td>\n",
       "      <td>0.9</td>\n",
       "      <td>78.748969</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>CUB</td>\n",
       "      <td>1.0</td>\n",
       "      <td>84.272903</td>\n",
       "      <td>88.477395</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "method dataset correlacion        erm         rw\n",
       "0          CUB         0.0  87.816336        NaN\n",
       "1          CUB         0.1  64.429654        NaN\n",
       "2          CUB        0.25  71.786357        NaN\n",
       "3          CUB         0.5  76.289394        NaN\n",
       "4          CUB        0.75  79.361373        NaN\n",
       "5          CUB         0.9  78.748969        NaN\n",
       "6          CUB         1.0  84.272903  88.477395"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pathlib import Path\n",
    "from wg_results import crawl_experiments, aggregate_by_seed, pivot_table\n",
    "\n",
    "ROOTS = [Path(\"CUB\")]\n",
    "SPLIT = \"test\"\n",
    "\n",
    "# Allowlist: only these methods are considered (after aliasing).\n",
    "# NOTE(review): these entries are the left-hand keys of ALIASES_EXACT, not the aliased\n",
    "# names (e.g. \"rw\" rather than \"rw_erm\") - confirm which form crawl_experiments filters on.\n",
    "ONLY_METHODS = [\n",
    "    \"erm\",\n",
    "    \"rw\",\n",
    "    \"groupdro\",\n",
    "    \"subg\",\n",
    "    \"gdro+\",\n",
    "    \"rw+\",\n",
    "]\n",
    "\n",
    "# Aliases used to normalize folder names (exact matches).\n",
    "ALIASES_EXACT = {\n",
    "    \"rw\": \"rw_erm\",\n",
    "    \"groupdro\": \"gdro\",\n",
    "    \"erm\": \"erm\",\n",
    "    \"subg\": \"erm_dfr\",\n",
    "    \"gdro+\": \"erm_dfr_gdro\",\n",
    "    \"rw+\": \"erm_dfr_rw\",\n",
    "}\n",
    "\n",
    "# Aliases used to normalize folder names (regex pattern -> replacement name).\n",
    "ALIASES_REGEX = [\n",
    "    (r\"^jtt_.*$\", \"jtt\"),\n",
    "    (r\"^subg_.*$\", \"subg\"),\n",
    "]\n",
    "\n",
    "# Optional: list of \"known\" methods (limits detection to these).\n",
    "# Leave it as None to avoid restricting detection to a fixed set.\n",
    "KNOWN = [\n",
    "    \"erm\",\n",
    "    \"rw\",\n",
    "    \"groupdro\",\n",
    "    \"subg\",\n",
    "    \"gdro+\",\n",
    "    \"rw+\",\n",
    "]\n",
    "\n",
    "raw = crawl_experiments(\n",
    "    ROOTS,\n",
    "    split=SPLIT,\n",
    "    selection=\"val_worst\",\n",
    "    method_aliases_exact=ALIASES_EXACT,\n",
    "    method_aliases_regex=ALIASES_REGEX,\n",
    "    allowed_methods=ONLY_METHODS,  # only these methods pass the filter\n",
    "    known_methods=KNOWN,  # optional; pass None to not restrict\n",
    ")\n",
    "\n",
    "agg = aggregate_by_seed(raw)\n",
    "table = pivot_table(\n",
    "    agg,\n",
    "    methods_order=ONLY_METHODS,\n",
    "    value=\"worst_acc\",\n",
    "    include_std=False,\n",
    "    as_percent=True,\n",
    ")\n",
    "table\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "31fee687-5758-44f5-901e-cb0843f55349",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/workspace1/araymond/svdrop/results\n"
     ]
    }
   ],
   "source": [
    "# Show the working directory; relative paths such as Path(\"CUB\") resolve against it.\n",
    "# Pure Python instead of the `!pwd` shell escape so the cell does not depend on a POSIX shell.\n",
    "import os\n",
    "\n",
    "print(os.getcwd())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "aa5bb834",
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "'dataset'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[0;32m/tmp/ipykernel_2200524/3926510305.py\u001b[0m in \u001b[0;36m?\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# 1) Crawler: recorrer carpetas y recoger resultados por experimento\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mraw\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcrawl_experiments\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mROOTS\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msplit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mSPLIT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mSELECTION\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mallowed_methods\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mONLY_METHODS\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mraw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"dataset\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"correlacion\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"method\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"seed\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"epoch\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mna_position\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"last\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      4\u001b[0m \u001b[0mraw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m 
\u001b[0mraw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/pyenv/versions/mini/lib/python3.10/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, by, axis, ascending, inplace, kind, na_position, ignore_index, key)\u001b[0m\n\u001b[1;32m   7175\u001b[0m                 \u001b[0;34mf\"Length of ascending ({len(ascending)})\"\u001b[0m  \u001b[0;31m# type: ignore[arg-type]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   7176\u001b[0m                 \u001b[0;34mf\" != length of by ({len(by)})\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   7177\u001b[0m             )\n\u001b[1;32m   7178\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mby\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 7179\u001b[0;31m             \u001b[0mkeys\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_label_or_level_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mby\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   7180\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   7181\u001b[0m             \u001b[0;31m# need to rewrap columns in Series to apply key function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   7182\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/pyenv/versions/mini/lib/python3.10/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(.0)\u001b[0m\n\u001b[0;32m-> 7179\u001b[0;31m         \u001b[0;34m...\u001b[0m     \u001b[0mkey\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margsort\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex_natsorted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"time\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/pyenv/versions/mini/lib/python3.10/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m   1907\u001b[0m             \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mxs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mother_axes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1908\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_is_level_reference\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1909\u001b[0m             \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maxes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_level_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1910\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1911\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1912\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1913\u001b[0m         \u001b[0;31m# Check for 
duplicates\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1914\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyError\u001b[0m: 'dataset'"
     ]
    }
   ],
   "source": [
    "# 1) Crawler: walk the result folders and collect the results of each experiment\n",
    "raw = crawl_experiments(ROOTS, split=SPLIT, selection=SELECTION, allowed_methods=ONLY_METHODS)\n",
    "\n",
    "# Validate the columns before sorting: a previous run of this cell died inside\n",
    "# sort_values with a bare KeyError: 'dataset', which hides the actual cause.\n",
    "sort_keys = [\"dataset\", \"correlacion\", \"method\", \"seed\", \"epoch\"]\n",
    "missing_cols = [col for col in sort_keys if col not in raw.columns]\n",
    "if missing_cols:\n",
    "    raise ValueError(\n",
    "        f\"crawl_experiments returned {len(raw)} row(s) lacking columns {missing_cols}; \"\n",
    "        f\"check that ROOTS={ROOTS} exist relative to the current working directory \"\n",
    "        f\"and that ONLY_METHODS={ONLY_METHODS} matches the experiment folders\"\n",
    "    )\n",
    "\n",
    "# Rebind instead of mutating in place so re-running the cell is idempotent.\n",
    "raw = raw.sort_values(sort_keys, na_position=\"last\").reset_index(drop=True)\n",
    "raw.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e304a84",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2) Aggregate over seeds: mean and std\n",
    "agg = (\n",
    "    aggregate_by_seed(raw)\n",
    "    .sort_values([\"dataset\", \"correlacion\", \"method\"], na_position=\"last\")\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "agg.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "602cfd77",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3) Final table: (dataset, correlation) × methods, using worst_acc\n",
    "table = pivot_table(\n",
    "    agg,\n",
    "    methods_order=METHODS_ORDER,\n",
    "    value=\"worst_acc\",\n",
    "    include_std=INCLUDE_STD,\n",
    "    as_percent=AS_PERCENT,\n",
    ")\n",
    "table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ab6ae28a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4) Save the final table as CSV and Markdown; the returned value is displayed below\n",
    "paths = save_outputs(table, OUT_PREFIX)\n",
    "paths"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
