{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "5bf269e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import glob\n",
    "\n",
    "def extract_best_per_series(base_path, dataset_name, series_ids, lags):\n",
    "    all_results = []\n",
    "    \n",
    "    for series_id in series_ids:\n",
    "        # Initialize best results for this series across all lags\n",
    "        best_auroc_row = None\n",
    "        best_auprc_row = None\n",
    "        best_auroc_value = -np.inf\n",
    "        best_auprc_value = -np.inf\n",
    "        \n",
    "        for lag in lags:\n",
    "            # Construct filename pattern for this lag\n",
    "            filename_pattern = f\"real_data_{dataset_name}_{series_id}_{lag}*.csv\"\n",
    "            file_path = os.path.join(base_path, filename_pattern)\n",
    "            \n",
    "            # Find matching files\n",
    "            matching_files = glob.glob(file_path)\n",
    "            \n",
    "            if not matching_files:\n",
    "                print(f\"No files found for pattern: {filename_pattern}\")\n",
    "                continue\n",
    "            \n",
    "            for file in matching_files:\n",
    "                try:\n",
    "                    # Read CSV file\n",
    "                    df = pd.read_csv(file)\n",
    "                    \n",
    "                    # Convert AUROC and AUPRC to numeric, coerce errors to NaN\n",
    "                    df['AUROC'] = pd.to_numeric(df['AUROC'], errors='coerce')\n",
    "                    df['AUPRC'] = pd.to_numeric(df['AUPRC'], errors='coerce')\n",
    "                    \n",
    "                    # Filter out rows with NaN in both AUROC and AUPRC\n",
    "                    valid_df = df[df['AUROC'].notna() | df['AUPRC'].notna()].copy()\n",
    "                    \n",
    "                    if len(valid_df) == 0:\n",
    "                        continue\n",
    "                    \n",
    "                    # Extract base filename without extension\n",
    "                    basename = os.path.splitext(os.path.basename(file))[0]\n",
    "                    \n",
    "                    # Find best AUROC for this file (across all lags)\n",
    "                    if valid_df['AUROC'].notna().any():\n",
    "                        file_best_auroc_idx = valid_df['AUROC'].idxmax()\n",
    "                        file_best_auroc = valid_df.loc[file_best_auroc_idx].copy()\n",
    "                        file_auroc_value = file_best_auroc['AUROC']\n",
    "                        \n",
    "                        if file_auroc_value > best_auroc_value:\n",
    "                            best_auroc_value = file_auroc_value\n",
    "                            best_auroc_row = file_best_auroc\n",
    "                            best_auroc_row['filename'] = basename\n",
    "                            best_auroc_row['metric'] = 'Best_AUROC'\n",
    "                            best_auroc_row['series_id'] = series_id\n",
    "                            best_auroc_row['lag'] = lag  # Store the lag that gave this result\n",
    "                    \n",
    "                    # Find best AUPRC for this file (across all lags)\n",
    "                    if valid_df['AUPRC'].notna().any():\n",
    "                        file_best_auprc_idx = valid_df['AUPRC'].idxmax()\n",
    "                        file_best_auprc = valid_df.loc[file_best_auprc_idx].copy()\n",
    "                        file_auprc_value = file_best_auprc['AUPRC']\n",
    "                        \n",
    "                        if file_auprc_value > best_auprc_value:\n",
    "                            best_auprc_value = file_auprc_value\n",
    "                            best_auprc_row = file_best_auprc\n",
    "                            best_auprc_row['filename'] = basename\n",
    "                            best_auprc_row['metric'] = 'Best_AUPRC'\n",
    "                            best_auprc_row['series_id'] = series_id\n",
    "                            best_auprc_row['lag'] = lag  # Store the lag that gave this result\n",
    "                    \n",
    "                except Exception as e:\n",
    "                    print(f\"Error processing file {file}: {e}\")\n",
    "                    continue\n",
    "        \n",
    "        # Add best results for this series to the list\n",
    "        if best_auroc_row is not None:\n",
    "            all_results.append(best_auroc_row)\n",
    "        if best_auprc_row is not None:\n",
    "            all_results.append(best_auprc_row)\n",
    "    \n",
    "    # Combine all results into a single DataFrame\n",
    "    if all_results:\n",
    "        results_df = pd.DataFrame(all_results)\n",
    "        \n",
    "        # Define the order of columns (include lag in hyperparameters)\n",
    "        metadata_cols = ['series_id', 'metric']\n",
    "        \n",
    "        hyperparam_cols = ['lag', 'lr', 'hidden_dim', 'layers', 'dropout', 'ind_lambda', \n",
    "                          'int_lambda', 'weight_decay', 'model_type', 'penalty_type', \n",
    "                          'importance_type']\n",
    "        \n",
    "        result_cols = ['AUROC', 'AUPRC', 'val_loss']\n",
    "        \n",
    "        other_cols = ['filename', 'worker', 'error']\n",
    "        \n",
    "        # Create ordered column list\n",
    "        col_order = metadata_cols + result_cols + hyperparam_cols + other_cols\n",
    "        \n",
    "        # Only include columns that exist\n",
    "        existing_cols = [col for col in col_order if col in results_df.columns]\n",
    "        results_df = results_df[existing_cols]\n",
    "        \n",
    "        # Sort the results by series and metric\n",
    "        results_df = results_df.sort_values(['series_id', 'metric'])\n",
    "        \n",
    "        return results_df\n",
    "    else:\n",
    "        return pd.DataFrame()\n",
    "\n",
    "def create_summary_table(results_df):\n",
    "    \"\"\"\n",
    "    Create a clean summary table showing best results for each series.\n",
    "    \"\"\"\n",
    "    if results_df.empty:\n",
    "        return pd.DataFrame()\n",
    "    \n",
    "    # Create a pivot table for easy comparison\n",
    "    summary_data = []\n",
    "    \n",
    "    for series_id in results_df['series_id'].unique():\n",
    "        series_data = {'Series': series_id}\n",
    "        \n",
    "        # Get best AUROC for this series\n",
    "        auroc_row = results_df[(results_df['series_id'] == series_id) & \n",
    "                              (results_df['metric'] == 'Best_AUROC')]\n",
    "        if not auroc_row.empty:\n",
    "            row = auroc_row.iloc[0]\n",
    "            series_data['Best_AUROC'] = row.get('AUROC', np.nan)\n",
    "            series_data['AUROC_Lag'] = row.get('lag', 'N/A')\n",
    "            series_data['AUROC_LR'] = row.get('lr', 'N/A')\n",
    "            series_data['AUROC_Hidden'] = row.get('hidden_dim', 'N/A')\n",
    "            series_data['AUROC_Layers'] = row.get('layers', 'N/A')\n",
    "        \n",
    "        # Get best AUPRC for this series\n",
    "        auprc_row = results_df[(results_df['series_id'] == series_id) & \n",
    "                              (results_df['metric'] == 'Best_AUPRC')]\n",
    "        if not auprc_row.empty:\n",
    "            row = auprc_row.iloc[0]\n",
    "            series_data['Best_AUPRC'] = row.get('AUPRC', np.nan)\n",
    "            series_data['AUPRC_Lag'] = row.get('lag', 'N/A')\n",
    "            series_data['AUPRC_LR'] = row.get('lr', 'N/A')\n",
    "            series_data['AUPRC_Hidden'] = row.get('hidden_dim', 'N/A')\n",
    "            series_data['AUPRC_Layers'] = row.get('layers', 'N/A')\n",
    "        \n",
    "        summary_data.append(series_data)\n",
    "    \n",
    "    summary_df = pd.DataFrame(summary_data)\n",
    "    \n",
    "    # Reorder columns for better readability\n",
    "    col_order = ['Series', 'Best_AUROC', 'AUROC_Lag', 'AUROC_LR', 'AUROC_Hidden', 'AUROC_Layers',\n",
    "                 'Best_AUPRC', 'AUPRC_Lag', 'AUPRC_LR', 'AUPRC_Hidden', 'AUPRC_Layers']\n",
    "    \n",
    "    # Only include columns that exist\n",
    "    col_order = [col for col in col_order if col in summary_df.columns]\n",
    "    \n",
    "    return summary_df[col_order]\n",
    "\n",
    "def main(dataset_name, series_ids, lags, base_path=\"./real_data_Shap/\", output_prefix=\"\"):\n",
    "    \"\"\"Extract the best results per series, save them and print a compact summary.\n",
    "\n",
    "    Args:\n",
    "        dataset_name: Dataset token used in the result file names.\n",
    "        series_ids: Series identifiers to summarise.\n",
    "        lags: Lag values to search over (treated as a hyperparameter).\n",
    "        base_path: Directory containing the result CSV files.\n",
    "        output_prefix: Text prepended to both output file names. With the\n",
    "            default empty prefix every call writes to the same two files, so\n",
    "            pass e.g. the dataset name to keep the results of several runs apart.\n",
    "\n",
    "    Writes `{output_prefix}best_results_per_series.csv` and\n",
    "    `{output_prefix}compact_summary_per_series.csv` relative to the working\n",
    "    directory. Returns None.\n",
    "    \"\"\"\n",
    "    # Extract results (best per series across all lags)\n",
    "    best_results = extract_best_per_series(base_path, dataset_name, series_ids, lags)\n",
    "\n",
    "    if best_results.empty:\n",
    "        print(\"No results found!\")\n",
    "        return\n",
    "\n",
    "    # Save the detailed rows to CSV\n",
    "    output_file = f\"{output_prefix}best_results_per_series.csv\"\n",
    "    best_results.to_csv(output_file, index=False)\n",
    "    print(f\"Results saved to {output_file}\")\n",
    "\n",
    "    print(\"\\n\" + \"=\"*100)\n",
    "    print(\"BEST RESULTS PER SERIES (LAG TREATED AS HYPERPARAMETER)\")\n",
    "    print(\"=\"*100)\n",
    "\n",
    "    # Create and display the compact summary table\n",
    "    print(\"\\n\" + \"=\"*100)\n",
    "    print(\"COMPACT SUMMARY TABLE (BEST PER SERIES)\")\n",
    "    print(\"=\"*100)\n",
    "\n",
    "    summary_table = create_summary_table(best_results)\n",
    "    if summary_table.empty:\n",
    "        return\n",
    "\n",
    "    # Format only the two score columns. Matching on the substring 'AUROC' /\n",
    "    # 'AUPRC' would also hit the lag / lr / hidden / layers columns, which renders\n",
    "    # integers as '2.0000', rounds small learning rates to four decimals and\n",
    "    # raises on the 'N/A' placeholders create_summary_table uses for missing columns.\n",
    "    for col in ('Best_AUROC', 'Best_AUPRC'):\n",
    "        if col in summary_table.columns:\n",
    "            summary_table[col] = summary_table[col].apply(\n",
    "                lambda x: f\"{x:.4f}\" if pd.notnull(x) else \"N/A\"\n",
    "            )\n",
    "\n",
    "    print(summary_table.to_string(index=False))\n",
    "\n",
    "    # Save the compact summary\n",
    "    summary_file = f\"{output_prefix}compact_summary_per_series.csv\"\n",
    "    summary_table.to_csv(summary_file, index=False)\n",
    "    print(f\"\\nCompact summary saved to {summary_file}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "aea8e598",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "No files found for pattern: real_data_DREAM3_3_5*.csv\n",
      "No files found for pattern: real_data_DREAM3_4_5*.csv\n",
      "No files found for pattern: real_data_DREAM3_5_5*.csv\n",
      "Results saved to best_results_per_series.csv\n",
      "\n",
      "====================================================================================================\n",
      "BEST RESULTS PER SERIES (LAG TREATED AS HYPERPARAMETER)\n",
      "====================================================================================================\n",
      "\n",
      "====================================================================================================\n",
      "COMPACT SUMMARY TABLE (BEST PER SERIES)\n",
      "====================================================================================================\n",
      " Series Best_AUROC AUROC_Lag AUROC_LR AUROC_Hidden AUROC_Layers Best_AUPRC AUPRC_Lag AUPRC_LR AUPRC_Hidden AUPRC_Layers\n",
      "      1     0.6997    2.0000   0.0010     300.0000       1.0000     0.0352    2.0000   0.0005     300.0000       0.0000\n",
      "      2     0.7023    2.0000   0.0050     300.0000       1.0000     0.0458    5.0000   0.0005     400.0000       0.0000\n",
      "      3     0.6646    3.0000   0.0050     300.0000       0.0000     0.0862    3.0000   0.0010     200.0000       0.0000\n",
      "      4     0.6113    3.0000   0.0050     300.0000       3.0000     0.0901    3.0000   0.0050     200.0000       3.0000\n",
      "      5     0.5955    3.0000   0.0005     400.0000       5.0000     0.0882    3.0000   0.0010     400.0000       3.0000\n",
      "\n",
      "Compact summary saved to compact_summary_per_series.csv\n"
     ]
    }
   ],
   "source": [
    "# Best-per-series summary for the DREAM3 results: series 1-5, candidate lags 2, 3 and 5.\n",
    "main('DREAM3', [1, 2, 3, 4, 5], [2, 3, 5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "98dae264",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Results saved to best_results_per_series.csv\n",
      "\n",
      "====================================================================================================\n",
      "BEST RESULTS PER SERIES (LAG TREATED AS HYPERPARAMETER)\n",
      "====================================================================================================\n",
      "\n",
      "====================================================================================================\n",
      "COMPACT SUMMARY TABLE (BEST PER SERIES)\n",
      "====================================================================================================\n",
      " Series Best_AUROC AUROC_Lag AUROC_LR AUROC_Hidden AUROC_Layers Best_AUPRC AUPRC_Lag AUPRC_LR AUPRC_Hidden AUPRC_Layers\n",
      "      1     0.7207    3.0000   0.0005      80.0000       4.0000     0.6843    3.0000   0.0010      60.0000       3.0000\n",
      "      2     0.9471    2.0000   0.0010     144.0000       1.0000     0.8989    2.0000   0.0010     144.0000       1.0000\n",
      "      3     0.7952    3.0000   0.0010      20.0000       2.0000     0.6222    2.0000   0.0005      20.0000       5.0000\n",
      "\n",
      "Compact summary saved to compact_summary_per_series.csv\n"
     ]
    }
   ],
   "source": [
    "# Best-per-series summary for the CausalTime results: series 1-3, candidate lags 2, 3 and 5.\n",
    "# NOTE: this call writes best_results_per_series.csv and compact_summary_per_series.csv\n",
    "# again, so it overwrites the files left by an earlier main() call in the same directory.\n",
    "main('CausalTime', [1, 2, 3], [2, 3, 5])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "MPS",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
