{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "cell-0",
   "metadata": {},
   "source": [
    "# Multi-Seed Robustness Check - Bayesian LSTM\n",
    "\n",
    "This notebook runs all 6 models with 4 different seeds to check result stability.\n",
    "\n",
    "**Models:**\n",
    "- Deterministic (Baseline)\n",
    "- Full-Rank Bayesian\n",
    "- Low-Rank Bayesian\n",
    "- Low-Rank (SVD Init)\n",
    "- Rank-1 Bayesian\n",
    "- Deep Ensemble"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import gc\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-2",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Experiment grid: every seed in SEEDS is run against every model in MODEL_NAMES.\n",
    "SEEDS = [42, 123, 456, 2026]\n",
    "MODEL_NAMES = [\n",
    "    \"Deterministic\",\n",
    "    \"Full-Rank Bayesian\",\n",
    "    \"Low-Rank Bayesian\",\n",
    "    \"Low-Rank (SVD Init)\",\n",
    "    \"Rank-1 Bayesian\",\n",
    "    \"Deep Ensemble\",\n",
    "]\n",
    "\n",
    "# Folder layout, relative to the notebook's working directory.\n",
    "CHECKPOINT_DIR = \"checkpoints/multi_seed\"\n",
    "RESULTS_DIR = \"multi_seed_results\"\n",
    "EXISTING_MODELS_DIR = \"checkpoints\"  # where the existing seed=42 models are loaded from\n",
    "\n",
    "# Switch for reusing the existing seed=42 models; False retrains everything.\n",
    "REUSE_SEED_42 = False\n",
    "\n",
    "# Create both output folders up front; folders that already exist are left as they are.\n",
    "for output_dir in (CHECKPOINT_DIR, RESULTS_DIR):\n",
    "    os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "# Echo the effective configuration.\n",
    "print(f\"Running {len(SEEDS)} seeds: {SEEDS}\")\n",
    "print(f\"Total experiments: {len(SEEDS)} x {len(MODEL_NAMES)} = {len(SEEDS) * len(MODEL_NAMES)}\")\n",
    "print(f\"\\nCheckpoint directory: {CHECKPOINT_DIR}\")\n",
    "print(f\"Results directory: {RESULTS_DIR}\")\n",
    "print(f\"\\nReuse existing seed=42 models: {REUSE_SEED_42}\")\n",
    "if REUSE_SEED_42:\n",
    "    print(f\"  Loading from: {EXISTING_MODELS_DIR}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-4",
   "metadata": {},
   "source": [
    "## Environment Setup & Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Environment configuration (MUST run first)\n",
    "# NOTE(review): presumably configure_environment() has to run before the\n",
    "# TensorFlow import in the next cell - confirm in modules.config.\n",
    "# SEED is the seed the single-model train/evaluate cells further down pass along.\n",
    "from modules.config import configure_environment, SEED\n",
    "\n",
    "configure_environment()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "from modules.training import (\n",
    "    set_seed,  clear_model_cache\n",
    ")\n",
    "from modules.evaluation import (\n",
    "    inverse_transform_predictions\n",
    ")\n",
    "from modules.data_utils import load_and_preprocess_data, load_ood_data\n",
    "from modules.config import (\n",
    "    LSTM_HIDDEN, NUM_LAYERS, BATCH_SIZE, EPOCHS, LEARNING_RATE,\n",
    "    KL_SCALE, RANK, CONFIDENCE_LEVEL, DATA_DIR,\n",
    "    ENSEMBLE_SIZE, ENSEMBLE_EPOCHS\n",
    ")\n",
    "# Handed to evaluate_model() as num_mc_samples further down.\n",
    "NUM_MC_SAMPLES = 250\n",
    "\n",
    "# Echo the imported settings, one line per print call.\n",
    "config_report = [\n",
    "    \"\\nAll modules imported successfully!\",\n",
    "    f\"\\nConfiguration:\",\n",
    "    f\"  LSTM_HIDDEN: {LSTM_HIDDEN}\",\n",
    "    f\"  NUM_LAYERS: {NUM_LAYERS}\",\n",
    "    f\"  BATCH_SIZE: {BATCH_SIZE}\",\n",
    "    f\"  EPOCHS: {EPOCHS}\",\n",
    "    f\"  ENSEMBLE_EPOCHS: {ENSEMBLE_EPOCHS}\",\n",
    "    f\"  ENSEMBLE_SIZE: {ENSEMBLE_SIZE}\",\n",
    "    f\"  NUM_MC_SAMPLES: {NUM_MC_SAMPLES}\",\n",
    "    f\"  RANK: {RANK}\",\n",
    "]\n",
    "for report_line in config_report:\n",
    "    print(report_line)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-7",
   "metadata": {},
   "source": [
    "## Memory Management Utilities"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-8",
   "metadata": {},
   "outputs": [],
   "source": [
    "def aggressive_memory_cleanup():\n",
    "    \"\"\"\n",
    "    Aggressive memory cleanup to prevent OOM errors.\n",
    "\n",
    "    Steps, in order: clear the Keras session, force a garbage-collection\n",
    "    pass, then reset the TF1-compat default graph. The graph reset is\n",
    "    best-effort: an ordinary failure is reported and ignored so cleanup\n",
    "    never aborts an experiment run.\n",
    "    \"\"\"\n",
    "    # Clear Keras session\n",
    "    tf.keras.backend.clear_session()\n",
    "\n",
    "    # Force garbage collection\n",
    "    gc.collect()\n",
    "\n",
    "    # Additional TensorFlow cleanup\n",
    "    try:\n",
    "        tf.compat.v1.reset_default_graph()\n",
    "    except Exception as exc:\n",
    "        # `except Exception` rather than a bare `except`, so KeyboardInterrupt\n",
    "        # and SystemExit still propagate while a failed reset stays non-fatal.\n",
    "        print(f\"Default-graph reset skipped: {exc}\")\n",
    "\n",
    "    print(\"Memory cleaned\")\n",
    "\n",
    "def get_gpu_memory_info():\n",
    "    \"\"\"Get current GPU memory usage.\n",
    "\n",
    "    Returns:\n",
    "        The value of tf.config.experimental.get_memory_info('GPU:0') when at\n",
    "        least one physical GPU is listed; None when no GPU is listed or the\n",
    "        query fails.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        if tf.config.list_physical_devices('GPU'):\n",
    "            return tf.config.experimental.get_memory_info('GPU:0')\n",
    "    except Exception:\n",
    "        # Best-effort probe: a failed query falls through to None.\n",
    "        # `except Exception` (not a bare `except`) lets KeyboardInterrupt and\n",
    "        # SystemExit propagate instead of being swallowed.\n",
    "        pass\n",
    "    return None\n",
    "\n",
    "print(\"Memory management utilities defined\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-9",
   "metadata": {},
   "source": [
    "## Checkpoint & Progress Tracking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "94483b84",
   "metadata": {},
   "outputs": [],
   "source": [
    "from modules.config import ExperimentTracker\n",
    "\n",
    "# One tracker for the whole notebook: the main loop asks it which (seed, model)\n",
    "# pairs are already completed, stores each result through it, and reads its\n",
    "# progress summary.\n",
    "# NOTE(review): checkpoint_dir receives RESULTS_DIR, not CHECKPOINT_DIR, so the\n",
    "# tracker's files presumably land in RESULTS_DIR - confirm this is intended.\n",
    "tracker = ExperimentTracker(\n",
    "    checkpoint_dir=RESULTS_DIR,\n",
    "    seeds=SEEDS,\n",
    "    model_names=MODEL_NAMES\n",
    ")\n",
    "\n",
    "print(\"Experiment tracker initialized\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-11",
   "metadata": {},
   "source": [
    "## Load Data (Once)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-12",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load and preprocess data using data_utils module.\n",
    "# The call returns the train/validation/test inputs and targets, the two\n",
    "# scalers, and a `meta` object that this notebook does not use afterwards.\n",
    "# NOTE(review): presumably the arrays come back already scaled by\n",
    "# scaler_X / scaler_y - confirm in modules.data_utils.\n",
    "(X_train, X_val, X_test,\n",
    " y_train, y_val, y_test,\n",
    " scaler_X, scaler_y, meta) = load_and_preprocess_data(data_dir=DATA_DIR)\n",
    "\n",
    "# Extract dimensions: X_train.shape is unpacked as (samples, timesteps, features).\n",
    "# T and F are later passed to the model helpers as sequence_length / input_size.\n",
    "N_train, T, F = X_train.shape\n",
    "print(f\"\\nData dimensions: {N_train} samples, {T} timesteps, {F} features\")\n",
    "\n",
    "# Inverse transform test labels for evaluation (passed to evaluate_model as y_test_original).\n",
    "y_test_original = inverse_transform_predictions(y_test, scaler_y)\n",
    "\n",
    "# Load OOD data for OOD detection evaluation; both scalers are handed to the loader.\n",
    "# Only X_ood is used by the evaluation cell; y_ood and y_ood_original are unused in this notebook.\n",
    "X_ood, y_ood, y_ood_original = load_ood_data(scaler_X, scaler_y)\n",
    "print(f\"OOD data: {X_ood.shape[0]} samples\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-13",
   "metadata": {},
   "source": [
    "## Model Training & Evaluation Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-14",
   "metadata": {},
   "outputs": [],
   "source": [
    "from modules.training import load_existing_model, train_single_model\n",
    "\n",
    "# Single-model run for \"Low-Rank Bayesian\". The `model` / `var_layers` left\n",
    "# bound at the end of this cell are what the next cell evaluates.\n",
    "\n",
    "# Load existing model\n",
    "# NOTE(review): the train call below rebinds `model` and `var_layers`, so\n",
    "# whatever is loaded here is discarded - confirm whether training should only\n",
    "# run when no stored model was loaded.\n",
    "# NOTE(review): ranks=[14, 20] and seed=42 are hard-coded here, while the train\n",
    "# call uses SEED and passes no rank argument - confirm these are meant to match.\n",
    "model, var_layers = load_existing_model(\n",
    "    model_name=\"Low-Rank Bayesian\",\n",
    "    models_dir=EXISTING_MODELS_DIR,\n",
    "    input_size=F,\n",
    "    sequence_length=T,\n",
    "    lstm_hidden_size=LSTM_HIDDEN,\n",
    "    num_lstm_layers=NUM_LAYERS,\n",
    "    ranks=[14, 20],\n",
    "    seed=42,\n",
    "    reuse_seed_42=REUSE_SEED_42\n",
    ")\n",
    "\n",
    "# Train from scratch; also returns the training time alongside the model.\n",
    "model, var_layers, training_time = train_single_model(\n",
    "    model_name=\"Low-Rank Bayesian\",\n",
    "    X_train=X_train, y_train=y_train,\n",
    "    X_val=X_val, y_val=y_val,\n",
    "    input_size=F,\n",
    "    sequence_length=T,\n",
    "    lstm_hidden_size=LSTM_HIDDEN,\n",
    "    num_lstm_layers=NUM_LAYERS,\n",
    "    batch_size=BATCH_SIZE,\n",
    "    epochs=EPOCHS,\n",
    "    learning_rate=LEARNING_RATE,\n",
    "    kl_scale=KL_SCALE,\n",
    "    seed=SEED\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-15",
   "metadata": {},
   "outputs": [],
   "source": [
    "from modules.evaluation import evaluate_model\n",
    "\n",
    "# Evaluate the `model` / `var_layers` bound by the previous cell.\n",
    "# The validation split and the OOD inputs are passed in next to the test set;\n",
    "# set_seed and clear_model_cache are handed over as callables rather than\n",
    "# being called here.\n",
    "# NOTE(review): the structure of the returned `metrics` is not visible in this\n",
    "# notebook - see modules.evaluation.\n",
    "metrics = evaluate_model(\n",
    "    model=model,\n",
    "    var_layers=var_layers,\n",
    "    model_name=\"Low-Rank Bayesian\",\n",
    "    X_test=X_test,\n",
    "    y_test_original=y_test_original,\n",
    "    X_val=X_val,\n",
    "    y_val=y_val,\n",
    "    X_ood=X_ood,\n",
    "    scaler_y=scaler_y,\n",
    "    confidence_level=CONFIDENCE_LEVEL,\n",
    "    num_mc_samples=NUM_MC_SAMPLES,\n",
    "    seed=SEED,\n",
    "    set_seed_fn=set_seed,\n",
    "    clear_cache_fn=clear_model_cache\n",
    ")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-16",
   "metadata": {},
   "source": [
    "## Main Experiment Loop\n",
    "\n",
    "Run all experiments with checkpoint/resume capability."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-17",
   "metadata": {},
   "outputs": [],
   "source": [
    "import traceback\n",
    "\n",
    "\n",
    "def run_single_experiment(seed, model_name):\n",
    "    \"\"\"Train (or reload) one model for one seed, evaluate it, then free memory.\n",
    "\n",
    "    The main loop below calls this function, but the notebook never defined\n",
    "    it. It is assembled from the load / train / evaluate calls used earlier\n",
    "    in the notebook and reads the data arrays and settings from notebook\n",
    "    globals.\n",
    "\n",
    "    Args:\n",
    "        seed: Random seed for this run. It is applied with set_seed() before\n",
    "            any model is built, so a resumed run does not depend on which\n",
    "            experiments were skipped earlier in the loop.\n",
    "        model_name: Entry of MODEL_NAMES identifying the model to run.\n",
    "\n",
    "    Returns:\n",
    "        (metrics, training_time, reused): the evaluate_model() result, the\n",
    "        training time returned by train_single_model() (0.0 when a stored\n",
    "        model was reused), and whether a stored model was reused.\n",
    "    \"\"\"\n",
    "    set_seed(seed)\n",
    "\n",
    "    model, var_layers = None, None\n",
    "    training_time = 0.0\n",
    "    reused = False\n",
    "\n",
    "    if REUSE_SEED_42 and seed == 42:\n",
    "        # NOTE(review): assumes load_existing_model returns (None, None) when\n",
    "        # no usable stored model exists - confirm against modules.training.\n",
    "        # ranks=[14, 20] mirrors the load call used earlier in this notebook.\n",
    "        model, var_layers = load_existing_model(\n",
    "            model_name=model_name,\n",
    "            models_dir=EXISTING_MODELS_DIR,\n",
    "            input_size=F,\n",
    "            sequence_length=T,\n",
    "            lstm_hidden_size=LSTM_HIDDEN,\n",
    "            num_lstm_layers=NUM_LAYERS,\n",
    "            ranks=[14, 20],\n",
    "            seed=seed,\n",
    "            reuse_seed_42=REUSE_SEED_42\n",
    "        )\n",
    "        reused = model is not None\n",
    "\n",
    "    if not reused:\n",
    "        # NOTE(review): epochs=EPOCHS is passed for every model, as in the\n",
    "        # train call used earlier; confirm whether \"Deep Ensemble\" is meant to\n",
    "        # use ENSEMBLE_EPOCHS instead.\n",
    "        model, var_layers, training_time = train_single_model(\n",
    "            model_name=model_name,\n",
    "            X_train=X_train, y_train=y_train,\n",
    "            X_val=X_val, y_val=y_val,\n",
    "            input_size=F,\n",
    "            sequence_length=T,\n",
    "            lstm_hidden_size=LSTM_HIDDEN,\n",
    "            num_lstm_layers=NUM_LAYERS,\n",
    "            batch_size=BATCH_SIZE,\n",
    "            epochs=EPOCHS,\n",
    "            learning_rate=LEARNING_RATE,\n",
    "            kl_scale=KL_SCALE,\n",
    "            seed=seed\n",
    "        )\n",
    "\n",
    "    try:\n",
    "        metrics = evaluate_model(\n",
    "            model=model,\n",
    "            var_layers=var_layers,\n",
    "            model_name=model_name,\n",
    "            X_test=X_test,\n",
    "            y_test_original=y_test_original,\n",
    "            X_val=X_val,\n",
    "            y_val=y_val,\n",
    "            X_ood=X_ood,\n",
    "            scaler_y=scaler_y,\n",
    "            confidence_level=CONFIDENCE_LEVEL,\n",
    "            num_mc_samples=NUM_MC_SAMPLES,\n",
    "            seed=seed,\n",
    "            set_seed_fn=set_seed,\n",
    "            clear_cache_fn=clear_model_cache\n",
    "        )\n",
    "    finally:\n",
    "        # Drop the model before the next experiment, even if evaluation failed.\n",
    "        del model, var_layers\n",
    "        aggressive_memory_cleanup()\n",
    "\n",
    "    return metrics, training_time, reused\n",
    "\n",
    "\n",
    "# Print initial status\n",
    "summary = tracker.get_summary()\n",
    "print(f\"\\n{'='*80}\")\n",
    "print(f\"Multi-Seed Robustness Check - Bayesian LSTM\")\n",
    "print(f\"{'='*80}\")\n",
    "print(f\"Seeds: {SEEDS}\")\n",
    "print(f\"Models: {len(MODEL_NAMES)}\")\n",
    "print(f\"Total experiments: {summary['total']}\")\n",
    "print(f\"Already completed: {summary['completed']}\")\n",
    "print(f\"Remaining: {summary['remaining']}\")\n",
    "if summary['reused'] > 0:\n",
    "    print(f\"Reused from seed=42: {summary['reused']}\")\n",
    "print(f\"{'='*80}\\n\")\n",
    "\n",
    "# Continue the running counter from whatever the tracker already holds.\n",
    "experiment_num = summary['completed']\n",
    "total_experiments = summary['total']\n",
    "\n",
    "# Run experiments\n",
    "for seed in SEEDS:\n",
    "    print(f\"\\n{'#'*80}\")\n",
    "    print(f\"# SEED: {seed}\")\n",
    "    print(f\"{'#'*80}\\n\")\n",
    "\n",
    "    for model_name in MODEL_NAMES:\n",
    "        # Skip if already completed\n",
    "        if tracker.is_completed(seed, model_name):\n",
    "            print(f\"Skipping {model_name} (seed={seed}) - already completed\")\n",
    "            continue\n",
    "\n",
    "        experiment_num += 1\n",
    "        print(f\"\\n[{experiment_num}/{total_experiments}] Starting: {model_name} (seed={seed})\")\n",
    "\n",
    "        try:\n",
    "            # Seeding happens inside run_single_experiment, once per experiment.\n",
    "            metrics, training_time, reused = run_single_experiment(\n",
    "                seed=seed,\n",
    "                model_name=model_name\n",
    "            )\n",
    "            tracker.save_result(seed, model_name, metrics, training_time, reused=reused)\n",
    "\n",
    "            # Print updated progress\n",
    "            summary = tracker.get_summary()\n",
    "            print(f\"\\nProgress: {summary['completed']}/{summary['total']} ({summary['progress_pct']:.1f}%)\")\n",
    "            print(f\"   Remaining: {summary['remaining']} experiments\")\n",
    "\n",
    "        except Exception as e:\n",
    "            # Report the failure, then re-raise so the run stops here.\n",
    "            print(f\"\\nERROR in {model_name} (seed={seed}):\")\n",
    "            print(f\"   {e}\")\n",
    "            print(f\"\\nProgress saved. You can resume later by running this cell again.\")\n",
    "            traceback.print_exc()\n",
    "            raise\n",
    "\n",
    "print(f\"\\n{'='*80}\")\n",
    "print(\"ALL EXPERIMENTS COMPLETED!\")\n",
    "print(f\"{'='*80}\")\n",
    "summary = tracker.get_summary()\n",
    "print(f\"Total experiments: {summary['total']}\")\n",
    "print(f\"Reused seed=42: {summary['reused']}\")\n",
    "print(f\"Newly trained: {summary['total'] - summary['reused']}\")\n",
    "# The tracker was created with checkpoint_dir=RESULTS_DIR, so report that folder.\n",
    "print(f\"\\nResults saved in: {RESULTS_DIR}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-18",
   "metadata": {},
   "source": [
    "## Results Aggregation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-19",
   "metadata": {},
   "outputs": [],
   "source": [
    "from modules.evaluation import load_aggregated_results\n",
    "from modules.visualization import plot_multi_seed_boxplots\n",
    "\n",
    "# Load from JSON. The path is derived from RESULTS_DIR instead of repeating the\n",
    "# folder name as a literal, so changing RESULTS_DIR in the configuration cell\n",
    "# cannot leave this cell reading from a stale location.\n",
    "aggregated_results_path = f\"{RESULTS_DIR}/aggregated_results.json\"\n",
    "aggregated_results = load_aggregated_results(aggregated_results_path)\n",
    "\n",
    "# Make sure the figure folder exists before the plot is saved into it.\n",
    "boxplot_path = 'figures/multi_seed_boxplots.png'\n",
    "os.makedirs(os.path.dirname(boxplot_path), exist_ok=True)\n",
    "\n",
    "# Plot\n",
    "plot_multi_seed_boxplots(\n",
    "    aggregated_results,\n",
    "    model_names=MODEL_NAMES,\n",
    "    seeds=SEEDS,\n",
    "    save_path=boxplot_path\n",
    ")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-20",
   "metadata": {},
   "source": [
    "## Summary Table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-21",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flatten the aggregated results into one row per model: each metric\n",
    "# contributes a \"<metric>_mean\" and a \"<metric>_std\" column.\n",
    "summary_data = []\n",
    "for model_name in MODEL_NAMES:\n",
    "    if model_name in aggregated_results:\n",
    "        row = {'Model': model_name}\n",
    "        for metric_name, stats in aggregated_results[model_name].items():\n",
    "            row.update({\n",
    "                f\"{metric_name}_mean\": stats['mean'],\n",
    "                f\"{metric_name}_std\": stats['std'],\n",
    "            })\n",
    "        summary_data.append(row)\n",
    "\n",
    "summary_df = pd.DataFrame(summary_data)\n",
    "\n",
    "# Persist the full table next to the other results.\n",
    "summary_csv = Path(RESULTS_DIR) / f\"summary_{len(SEEDS)}_seeds.csv\"\n",
    "summary_df.to_csv(summary_csv, index=False)\n",
    "print(f\"\\nSummary saved to: {summary_csv}\")\n",
    "\n",
    "# Banner for the on-screen table.\n",
    "print(\"\\n\" + \"=\"*80)\n",
    "print(f\"Summary: Mean +/- Std ({len(SEEDS)} seeds)\")\n",
    "print(\"=\"*80 + \"\\n\")\n",
    "\n",
    "# Only the headline metrics are shown, and only the columns that actually exist.\n",
    "key_metrics = ['MAE', 'RMSE', 'R2', 'NLL', 'ECE', 'PICP', 'AUROC_OOD']\n",
    "candidate_cols = ['Model'] + [\n",
    "    col\n",
    "    for m in key_metrics\n",
    "    for col in (f\"{m}_mean\", f\"{m}_std\")\n",
    "]\n",
    "display_cols = [c for c in candidate_cols if c in summary_df.columns]\n",
    "\n",
    "# Fall back to the whole frame when no headline metric column is present.\n",
    "table_df = summary_df[display_cols] if len(display_cols) > 1 else summary_df\n",
    "print(table_df.to_string(index=False))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cell-30",
   "metadata": {},
   "source": [
    "## Final Summary\n",
    "**Results saved in:**\n",
    "- Checkpoints: `checkpoints/multi_seed/`\n",
    "- Results: `multi_seed_results/`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-31",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"=\"*70)\n",
    "print(\"EXPERIMENT COMPLETE - FINAL SUMMARY\")\n",
    "print(\"=\"*70)\n",
    "\n",
    "# Files this workflow is expected to leave behind, each with the note printed\n",
    "# after its path. Every path is checked on disk so the summary does not claim\n",
    "# a file that was never written; when all files exist the output is the same\n",
    "# list as before.\n",
    "expected_files = [\n",
    "    (f\"{CHECKPOINT_DIR}/progress.json\", \" (experiment progress)\"),\n",
    "    (f\"{CHECKPOINT_DIR}/results.pkl\", \" (raw results)\"),\n",
    "    (f\"{RESULTS_DIR}/aggregated_results.json\", \"\"),\n",
    "    (f\"{RESULTS_DIR}/summary_{len(SEEDS)}_seeds.csv\", \"\"),\n",
    "    (f\"{RESULTS_DIR}/stability_analysis.csv\", \"\"),\n",
    "    (f\"{RESULTS_DIR}/latex_table.txt\", \"\"),\n",
    "    (f\"{RESULTS_DIR}/results_across_{len(SEEDS)}_seeds.png\", \"\"),\n",
    "]\n",
    "\n",
    "print(\"\\nSaved Files:\")\n",
    "for file_path, note in expected_files:\n",
    "    status = \"\" if os.path.exists(file_path) else \" [NOT FOUND]\"\n",
    "    print(f\"  - {file_path}{note}{status}\")\n",
    "\n",
    "print(\"\\n\" + \"=\"*70)\n",
    "print(\"Multi-seed robustness check execution complete!\")\n",
    "print(\"=\"*70)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "bnn-gpu115",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
