{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0f32866",
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "import os, sys\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import pandas as pd\n",
    "import datetime\n",
    "import torch\n",
    "import matplotlib.colors as mcolors\n",
    "from tqdm import tqdm\n",
    "\n",
    "# Add parent directory to system path\n",
    "current_dir = os.getcwd()\n",
    "parent_dir = os.path.dirname(current_dir)\n",
    "sys.path.append(os.path.join(parent_dir, 'core'))\n",
    "\n",
    "# Import from core and utils directory \n",
    "from algorithms import OnlineMA, OnlineMARegret, OnlineMC\n",
    "from load_utils import cdf_from_quantiles\n",
    "from utils import rolling_mean, rolling_vec_norm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d48c04a",
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "outputs": [],
   "source": [
    "raw_data_folder = './raw_data'\n",
    "quantiles_folder = './woAR'\n",
    "\n",
    "# Map quantiles_i.csv to Task j\n",
    "MAP = {i: i+4 for i in range(1, 12)}\n",
    "\n",
    "def load_raw_task(task_num):\n",
    "    if task_num == 15:\n",
    "        # special case for solution to task 15 files \n",
    "        solution_load_path = os.path.join(raw_data_folder, 'Solution to Task 15', 'solution15_L.csv')\n",
    "        solution_temp_path = os.path.join(raw_data_folder, 'Solution to Task 15', 'solution15_L_temperature.csv')\n",
    "        data_load = pd.read_csv(solution_load_path)\n",
    "        data_temp = pd.read_csv(solution_temp_path)\n",
    "        data_temp['TEMP_AVG'] = data_temp[['w3','w9']].mean(axis=1)\n",
    "        out = pd.DataFrame({\n",
    "            'LOAD': data_temp['LOAD'],\n",
    "            'TEMP_AVG': data_temp['TEMP_AVG']\n",
    "        })\n",
    "        return out.reset_index(drop=True)\n",
    "    # Task 4-14\n",
    "    task_solution_path = os.path.join(raw_data_folder, f'Task {task_num}', f'L{task_num}-train.csv')\n",
    "    df = pd.read_csv(task_solution_path)\n",
    "    df = df[df['LOAD'].notna()].copy()\n",
    "    df['TEMP_AVG'] = df[['w3','w9']].mean(axis=1)\n",
    "    return df[['LOAD','TEMP_AVG']].reset_index(drop=True)\n",
    "\n",
    "# Align quantiles with raw task data\n",
    "aligned = []\n",
    "for i in range(1, 13):\n",
    "    quantiles_path = os.path.join(quantiles_folder, f'quantiles_{i}.csv')\n",
    "    quantiles_df = pd.read_csv(quantiles_path)\n",
    "    quantiles = quantiles_df.values  # (T, 99)\n",
    "    if i == 12:\n",
    "        raw_df = load_raw_task(15)\n",
    "    else:\n",
    "        raw_df = load_raw_task(MAP[i])\n",
    "    T = min(len(raw_df), len(quantiles))\n",
    "    raw_df = raw_df.iloc[:T].reset_index(drop=True)\n",
    "    quantiles = quantiles[:T, :]\n",
    "   \n",
    "    # Build baseline p_tilde = P(LOAD >= c)\n",
    "    c = 150.0\n",
    "    p_tilde = 1 - np.array([cdf_from_quantiles(q_row)(c -1e-9) for q_row in quantiles], dtype=float)\n",
    "\n",
    "    y_t = (raw_df['LOAD'].values >= c).astype(float)\n",
    "\n",
    "    # Temperature bin groups [-inf,20],(20,40],...,[80,inf)\n",
    "    bins = [-np.inf, 20, 40, 60, 80, np.inf]\n",
    "    g_idx = pd.cut(raw_df['TEMP_AVG'], bins=bins, right=True, include_lowest=True, labels=False).astype(int)\n",
    "    num_groups = len(bins) - 1\n",
    "    g_mat = np.eye(num_groups, dtype=float)[g_idx]\n",
    "  \n",
    "    aligned.append({\n",
    "        'file_idx': i,\n",
    "        'num_groups': num_groups,\n",
    "        'y': y_t,\n",
    "        'g_mat': g_mat,\n",
    "        'p_tilde': p_tilde,\n",
    "    })\n",
    "\n",
    "y_all = np.concatenate([hourly['y'] for hourly in aligned], axis=0)\n",
    "p_all = np.concatenate([hourly['p_tilde'] for hourly in aligned], axis=0)\n",
    "g_all = np.vstack([hourly['g_mat'] for hourly in aligned])\n",
    "\n",
    "print({'T_total': len(y_all), 'groups': g_all.shape[1],})\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0bfedf17",
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "outputs": [],
   "source": [
    "window = 336\n",
    "window_plot = 336\n",
    "eta = 0.5\n",
    "loss = \"squared\"\n",
    "\n",
    "# Build arrays for 0..T-1\n",
    "T = len(y_all)\n",
    "X = np.ones((T, 1), dtype=float)\n",
    "G = g_all.astype(float)\n",
    "num_groups = G.shape[1]\n",
    "\n",
    "# Online learners \n",
    "maonly = OnlineMA(d=X.shape[1], m=num_groups, eta=eta, window_size=window, gamma_pred=0.0, loss=loss)\n",
    "maonly_nonadaptive = OnlineMA(d=X.shape[1], m=num_groups, eta=eta, window_size=window, gamma_pred=0.0, loss=loss, num_time_steps=T, adaptive=False)\n",
    "mareg = OnlineMARegret(d=X.shape[1], m=num_groups, eta=eta, window_size=window, gamma_pred=0.0, loss=loss)\n",
    "mareg_nonadaptive = OnlineMARegret(d=X.shape[1], m=num_groups, eta=eta, window_size=window, gamma_pred=0.0, loss=loss, num_time_steps=T, adaptive=False)\n",
    "\n",
    "# Track metrics\n",
    "ptilde_baseline_ma_losses = [] \n",
    "maonly_baseline_ma_losses, maonly_baseline_lreg = [], []\n",
    "maonly_nonadaptive_ma_losses, maonly_nonadaptive_lreg = [], []\n",
    "mareg_ma_losses, mareg_l_reg = [], []\n",
    "mareg_nonadaptive_ma_losses, mareg_nonadaptive_l_reg = [], []\n",
    "\n",
    "for t in tqdm(range(T)):\n",
    "    x_t = X[t]                  # (1,)\n",
    "    y_t_scalar = float(y_all[t])\n",
    "    g_t = G[t]                  # (m,)\n",
    "    pt_t = float(p_all[t])\n",
    "\n",
    "    # Baseline from p̃: MA vector at step t\n",
    "    vec_ptilde_t = g_t * (y_t_scalar - pt_t)   # (m,)\n",
    "    ptilde_baseline_ma_losses.append(np.concatenate([vec_ptilde_t, -vec_ptilde_t]))\n",
    "\n",
    "    # Baseline: MA-only baseline (no regret term)\n",
    "    metrics = maonly.update(x_t, y_t_scalar, g_t, p_tilde=pt_t)\n",
    "    maonly_baseline_ma_losses.append(metrics[\"ma_losses\"])  \n",
    "    maonly_baseline_lreg.append(metrics[\"l_reg\"])           \n",
    "\n",
    "    # Baseline: Non-adaptive MA-only\n",
    "    metrics = maonly_nonadaptive.update(x_t, y_t_scalar, g_t, p_tilde=pt_t)\n",
    "    maonly_nonadaptive_ma_losses.append(metrics[\"ma_losses\"])  \n",
    "    maonly_nonadaptive_lreg.append(metrics[\"l_reg\"])           \n",
    "\n",
    "    # Baseline: Non-adaptive MA+Reg\n",
    "    metrics = mareg_nonadaptive.update(x_t, y_t_scalar, g_t, p_tilde=pt_t)\n",
    "    mareg_nonadaptive_ma_losses.append(metrics[\"ma_losses\"])   \n",
    "    mareg_nonadaptive_l_reg.append(metrics[\"l_reg\"])           \n",
    "\n",
    "    # Model update: MA+Reg with adaptive \\eta\n",
    "    metrics = mareg.update(x_t, y_t_scalar, g_t, p_tilde=pt_t)\n",
    "    mareg_ma_losses.append(metrics[\"ma_losses\"])               \n",
    "    mareg_l_reg.append(metrics[\"l_reg\"])                       \n",
    "\n",
    "ptilde_baseline_ma_l2 = rolling_vec_norm(ptilde_baseline_ma_losses, window_plot, norm=\"l2\")\n",
    "ptilde_baseline_ma_linf = rolling_vec_norm(ptilde_baseline_ma_losses, window_plot, norm=\"l_infty\")\n",
    "maonly_baseline_ma_l2 = rolling_vec_norm(maonly_baseline_ma_losses, window_plot, norm=\"l2\")\n",
    "maonly_baseline_ma_linf = rolling_vec_norm(maonly_baseline_ma_losses, window_plot, norm=\"l_infty\")\n",
    "maonly_baseline_lreg = rolling_mean(maonly_baseline_lreg, window_plot)\n",
    "maonly_nonadaptive_ma_l2 = rolling_vec_norm(maonly_nonadaptive_ma_losses, window_plot, norm=\"l2\")\n",
    "maonly_nonadaptive_ma_linf = rolling_vec_norm(maonly_nonadaptive_ma_losses, window_plot, norm=\"l_infty\")\n",
    "maonly_nonadaptive_lreg = rolling_mean(maonly_nonadaptive_lreg, window_plot)\n",
    "mareg_nonadaptive_ma_l2 = rolling_vec_norm(mareg_nonadaptive_ma_losses, window_plot, norm=\"l2\")\n",
    "mareg_nonadaptive_ma_linf = rolling_vec_norm(mareg_nonadaptive_ma_losses, window_plot, norm=\"l_infty\")\n",
    "mareg_nonadaptive_l_reg = rolling_mean(mareg_nonadaptive_l_reg, window_plot)\n",
    "mareg_ma_l2 = rolling_vec_norm(mareg_ma_losses, window_plot, norm=\"l2\")\n",
    "mareg_ma_linf = rolling_vec_norm(mareg_ma_losses, window_plot, norm=\"l_infty\")\n",
    "mareg_lreg = rolling_mean(mareg_l_reg, window_plot)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6530be50",
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "outputs": [],
   "source": [
    "# Plot L_infty\n",
    "plt.figure(figsize=(12, 5))\n",
    "plt.plot(ptilde_baseline_ma_linf, label='p~ baseline')\n",
    "plt.plot(maonly_baseline_ma_linf, label='MA-only')\n",
    "plt.plot(maonly_nonadaptive_ma_linf, label='MA-only (non-adaptive)')\n",
    "plt.plot(mareg_nonadaptive_ma_linf, label='MA+Reg (non-adaptive)')\n",
    "plt.plot(mareg_ma_linf, label='MA+Reg')\n",
    "plt.xlabel(\"t\")\n",
    "plt.ylabel(r\"$L_\\infty$ MA error\")\n",
    "plt.title(f\"GEFCom-Load: Rolling ||MA||_inf over time (w_alg={window}, w_plot={window_plot})\")\n",
    "plt.legend(loc='upper right')\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# Plot L2\n",
    "plt.figure(figsize=(12, 5))\n",
    "plt.plot(ptilde_baseline_ma_l2, label='p~ baseline')\n",
    "plt.plot(maonly_baseline_ma_l2, label='MA-only adaptive')\n",
    "plt.plot(maonly_nonadaptive_ma_l2, label='MA-only (non-adaptive)')\n",
    "plt.plot(mareg_nonadaptive_ma_l2, label='MA+Reg (non-adaptive)')\n",
    "plt.plot(mareg_ma_l2, label='MA+Reg')\n",
    "plt.xlabel(\"t\")\n",
    "plt.ylabel(r\"$L_2$ MA error\")\n",
    "plt.title(f\"GEFCom-Load: Rolling ||MA||_2 over time (w_alg={window}, w_plot={window_plot})\")\n",
    "plt.legend(loc='upper right')\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# Plot L_reg \n",
    "plt.figure(figsize=(12, 5))\n",
    "plt.plot(maonly_baseline_lreg, label='MA-only')\n",
    "plt.plot(maonly_nonadaptive_lreg, label='MA-only (non-adaptive)')\n",
    "plt.plot(mareg_nonadaptive_l_reg, label='MA+Reg (non-adaptive)')\n",
    "plt.plot(mareg_lreg, label='MA+Reg')\n",
    "plt.xlabel(\"t\")\n",
    "plt.ylabel(\"regret\")\n",
    "plt.title(f\"GEFCom-Load: Regret over time (w_alg={window}, w_plot={window_plot})\")\n",
    "plt.legend(loc='upper right')\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
