{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training L2 and L3 models\n",
    "This notebook shows how to train the L2 and L3 models on the OOF predictions stored in `artifacts/`. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pathlib import Path\n",
    "from autogluon.common.loaders import load_pkl\n",
    "\n",
    "import atse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Available datasets: ['borealis', 'monash_fred_md', 'monash_nn5_weekly']\n"
     ]
    }
   ],
   "source": [
    "artifacts_dir = Path(\"../artifacts/\")\n",
    "available_datasets = sorted([f.stem for f in sorted(artifacts_dir.glob(\"*.pkl\"))])\n",
    "print(f\"Available datasets: {available_datasets}\")\n",
    "dataset_name = available_datasets[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_artifact(dataset_name: str, artifacts_dir: Path = artifacts_dir) -> dict:\n",
    "    available_artifacts = [f.stem for f in sorted(artifacts_dir.glob(\"*.pkl\"))]\n",
    "    if dataset_name not in available_artifacts:\n",
    "        raise ValueError(\n",
    "            f\"Cached predictions unavailable for {dataset_name}. Available datasets: {available_artifacts}\"\n",
    "        )\n",
    "    return load_pkl.load(str(artifacts_dir / f\"{dataset_name}.pkl\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = atse.data.process_simulation_artifact(load_artifact(dataset_name))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Some statistics about the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset: monash_nn5_weekly\n",
      "\t Number of folds: 5\n",
      "\t Number of items: 111\n",
      "\t Prediction length: 8\n",
      "\t Shortest time series: 113\n",
      "\t Longest time series: 113\n",
      "\t Shortest input time series: 65\n",
      "\t Number of models: 3\n"
     ]
    }
   ],
   "source": [
    "print(f\"Dataset: {dataset_name}\")\n",
    "n_folds = len(data[\"y_val\"])\n",
    "print(f\"\\t Number of folds: {n_folds}\")\n",
    "n_items = len(data[\"y_val\"][0].index.get_level_values(0).unique())\n",
    "print(f\"\\t Number of items: {n_items}\")\n",
    "n_times = len(data[\"y_val\"][0].index.get_level_values(1).unique())\n",
    "n_pred = data[\"prediction_length\"]\n",
    "print(f\"\\t Prediction length: {n_pred}\")\n",
    "y = data[\"y_test\"]\n",
    "ts_lengths = [len(y.loc[i]) for i in y.index.get_level_values(0).unique()]\n",
    "shortest_ts = min(ts_lengths)\n",
    "print(f\"\\t Shortest time series: {shortest_ts}\")\n",
    "longest_ts = max(ts_lengths)\n",
    "print(f\"\\t Longest time series: {longest_ts}\")\n",
    "shortest_input = shortest_ts - n_pred - n_folds * n_pred\n",
    "print(f\"\\t Shortest input time series: {shortest_input}\")\n",
    "n_models = len(data[\"models\"])\n",
    "print(f\"\\t Number of models: {n_models}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>val_score</th>\n",
       "      <th>test_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Chronos[bolt_base]</td>\n",
       "      <td>-0.833186</td>\n",
       "      <td>-0.714044</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>DynamicOptimizedTheta</td>\n",
       "      <td>-0.899415</td>\n",
       "      <td>-0.784589</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>SeasonalNaive</td>\n",
       "      <td>-1.202952</td>\n",
       "      <td>-1.020590</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   model  val_score  test_score\n",
       "2     Chronos[bolt_base]  -0.833186   -0.714044\n",
       "1  DynamicOptimizedTheta  -0.899415   -0.784589\n",
       "0          SeasonalNaive  -1.202952   -1.020590"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def score(y_val, y_test, y_val_preds, y_test_preds, data=data):\n",
    "    Metric = atse.get_ag_metric(data[\"eval_metric\"])\n",
    "    if np.isnan(y_test_preds).any().any():\n",
    "        return np.nan, np.nan\n",
    "    test_loss = atse.compute_loss(Metric, y_test, y_test_preds, data)\n",
    "    val_loss = atse.compute_loss(Metric, y_val, y_val_preds, data)\n",
    "    return {\"val_score\": -val_loss, \"test_score\": -test_loss}\n",
    "\n",
    "\n",
    "models = data[\"models\"]\n",
    "score_val, score_test = [], []\n",
    "for model in models:\n",
    "    scores = score(data[\"y_val\"], data[\"y_test\"], data[\"y_val_preds\"][model], data[\"y_test_preds\"][model])\n",
    "    score_val.append(scores[\"val_score\"])\n",
    "    score_test.append(scores[\"test_score\"])\n",
    "leaderboard_base = pd.DataFrame({\"model\": models, \"val_score\": score_val, \"test_score\": score_test})\n",
    "leaderboard_base.sort_values(by=\"val_score\", ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from atse import SimpleAverage, BestValidationModel, GreedyEnsemble, LinearEnsemble, PerformanceWeightedAverage\n",
    "from atse import AGTabularStackerModel\n",
    "from atse import StackedEnsemble\n",
    "\n",
    "metric = atse.metrics.get_metric(data[\"eval_metric\"])  # this is needed in the construction of each ensmemble model\n",
    "\n",
    "\n",
    "def train_and_eval(ensemble_model):\n",
    "    ensemble_model.fit_ensemble(\n",
    "        model_predictions_per_window=data[\"y_val_preds\"],\n",
    "        labels_per_window=data[\"y_val\"],\n",
    "    )\n",
    "    y_test_pred = ensemble_model.predict(data[\"y_test_preds\"], data=data[\"y_test_in\"])\n",
    "    y_val_pred = [\n",
    "        ensemble_model.predict(y_val_preds, data=y_val_in)\n",
    "        for y_val_preds, y_val_in in zip(atse.val_preds_to_folds(data[\"y_val_preds\"]), data[\"y_val_in\"])\n",
    "    ]\n",
    "    return score(data[\"y_val\"], data[\"y_test\"], y_val_pred, y_test_pred, data=data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Simple stacker models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'val_score': -0.9269438295548514, 'test_score': -0.7832360988962868}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = SimpleAverage(metric=metric, target=data[\"target\"], kind=\"mean\")\n",
    "train_and_eval(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'val_score': -0.8331862088519086, 'test_score': -0.7140442567955622}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = BestValidationModel(metric=metric, target=data[\"target\"])\n",
    "train_and_eval(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 0/100 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 100/100 [00:00<00:00, 273.19it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'val_score': -0.8331862088519086, 'test_score': -0.7140442567955622}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = GreedyEnsemble(metric=metric, target=data[\"target\"], ensemble_size=100, tqdm=True)\n",
    "train_and_eval(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'val_score': -0.8757297898786669, 'test_score': -0.7442774097527168}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = PerformanceWeightedAverage(\n",
    "    metric=metric,\n",
    "    target=data[\"target\"],\n",
    "    kind=\"exp\",\n",
    "    normalize_losses=True,  # normalize losses to sum up to 1 before averaging?\n",
    ")\n",
    "train_and_eval(model)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Linear models\n",
    "The main ingredients are: \n",
    "- `weights_per` to specify the dimensions along which we want separate weights, e.g. `\"m\"`, `\"miq\"`, or `\"mtqq\"` (where `qq` denotes weights across-quantiles)\n",
    "- `weight_transform` denotes the activation function, which ensures that some desired property of the weights hold, one of `\"softmax\"` (default), `\"square\"` (positive weights), or `None`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "loss: 0.8332 | max_grad: 9.99e-06 | rel loss diff: 1.19e-08 | lr: 1.00e-01:   2%|▏         | 1804/100000 [00:10<09:31, 171.75it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'val_score': -0.8331961298002766, 'test_score': -0.7140442832007112}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = LinearEnsemble(\n",
    "    metric=metric,\n",
    "    target=data[\"target\"],\n",
    "    weights_per=\"m\",\n",
    "    tqdm=True,\n",
    ")\n",
    "train_and_eval(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "loss: 0.9269 | max_grad: 2.40e-01 | rel loss diff: inf | lr: 1.00e-01:   0%|          | 0/100000 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "loss: 0.8283 | max_grad: 3.02e-03 | rel loss diff: 7.35e-09 | lr: 1.25e-02:   2%|▏         | 1926/100000 [00:11<09:25, 173.31it/s] \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'val_score': -0.8282885189028608, 'test_score': -0.7002264224272012}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = LinearEnsemble(\n",
    "    metric=metric,\n",
    "    target=data[\"target\"],\n",
    "    tqdm=True,\n",
    "    weights_per=\"mq\",\n",
    "    weight_transform=\"square\",\n",
    ")\n",
    "train_and_eval(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "loss: 5.6480 | max_grad: 6.56e-01 | rel loss diff: 9.38e-02 | lr: 1.00e-01:   0%|          | 0/100000 [00:00<?, ?it/s] "
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "loss: 0.8183 | max_grad: 9.68e-04 | rel loss diff: 8.17e-09 | lr: 1.56e-03:   3%|▎         | 3219/100000 [00:18<09:03, 178.08it/s] \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'val_score': -0.8182812641504642, 'test_score': -0.6980669206339173}"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = LinearEnsemble(\n",
    "    metric=metric,\n",
    "    target=data[\"target\"],\n",
    "    tqdm=True,\n",
    "    weights_per=\"mqq\",\n",
    "    weight_transform=\"square\",\n",
    ")\n",
    "train_and_eval(model)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Nonlinear models\n",
    "The main ingredients are: \n",
    "- `stacker_model_str` specifies the nonlinear model, which will be retreived from `autogluon.tabular.trainer.model_presets.presets.MODEL_TYPES`. Examples include `\"GBM\"` or `\"REALMLP\"`\n",
    "- `early_stopping` specifies whether to use the last validation window for early stopping\n",
    "- `scaler`: Whether to scale the data. `standard` for standard scaling or `None` for no scaling.\n",
    "- `hyperparameters` can be used to specify a dict of hyperparameters to be passed to the stacker model, e.g. `hyperparameters={\"num_iterations\": 1000}` to specify a maximum number of steps to GBM."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'val_score': -0.4000331668185499, 'test_score': -0.7292740661576943}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = AGTabularStackerModel(\n",
    "    metric=metric,\n",
    "    target=data[\"target\"],\n",
    "    stacker_model_str=\"GBM\",\n",
    "    early_stopping=False,\n",
    "    scaler=\"standard\",\n",
    "    hyperparameters={\"num_boost_round\": 1000},\n",
    ")\n",
    "train_and_eval(model)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Multi-layer stacking\n",
    "The main ingredients are:\n",
    "- `stacker_model` specifies the L3 model as a tuple \n",
    "- `base_models` specifies the L2 models as a list of tuples `[(model_name1, model_kwargs1), (model_name2, model_kwargs2), ...]`\n",
    "- `base_model_kwargs` can be used for convenience to pass a set of shared kwargs to the base models\n",
    "- `retrain` specifies whether to retrain the base models or not after fitting the L3 model (true by default)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train 3 base models\n",
      "[1/3] (0, 'SimpleAverage(median)')\n",
      "[2/3] (1, 'GreedyEnsemble(100)')\n",
      "[3/3] (2, \"AGTabularStackerModel(GBM, standard, {'num_boost_round': 100})\")\n",
      "Train stacker model (GreedyEnsemble(100))\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 100/100 [00:00<00:00, 818.80it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Retrain 2 base models\n",
      "[1/2]: (1, 'GreedyEnsemble(100)')\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2/2]: (2, \"AGTabularStackerModel(GBM, standard, {'num_boost_round': 100})\")\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'val_score': -0.8091129788421817, 'test_score': -0.7096627583435238}"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = StackedEnsemble(\n",
    "    metric=metric,\n",
    "    target=data[\"target\"],\n",
    "    stacker_model=(\"GreedyEnsemble\", {\"tqdm\": True}),\n",
    "    base_models=[\n",
    "        [\"SimpleAverage\", {\"kind\": \"median\"}],\n",
    "        [\"GreedyEnsemble\", {\"ensemble_size\": 100}],\n",
    "        [\"AGTabularStackerModel\", {\"stacker_model_str\": \"GBM\", \"scaler\": \"standard\", \"hyperparameters\": {\"num_boost_round\": 100}}],\n",
    "    ],\n",
    "    retrain=True,\n",
    "    verbose=True,  # set to False to turn of some of the print statements\n",
    ")\n",
    "train_and_eval(model)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "atse",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
