{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7f0bb6b5-548a-4644-b837-7a7831d91a9a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import torch\n",
    "\n",
    "from transformers import AutoModelForCausalLM\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
    "\n",
    "\n",
    "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
    "MAX_CONTEXT = 2880\n",
    "NUM_SAMPLES = 20\n",
    "\n",
    "\n",
    "def load_and_norm(path):\n",
    "    df = pd.read_csv(path, index_col=\"date\")\n",
    "    value_col = df.columns[0]\n",
    "    scaler = StandardScaler()\n",
    "    df[value_col] = scaler.fit_transform(df[[value_col]])\n",
    "    return df\n",
    "\n",
    "model_path = ''\n",
    "\n",
    "class SundialModel:\n",
    "    def __init__(self, name=model_path):\n",
    "        self.model = AutoModelForCausalLM.from_pretrained(\n",
    "            name,\n",
    "            trust_remote_code=True\n",
    "        ).to(DEVICE)\n",
    "        self.model.eval()\n",
    "\n",
    "    def predict(self, context, horizon):\n",
    "        if len(context) > MAX_CONTEXT:\n",
    "            context = context[-MAX_CONTEXT:]\n",
    "\n",
    "        seqs = torch.tensor(\n",
    "            context, dtype=torch.float32, device=DEVICE\n",
    "        ).unsqueeze(0)\n",
    "\n",
    "        with torch.no_grad():\n",
    "            output = self.model.generate(\n",
    "                seqs,\n",
    "                max_new_tokens=horizon,\n",
    "                num_samples=NUM_SAMPLES\n",
    "            )\n",
    "\n",
    "        if output.ndim == 3:\n",
    "            samples = output[:, 0, :]\n",
    "        else:\n",
    "            samples = output\n",
    "\n",
    "        samples = samples.cpu().numpy()\n",
    "        median = np.median(samples, axis=0)\n",
    "\n",
    "        return median\n",
    "\n",
    "\n",
    "def evaluate_model(df, model, horizons):\n",
    "    values = df.iloc[:, 0].values\n",
    "    n = len(values)\n",
    "    results = []\n",
    "\n",
    "    for H in horizons:\n",
    "        if H >= n:\n",
    "            results.append(\n",
    "                {\"horizon\": H, \"MSE\": np.nan, \"MAE\": np.nan}\n",
    "            )\n",
    "            continue\n",
    "\n",
    "        print(f\"Running for horizon = [{H}]\")\n",
    "\n",
    "        train = values[:-H]\n",
    "        test = values[-H:]\n",
    "\n",
    "        preds = model.predict(train, H)\n",
    "\n",
    "        L = min(len(test), len(preds))\n",
    "\n",
    "        mse = mean_squared_error(test[-L:], preds[-L:])\n",
    "        mae = mean_absolute_error(test[-L:], preds[-L:])\n",
    "\n",
    "        results.append(\n",
    "            {\n",
    "                \"horizon\": H,\n",
    "                \"MSE\": round(mse, 4),\n",
    "                \"MAE\": round(mae, 4),\n",
    "            }\n",
    "        )\n",
    "\n",
    "    return results\n",
    "\n",
    "\n",
    "def run_benchmark(datasets, model, horizons):\n",
    "    table = []\n",
    "\n",
    "    for name, df in datasets.items():\n",
    "        print(f\"Running [{name}] dataset\")\n",
    "        results = evaluate_model(df, model, horizons)\n",
    "\n",
    "        for r in results:\n",
    "            table.append(\n",
    "                {\n",
    "                    \"Dataset\": name,\n",
    "                    \"Horizon\": r[\"horizon\"],\n",
    "                    \"MSE\": r[\"MSE\"],\n",
    "                    \"MAE\": r[\"MAE\"],\n",
    "                }\n",
    "            )\n",
    "\n",
    "    return pd.DataFrame(table)\n",
    "\n",
    "datasets = {\n",
    "    \"AUS_Elec_Demand\": load_and_norm(\"../energy_processed/australian_electricity_demand_dataset_processed.csv\"),\n",
    "    \"Electricity_Weekly\": load_and_norm(\"../energy_processed/electricity_weekly_dataset_processed.csv\"),\n",
    "\n",
    "    \"ETTh1\": load_and_norm(\"../energy_processed/ETTh1_processed.csv\"),\n",
    "    \"ETTh2\": load_and_norm(\"../energy_processed/ETTh2_processed.csv\"),\n",
    "    \"ETTm1\": load_and_norm(\"../energy_processed/ETTm1_processed.csv\"),\n",
    "    \"ETTm2\": load_and_norm(\"../energy_processed/ETTm2_processed.csv\"),\n",
    "\n",
    "    \"London_SmartMeters\": load_and_norm(\"../energy_processed/london_smart_meters_dataset_subset_processed.csv\"),\n",
    "\n",
    "    \"Solar_10min\": load_and_norm(\"../energy_processed/solar_10_minutes_dataset_processed.csv\"),\n",
    "}\n",
    "\n",
    "model = SundialModel()\n",
    "table = run_benchmark(\n",
    "    datasets=datasets,\n",
    "    model=model,\n",
    "    horizons=[24, 48, 96],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1dce3b78-cd1a-46b8-bb92-7d4a614e6b7a",
   "metadata": {},
   "outputs": [],
   "source": [
    "table.to_csv('../energy_results/Sundial_Transport.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "94f9a1a5-47d6-446c-85df-d9ee142bffc3",
   "metadata": {},
   "outputs": [],
   "source": [
    "table.head(4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b073b0f6-4fe4-4d93-b1d7-cf3f56e32d73",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
