{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "e0994d24-41d8-4400-8ab2-c018512b3afc",
   "metadata": {},
   "source": [
    "## We have a different venv for ToTo model\n",
    "- venv name: tsfm_toto"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77226924-1f33-441b-8170-5d542db87a63",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import torch\n",
    "\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
    "\n",
    "from toto.data.util.dataset import MaskedTimeseries\n",
    "from toto.inference.forecaster import TotoForecaster\n",
    "from toto.model.toto import Toto\n",
    "\n",
    "\n",
    "DEVICE = \"cpu\"\n",
    "MAX_CONTEXT = 1024\n",
    "\n",
    "\n",
    "def load_and_norm(path):\n",
    "    df = pd.read_csv(path, index_col=\"date\", parse_dates=True)\n",
    "    value_col = df.columns[0]\n",
    "    scaler = StandardScaler()\n",
    "    df[value_col] = scaler.fit_transform(df[[value_col]])\n",
    "    return df\n",
    "\n",
    "\n",
    "model_path=\"\"\n",
    "\n",
    "class TotoModel:\n",
    "    def __init__(self, name=model_path):\n",
    "        model = Toto.from_pretrained(name).to(DEVICE)\n",
    "        self.forecaster = TotoForecaster(model.model)\n",
    "\n",
    "    def predict(self, values, timestamps, horizon):\n",
    "        if len(values) > MAX_CONTEXT:\n",
    "            values = values[-MAX_CONTEXT:]\n",
    "            timestamps = timestamps[-MAX_CONTEXT:]\n",
    "\n",
    "        series = torch.tensor(values, dtype=torch.float32, device=DEVICE)\n",
    "        series = series.unsqueeze(0).unsqueeze(0)\n",
    "\n",
    "        padding_mask = torch.ones_like(series, dtype=torch.bool)\n",
    "        id_mask = torch.zeros_like(series)\n",
    "\n",
    "        timestamp_seconds = torch.tensor(\n",
    "            timestamps, dtype=torch.long, device=DEVICE\n",
    "        ).unsqueeze(0).unsqueeze(0)\n",
    "\n",
    "        delta_t = int(timestamps[1] - timestamps[0])\n",
    "        time_interval_seconds = torch.tensor(\n",
    "            [[delta_t]], dtype=torch.long, device=DEVICE\n",
    "        )\n",
    "\n",
    "        inputs = MaskedTimeseries(\n",
    "            series=series,\n",
    "            padding_mask=padding_mask,\n",
    "            id_mask=id_mask,\n",
    "            timestamp_seconds=timestamp_seconds,\n",
    "            time_interval_seconds=time_interval_seconds,\n",
    "        )\n",
    "\n",
    "        forecast = self.forecaster.forecast(\n",
    "            inputs,\n",
    "            prediction_length=horizon,\n",
    "            num_samples=16,\n",
    "            samples_per_batch=4,\n",
    "        )\n",
    "\n",
    "        samples = forecast.samples.squeeze(0).squeeze(0).cpu().numpy()\n",
    "        median = np.median(samples, axis=0)\n",
    "\n",
    "        return median\n",
    "\n",
    "\n",
    "def evaluate_model(df, model, horizons):\n",
    "    values = df.iloc[:, 0].values\n",
    "    timestamps = df.index.view(\"int64\") // 10**9\n",
    "\n",
    "    n = len(values)\n",
    "    results = []\n",
    "\n",
    "    for H in horizons:\n",
    "        if H >= n:\n",
    "            results.append(\n",
    "                {\"horizon\": H, \"MSE\": np.nan, \"MAE\": np.nan}\n",
    "            )\n",
    "            continue\n",
    "\n",
    "        print(f\"Running for horizon = [{H}]\")\n",
    "\n",
    "        train_values = values[:-H]\n",
    "        test_values = values[-H:]\n",
    "        train_timestamps = timestamps[:-H]\n",
    "\n",
    "        preds = model.predict(train_values, train_timestamps, H)\n",
    "\n",
    "        L = min(len(test_values), len(preds))\n",
    "\n",
    "        mse = mean_squared_error(test_values[-L:], preds[-L:])\n",
    "        mae = mean_absolute_error(test_values[-L:], preds[-L:])\n",
    "\n",
    "        results.append(\n",
    "            {\n",
    "                \"horizon\": H,\n",
    "                \"MSE\": round(mse, 4),\n",
    "                \"MAE\": round(mae, 4),\n",
    "            }\n",
    "        )\n",
    "\n",
    "    return results\n",
    "\n",
    "\n",
    "def run_benchmark(datasets, model, horizons):\n",
    "    table = []\n",
    "\n",
    "    for name, df in datasets.items():\n",
    "        print(f\"Running [{name}] dataset\")\n",
    "        results = evaluate_model(df, model, horizons)\n",
    "\n",
    "        for r in results:\n",
    "            table.append(\n",
    "                {\n",
    "                    \"Dataset\": name,\n",
    "                    \"Horizon\": r[\"horizon\"],\n",
    "                    \"MSE\": r[\"MSE\"],\n",
    "                    \"MAE\": r[\"MAE\"],\n",
    "                }\n",
    "            )\n",
    "\n",
    "    return pd.DataFrame(table)\n",
    "\n",
    "\n",
    "datasets = {\n",
    "    \"AUS_Elec_Demand\": load_and_norm(\"../energy_processed/australian_electricity_demand_dataset_processed.csv\"),\n",
    "    \"Electricity_Weekly\": load_and_norm(\"../energy_processed/electricity_weekly_dataset_processed.csv\"),\n",
    "\n",
    "    \"ETTh1\": load_and_norm(\"../energy_processed/ETTh1_processed.csv\"),\n",
    "    \"ETTh2\": load_and_norm(\"../energy_processed/ETTh2_processed.csv\"),\n",
    "    \"ETTm1\": load_and_norm(\"../energy_processed/ETTm1_processed.csv\"),\n",
    "    \"ETTm2\": load_and_norm(\"../energy_processed/ETTm2_processed.csv\"),\n",
    "\n",
    "    \"London_SmartMeters\": load_and_norm(\"../energy_processed/london_smart_meters_dataset_subset_processed.csv\"),\n",
    "\n",
    "    \"Solar_10min\": load_and_norm(\"../energy_processed/solar_10_minutes_dataset_processed.csv\"),\n",
    "}\n",
    "\n",
    "\n",
    "model = TotoModel()\n",
    "table = run_benchmark(\n",
    "    model=model,\n",
    "    datasets=datasets,\n",
    "    horizons=[24, 48, 96],\n",
    ")\n",
    "\n",
    "print(table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a8fcbc1-722b-4ba4-9266-d08ef22b74b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "table.to_csv('../energy_results/ToTo_Energy.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6970563b-9173-4316-b8db-eb69c3eb8b22",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
