{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f9ff332-1685-4137-b2ba-7a097c8c93ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import torch\n",
    "import timesfm\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
    "\n",
    "\n",
    "# ---------------------------------------------------\n",
    "# 1. Load dataset and normalize data\n",
    "# ---------------------------------------------------\n",
    "def load_and_norm(path):\n",
    "    \"\"\"Read a CSV, standardise its value column and return a two-column frame.\n",
    "\n",
    "    The file must contain a 'date' column; the column at position 1 is\n",
    "    treated as the single value series. Values are z-scored with a\n",
    "    StandardScaler fitted on the whole column, and 'date' is renamed to\n",
    "    'ds' in the returned DataFrame, whose columns are ['ds', <value column>].\n",
    "    \"\"\"\n",
    "    frame = pd.read_csv(path)\n",
    "    frame['date'] = pd.to_datetime(frame['date'])   # parse real timestamps\n",
    "\n",
    "    target = frame.columns[1]                       # the only value column\n",
    "    frame[target] = StandardScaler().fit_transform(frame[[target]])\n",
    "\n",
    "    # downstream code looks the timestamps up under the name 'ds'\n",
    "    return frame.rename(columns={'date': 'ds'})[['ds', target]]\n",
    "\n",
    "\n",
    "def infer_frequency(dates):\n",
    "    \"\"\"Return a pandas-style frequency alias for a sequence of timestamps.\n",
    "\n",
    "    ``pd.infer_freq`` is tried first and its answer is returned unchanged.\n",
    "    When it cannot decide (irregular spacing, or fewer than three\n",
    "    timestamps, for which it raises ValueError) the alias is derived from\n",
    "    the median gap between consecutive timestamps, rounded down to the\n",
    "    largest unit that fits:\n",
    "\n",
    "        < 1 minute -> 'S'    < 1 hour  -> 'T'    < 1 day   -> 'H'\n",
    "        < 1 week   -> 'D'    < 28 days -> 'W'    < 89 days -> 'M'\n",
    "        < 365 days -> 'Q'    otherwise -> 'Y'\n",
    "\n",
    "    'D' is returned when no gap can be measured (zero or one timestamp).\n",
    "    \"\"\"\n",
    "    try:\n",
    "        freq = pd.infer_freq(dates)\n",
    "    except ValueError:\n",
    "        # raised for fewer than three timestamps; use the median-gap fallback\n",
    "        freq = None\n",
    "\n",
    "    if freq is not None:\n",
    "        return freq\n",
    "\n",
    "    median_delta = pd.Series(dates).diff().dropna().median()\n",
    "    if pd.isna(median_delta):\n",
    "        return \"D\"\n",
    "\n",
    "    # (exclusive upper bound, alias); single-letter aliases are kept so the\n",
    "    # fallback stays consistent with the values it returned before\n",
    "    buckets = (\n",
    "        (pd.Timedelta(minutes=1), \"S\"),   # seconds\n",
    "        (pd.Timedelta(hours=1), \"T\"),     # minutes\n",
    "        (pd.Timedelta(days=1), \"H\"),      # hours\n",
    "        (pd.Timedelta(weeks=1), \"D\"),     # days\n",
    "        (pd.Timedelta(days=28), \"W\"),     # weeks\n",
    "        (pd.Timedelta(days=89), \"M\"),     # months\n",
    "        (pd.Timedelta(days=365), \"Q\"),    # quarters\n",
    "    )\n",
    "    for upper_bound, alias in buckets:\n",
    "        if median_delta < upper_bound:\n",
    "            return alias\n",
    "\n",
    "    return \"Y\"                            # a year or more between points\n",
    "    \n",
    "# ---------------------------------------------------\n",
    "# 2. TimesFM Model Wrapper\n",
    "# ---------------------------------------------------\n",
    "model_path = \"\"\n",
    "class TimesFMModel:\n",
    "    \"\"\"Wrapper that exposes a TimesFM checkpoint through a simple ``predict`` call.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    hf_repo : str\n",
    "        Hugging Face repo id of the checkpoint (defaults to ``model_path``).\n",
    "    context_len : int\n",
    "        Context length passed to ``TimesFmHparams``.\n",
    "    per_core_batch_size : int\n",
    "        Batch size passed to ``TimesFmHparams``.\n",
    "    backend : str or None\n",
    "        Backend passed to ``TimesFmHparams``. ``None`` picks 'gpu' when CUDA\n",
    "        is available and 'cpu' otherwise.\n",
    "\n",
    "    Loaded models are cached per horizon, so each distinct horizon loads the\n",
    "    checkpoint once and keeps that model in memory for later calls.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(\n",
    "        self,\n",
    "        hf_repo=model_path,\n",
    "        context_len=2048,\n",
    "        per_core_batch_size=32,\n",
    "        backend=None,\n",
    "    ):\n",
    "        self.hf_repo = hf_repo\n",
    "        self.context_len = context_len\n",
    "        self.per_core_batch_size = per_core_batch_size\n",
    "\n",
    "        # NOTE(review): the TimesFM README uses backend=\"gpu\"; \"cuda\" is not a\n",
    "        # documented value -- confirm against the installed timesfm version.\n",
    "        if backend is None:\n",
    "            backend = \"gpu\" if torch.cuda.is_available() else \"cpu\"\n",
    "        self.backend = backend\n",
    "\n",
    "        self._models = {}   # horizon -> loaded timesfm.TimesFm instance\n",
    "\n",
    "    def _get_model(self, horizon):\n",
    "        \"\"\"Return the TimesFM instance for ``horizon``, loading it on first use.\"\"\"\n",
    "        if horizon not in self._models:\n",
    "            self._models[horizon] = timesfm.TimesFm(\n",
    "                hparams=timesfm.TimesFmHparams(\n",
    "                    backend=self.backend,\n",
    "                    per_core_batch_size=self.per_core_batch_size,\n",
    "                    horizon_len=horizon,\n",
    "                    num_layers=50,\n",
    "                    use_positional_embedding=False,\n",
    "                    context_len=self.context_len,\n",
    "                ),\n",
    "                checkpoint=timesfm.TimesFmCheckpoint(\n",
    "                    huggingface_repo_id=self.hf_repo\n",
    "                ),\n",
    "            )\n",
    "        return self._models[horizon]\n",
    "\n",
    "    def predict(self, train_values, train_dates, horizon):\n",
    "        \"\"\"Forecast ``horizon`` steps beyond the end of the training series.\n",
    "\n",
    "        train_values : array-like or torch.Tensor of observed values.\n",
    "        train_dates  : timestamps aligned with ``train_values``.\n",
    "        horizon      : number of future steps to forecast.\n",
    "\n",
    "        Returns the 'timesfm' column of the forecast as a NumPy array.\n",
    "        Raises ValueError when no checkpoint repo id has been configured.\n",
    "        \"\"\"\n",
    "        if not self.hf_repo:\n",
    "            raise ValueError(\n",
    "                \"hf_repo is empty: set model_path or pass hf_repo=... to TimesFMModel\"\n",
    "            )\n",
    "\n",
    "        if isinstance(train_values, torch.Tensor):\n",
    "            # detach first so tensors that require grad can be converted\n",
    "            train_values = train_values.detach().cpu().numpy()\n",
    "\n",
    "        # infer frequency automatically\n",
    "        freq = infer_frequency(train_dates)\n",
    "        print(\"*\"*50)\n",
    "        print(freq)\n",
    "        print(\"*\"*50)\n",
    "\n",
    "        df = pd.DataFrame({\n",
    "            \"unique_id\": \"series_1\",\n",
    "            \"ds\": train_dates,\n",
    "            \"y\": train_values\n",
    "        })\n",
    "\n",
    "        # reuse the model already loaded for this horizon, if any\n",
    "        tfm = self._get_model(int(horizon))\n",
    "\n",
    "        # use inferred frequency\n",
    "        forecast_df = tfm.forecast_on_df(\n",
    "            inputs=df,\n",
    "            value_name=\"y\",\n",
    "            freq=freq,\n",
    "        )\n",
    "\n",
    "        preds = (\n",
    "            forecast_df[forecast_df[\"unique_id\"] == \"series_1\"]\n",
    "            .tail(horizon)[\"timesfm\"]\n",
    "            .values\n",
    "        )\n",
    "\n",
    "        return preds\n",
    "\n",
    "\n",
    "# ---------------------------------------------------\n",
    "# 3. Evaluation on each dataset\n",
    "# ---------------------------------------------------\n",
    "def evaluate_model(df, model, horizons):\n",
    "    \"\"\"Score ``model`` on the last H points of one series, for each horizon H.\n",
    "\n",
    "    df       : DataFrame with a 'ds' column; the column at position 1 holds\n",
    "               the values.\n",
    "    model    : object exposing ``predict(train_values, train_dates, horizon)``.\n",
    "    horizons : iterable of integer horizons.\n",
    "\n",
    "    Returns a list of dicts with keys 'horizon', 'MSE' and 'MAE' (rounded to\n",
    "    four decimals). Both metrics are NaN when the horizon is not positive or\n",
    "    leaves no training data.\n",
    "    \"\"\"\n",
    "    value_col = df.columns[1]   # second column is the value\n",
    "    values = df[value_col].values\n",
    "    dates = df[\"ds\"].values\n",
    "\n",
    "    n = len(values)\n",
    "    results = []\n",
    "\n",
    "    for H in horizons:\n",
    "        # H <= 0 cannot be evaluated: values[:-0] is empty and values[-0:] is\n",
    "        # the whole series. H >= n leaves nothing to train on.\n",
    "        if H <= 0 or H >= n:\n",
    "            results.append({\n",
    "                \"horizon\": H,\n",
    "                \"MSE\": np.nan,\n",
    "                \"MAE\": np.nan,\n",
    "            })\n",
    "            continue\n",
    "\n",
    "        print(f'Running for horizon = [{H}]')\n",
    "\n",
    "        # explicit split index: everything before it is context, the rest is held out\n",
    "        split = n - H\n",
    "        train_values = values[:split]\n",
    "        train_dates = dates[:split]\n",
    "        test_values = values[split:]\n",
    "\n",
    "        preds = model.predict(train_values, train_dates, H)\n",
    "\n",
    "        mse = mean_squared_error(test_values, preds)\n",
    "        mae = mean_absolute_error(test_values, preds)\n",
    "\n",
    "        results.append({\n",
    "            \"horizon\": H,\n",
    "            \"MSE\": round(mse, 4),\n",
    "            \"MAE\": round(mae, 4),\n",
    "        })\n",
    "\n",
    "    return results\n",
    "\n",
    "\n",
    "# ---------------------------------------------------\n",
    "# 4. Run benchmark across all datasets\n",
    "# ---------------------------------------------------\n",
    "def run_benchmark(datasets, model, horizons):\n",
    "    \"\"\"Evaluate ``model`` on every dataset and collect the metrics in one table.\n",
    "\n",
    "    datasets : mapping of dataset name -> DataFrame accepted by evaluate_model.\n",
    "    model    : forecaster handed through to evaluate_model.\n",
    "    horizons : horizons handed through to evaluate_model.\n",
    "\n",
    "    Returns a DataFrame with columns Dataset, Horizon, MSE and MAE, one row\n",
    "    per (dataset, horizon) pair.\n",
    "    \"\"\"\n",
    "    rows = []\n",
    "\n",
    "    for dataset_name, frame in datasets.items():\n",
    "        print(f'\\nRunning [{dataset_name}] dataset')\n",
    "\n",
    "        # flatten the per-horizon results into table rows tagged with the dataset\n",
    "        rows.extend(\n",
    "            {\n",
    "                \"Dataset\": dataset_name,\n",
    "                \"Horizon\": outcome[\"horizon\"],\n",
    "                \"MSE\": outcome[\"MSE\"],\n",
    "                \"MAE\": outcome[\"MAE\"],\n",
    "            }\n",
    "            for outcome in evaluate_model(frame, model, horizons)\n",
    "        )\n",
    "\n",
    "    return pd.DataFrame(rows)\n",
    "\n",
    "\n",
    "# ---------------------------------------------------\n",
    "# 5. Energy datasets included in the benchmark\n",
    "# ---------------------------------------------------\n",
    "# Dataset name -> processed CSV (each file has a 'date' column and one value column).\n",
    "dataset_files = {\n",
    "    \"AUS_Elec_Demand\": \"../energy_processed/australian_electricity_demand_dataset_processed.csv\",\n",
    "    \"Electricity_Weekly\": \"../energy_processed/electricity_weekly_dataset_processed.csv\",\n",
    "\n",
    "    \"ETTh1\": \"../energy_processed/ETTh1_processed.csv\",\n",
    "    \"ETTh2\": \"../energy_processed/ETTh2_processed.csv\",\n",
    "    \"ETTm1\": \"../energy_processed/ETTm1_processed.csv\",\n",
    "    \"ETTm2\": \"../energy_processed/ETTm2_processed.csv\",\n",
    "\n",
    "    \"London_SmartMeters\": \"../energy_processed/london_smart_meters_dataset_subset_processed.csv\",\n",
    "\n",
    "    \"Solar_10min\": \"../energy_processed/solar_10_minutes_dataset_processed.csv\",\n",
    "}\n",
    "\n",
    "# Load and normalise every CSV, keeping the order listed above.\n",
    "datasets = {\n",
    "    dataset_name: load_and_norm(csv_path)\n",
    "    for dataset_name, csv_path in dataset_files.items()\n",
    "}\n",
    "\n",
    "\n",
    "# ---------------------------------------------------\n",
    "# 6. Run TimesFM Benchmark\n",
    "# ---------------------------------------------------\n",
    "model = TimesFMModel()\n",
    "table = run_benchmark(datasets, model, horizons=[24, 48, 96])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "45bc929b-6bac-4cd3-b3f2-09b52442f773",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the results: 8 datasets x 3 horizons = 24 rows, so tail(60) shows the whole table.\n",
    "table.tail(60)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b95df26-eddc-4576-b587-4dade8a10079",
   "metadata": {},
   "outputs": [],
   "source": [
    "# DataFrame.to_csv does not create missing directories, so make sure the\n",
    "# output folder exists before writing the benchmark results.\n",
    "results_path = Path('../energy_results/TimesFM_Energy.csv')\n",
    "results_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "table.to_csv(results_path, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3931316b-3016-48a0-b1ab-3a6504c2833b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
