{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "bDnJBq4b3zBQ"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "import numpy as np\n",
        "import gpytorch\n",
        "import torch\n",
        "import plotly.graph_objects as go\n",
        "from scipy.stats import norm"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "base_path = \"/content/drive/MyDrive/Experiments/CA_Housing/\"\n",
        "\n",
        "\n",
        "def _read_float32(file_name):\n",
        "    \"\"\"Load one whitespace-delimited, header-less file from base_path.\n",
        "\n",
        "    Args:\n",
        "        file_name: File name appended to base_path.\n",
        "\n",
        "    Returns:\n",
        "        The file contents as a float32 NumPy array.\n",
        "    \"\"\"\n",
        "    # Raw string: a bare backslash-s in a normal literal is an invalid escape\n",
        "    # sequence that recent Python versions warn about.\n",
        "    table = pd.read_csv(base_path + file_name, header=None, sep=r'\\s+')\n",
        "    return table.values.astype(np.float32)\n",
        "\n",
        "\n",
        "X_train = _read_float32(\"x_train_CAhousing.csv\")\n",
        "y_train = _read_float32(\"y_train_CAhousing.csv\").flatten()\n",
        "X_test = _read_float32(\"x_test_CAhousing.csv\")\n",
        "y_test = _read_float32(\"y_test_CAhousing.csv\").flatten()\n",
        "\n",
        "# Torch views of the arrays above; targets were flattened to 1-D first.\n",
        "X_train_tensor = torch.from_numpy(X_train).float()\n",
        "y_train_tensor = torch.from_numpy(y_train).float()\n",
        "X_test_tensor = torch.from_numpy(X_test).float()\n",
        "y_test_tensor = torch.from_numpy(y_test).float()"
      ],
      "metadata": {
        "id": "pm-QXGF56VZ2"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "class SVGPModel(gpytorch.models.ApproximateGP):\n",
        "    \"\"\"Sparse variational GP with a constant mean and a scaled Matern (nu=1.5) kernel.\n",
        "\n",
        "    Args:\n",
        "        inducing_points: Tensor whose first dimension indexes the inducing\n",
        "            points. Used as the initial inducing locations, which are\n",
        "            learned during training (learn_inducing_locations=True).\n",
        "    \"\"\"\n",
        "\n",
        "    def __init__(self, inducing_points):\n",
        "        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(inducing_points.size(0))\n",
        "        variational_strategy = gpytorch.variational.VariationalStrategy(\n",
        "            self, inducing_points, variational_distribution, learn_inducing_locations=True\n",
        "        )\n",
        "        super().__init__(variational_strategy)\n",
        "        self.mean_module = gpytorch.means.ConstantMean()\n",
        "        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=1.5))\n",
        "\n",
        "    def forward(self, x):\n",
        "        \"\"\"Return a MultivariateNormal built from the mean and kernel modules evaluated at x.\"\"\"\n",
        "        mean_x = self.mean_module(x)\n",
        "        covar_x = self.covar_module(x)\n",
        "        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)\n",
        "\n",
        "\n",
        "num_inducing = 100\n",
        "# Use a dedicated, seeded generator so that Restart & Run All picks the same\n",
        "# initial inducing points every time without altering the global RNG state.\n",
        "inducing_rng = torch.Generator().manual_seed(0)\n",
        "perm = torch.randperm(X_train_tensor.size(0), generator=inducing_rng)\n",
        "inducing_points = X_train_tensor[perm[:num_inducing]].clone()\n"
      ],
      "metadata": {
        "id": "qgNs-q427qnj"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Gaussian observation model and the sparse variational GP built on the\n",
        "# inducing points chosen earlier.\n",
        "likelihood = gpytorch.likelihoods.GaussianLikelihood()\n",
        "model = SVGPModel(inducing_points)\n",
        "\n",
        "# When a GPU is present, move the modules and every tensor used later onto it.\n",
        "if torch.cuda.is_available():\n",
        "    model, likelihood = model.cuda(), likelihood.cuda()\n",
        "    X_train_tensor, y_train_tensor = X_train_tensor.cuda(), y_train_tensor.cuda()\n",
        "    X_test_tensor, y_test_tensor = X_test_tensor.cuda(), y_test_tensor.cuda()\n",
        "    inducing_points = inducing_points.cuda()\n",
        "\n",
        "# Put both modules in training mode ahead of optimisation.\n",
        "model.train()\n",
        "likelihood.train()\n"
      ],
      "metadata": {
        "id": "7EFqpBmm8JNN"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# The likelihood is a separate module from the approximate GP, so its\n",
        "# parameters (the observation noise) must be handed to the optimizer\n",
        "# explicitly; model.parameters() alone would leave the noise at its initial value.\n",
        "optimizer = torch.optim.Adam(\n",
        "    [\n",
        "        {'params': model.parameters()},\n",
        "        {'params': likelihood.parameters()},\n",
        "    ],\n",
        "    lr=0.1,\n",
        ")\n",
        "mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=X_train_tensor.size(0))\n",
        "\n",
        "\n",
        "def train_svgp(num_epochs=100):\n",
        "    \"\"\"Maximise the variational ELBO on the full training set.\n",
        "\n",
        "    Each epoch is one full-batch Adam step on the negative ELBO. The loss is\n",
        "    printed every 10th epoch.\n",
        "\n",
        "    Args:\n",
        "        num_epochs: Number of optimisation steps to run.\n",
        "    \"\"\"\n",
        "    # predict_svgp() leaves both modules in eval mode; switch back so that\n",
        "    # calling this function again after predicting still trains correctly.\n",
        "    model.train()\n",
        "    likelihood.train()\n",
        "    for i in range(num_epochs):\n",
        "        optimizer.zero_grad()\n",
        "        output = model(X_train_tensor)\n",
        "        # The ELBO is maximised, so the loss to minimise is its negative.\n",
        "        loss = -mll(output, y_train_tensor)\n",
        "        loss.backward()\n",
        "        optimizer.step()\n",
        "        if (i + 1) % 10 == 0:\n",
        "            print(f'Epoch {i + 1}/{num_epochs} - Loss: {loss.item():.3f}')\n"
      ],
      "metadata": {
        "id": "hjoP7LiH8LR1"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def predict_svgp(x_new):\n",
        "    \"\"\"Predictive mean and standard deviation at x_new.\n",
        "\n",
        "    Switches the model and likelihood to eval mode, then evaluates\n",
        "    likelihood(model(x_new)) with gradient tracking disabled.\n",
        "\n",
        "    Args:\n",
        "        x_new: Inputs passed to the model.\n",
        "\n",
        "    Returns:\n",
        "        Tuple (mean, std) read from the predictive distribution.\n",
        "    \"\"\"\n",
        "    for module in (model, likelihood):\n",
        "        module.eval()\n",
        "    with torch.no_grad():\n",
        "        predictive = likelihood(model(x_new))\n",
        "        return predictive.mean, predictive.stddev"
      ],
      "metadata": {
        "id": "WdTOPhNC8M19"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Fit the SVGP for 500 epochs; train_svgp prints the loss every 10th epoch.\n",
        "train_svgp(num_epochs=500)"
      ],
      "metadata": {
        "id": "fajrKUFJ8Q1U"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Predictive mean and standard deviation at the test inputs. predict_svgp\n",
        "# passes the model output through the likelihood before reading them.\n",
        "pred_mean_svgp, pred_std_svgp = predict_svgp(X_test_tensor)"
      ],
      "metadata": {
        "id": "ujzNBZIz8TAL"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def crps_gaussian(y, mu, sigma):\n",
        "    \"\"\"Closed-form CRPS of a Gaussian forecast N(mu, sigma^2) against y.\n",
        "\n",
        "    Args:\n",
        "        y: Observed value(s).\n",
        "        mu: Predictive mean(s).\n",
        "        sigma: Predictive standard deviation(s).\n",
        "\n",
        "    Returns:\n",
        "        The element-wise CRPS.\n",
        "    \"\"\"\n",
        "    standardized = (y - mu) / sigma\n",
        "    cdf_term = standardized * (2 * norm.cdf(standardized) - 1)\n",
        "    pdf_term = 2 * norm.pdf(standardized)\n",
        "    return sigma * (cdf_term + pdf_term - 1 / np.sqrt(np.pi))\n",
        "\n",
        "\n",
        "# Root-mean-squared error of the predictive mean on the test targets.\n",
        "rmse = np.sqrt(np.mean(np.square(pred_mean_svgp.cpu().numpy() - y_test)))\n",
        "print(f\"RMSE: {rmse}\")\n",
        "\n",
        "# Per-point CRPS of the Gaussian predictive distribution, then its average.\n",
        "crps_values = crps_gaussian(\n",
        "    y_test,\n",
        "    pred_mean_svgp.cpu().numpy(),\n",
        "    pred_std_svgp.cpu().numpy(),\n",
        ")\n",
        "mean_crps = crps_values.mean()\n",
        "print(f\"Mean CRPS: {mean_crps}\")"
      ],
      "metadata": {
        "id": "f4DLmG-C8kvs"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "HoD9uzWK8xb7"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}