{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Vv1gJ53dEnMw"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "import numpy as np\n",
        "from sklearn.neighbors import NearestNeighbors\n",
        "from sklearn.gaussian_process import GaussianProcessRegressor\n",
        "from sklearn.gaussian_process.kernels import Matern, WhiteKernel\n",
        "import matplotlib.pyplot as plt\n",
        "import gpytorch\n",
        "import torch\n",
        "from scipy.stats import norm"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "base_path = \"/content/drive/MyDrive/Experiments/1d_function/\"\n",
        "\n",
        "X_train = pd.read_csv(base_path + \"x_train_1d.csv\", header=None).values  # Convert to NumPy array\n",
        "y_train = pd.read_csv(base_path + \"y_train_1d.csv\", header=None).values.flatten()\n",
        "\n",
        "X_test = pd.read_csv(base_path + \"x_test_1d.csv\", header=None).values\n",
        "y_test = pd.read_csv(base_path + \"y_test_1d.csv\", header=None).values.flatten()\n",
        "\n",
        "X_train_tensor = torch.from_numpy(X_train).float()\n",
        "y_train_tensor = torch.from_numpy(y_train).float()\n",
        "X_test_tensor = torch.from_numpy(X_test).float()\n",
        "y_test_tensor = torch.from_numpy(y_test).float()"
      ],
      "metadata": {
        "id": "WydAjjLMHAVU"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "plt.figure(figsize=(10, 6))\n",
        "plt.scatter(X_train, y_train, color='blue', label=\"Training Data\", alpha=0.5, s=10)\n",
        "plt.scatter(X_test, y_test, color='red', label=\"Test Data\", alpha=0.5, s=10)\n",
        "plt.xlabel(\"X\")\n",
        "plt.ylabel(\"Y\")\n",
        "plt.title(\"Training and Test Data Scatter Plot\")\n",
        "plt.legend()\n",
        "plt.show()"
      ],
      "metadata": {
        "id": "IVEWsnVjG00N"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "class SVGPModel(gpytorch.models.ApproximateGP):\n",
        "    def __init__(self, inducing_points):\n",
        "        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(inducing_points.size(0))\n",
        "        variational_strategy = gpytorch.variational.VariationalStrategy(\n",
        "            self, inducing_points, variational_distribution, learn_inducing_locations=True\n",
        "        )\n",
        "        super(SVGPModel, self).__init__(variational_strategy)\n",
        "\n",
        "        self.mean_module = gpytorch.means.ZeroMean()\n",
        "        self.covar_module = gpytorch.kernels.ScaleKernel(\n",
        "            gpytorch.kernels.MaternKernel(nu=1.5)\n",
        "        )\n",
        "\n",
        "    def forward(self, x):\n",
        "        mean_x = self.mean_module(x)\n",
        "        covar_x = self.covar_module(x)\n",
        "        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)\n",
        "\n",
        "# Set number of inducing points\n",
        "M = 100\n",
        "\n",
        "# Create inducing points\n",
        "inducing_indices = torch.linspace(0, len(X_train) - 1, M).long()\n",
        "inducing_points = X_train_tensor[inducing_indices].reshape(-1, 1)\n",
        "\n",
        "# Initialize model and likelihood\n",
        "model = SVGPModel(inducing_points)\n",
        "likelihood = gpytorch.likelihoods.GaussianLikelihood()\n"
      ],
      "metadata": {
        "id": "TYg0XnMSGIom"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Training mode\n",
        "model.train()\n",
        "likelihood.train()\n",
        "\n",
        "# Use the GPU if available\n",
        "if torch.cuda.is_available():\n",
        "    model = model.cuda()\n",
        "    likelihood = likelihood.cuda()\n",
        "    X_train_tensor = X_train_tensor.cuda()\n",
        "    y_train_tensor = y_train_tensor.cuda()\n",
        "    X_test_tensor = X_test_tensor.cuda()\n",
        "    y_test_tensor = y_test_tensor.cuda()\n",
        "\n",
        "# Define optimizer and loss function\n",
        "optimizer = torch.optim.Adam([\n",
        "    {'params': model.parameters()},\n",
        "    {'params': likelihood.parameters()}\n",
        "], lr=0.01)\n",
        "\n",
        "mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=y_train_tensor.numel())\n",
        "\n",
        "# Training loop\n",
        "def train(num_epochs=10):\n",
        "    losses = []\n",
        "    for i in range(num_epochs):\n",
        "        optimizer.zero_grad()\n",
        "        output = model(X_train_tensor)\n",
        "        loss = -mll(output, y_train_tensor)\n",
        "        loss.backward()\n",
        "        losses.append(loss.item())\n",
        "        optimizer.step()\n",
        "\n",
        "        if (i+1) % 50 == 0:\n",
        "            print(f'Epoch {i+1}/{num_epochs} - Loss: {loss.item():.3f}')\n",
        "    return losses"
      ],
      "metadata": {
        "id": "UlygodYLMFuA"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Function to make predictions\n",
        "def predict(x_new):\n",
        "    model.eval()\n",
        "    likelihood.eval()\n",
        "    with torch.no_grad():\n",
        "        pred_dist = likelihood(model(x_new))\n",
        "        mean = pred_dist.mean\n",
        "        lower, upper = pred_dist.confidence_region()\n",
        "    return mean, lower, upper\n"
      ],
      "metadata": {
        "id": "C2UZh7d4MFv-"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def plot_results(x_train, y_train, x_test, y_test, mean, lower, upper):\n",
        "    plt.figure(figsize=(12, 6))\n",
        "\n",
        "    # Plot training data\n",
        "    plt.scatter(x_train.cpu().numpy(), y_train.cpu().numpy(), color='blue', label='Training Data', alpha=0.5)\n",
        "    plt.scatter(x_test.cpu().numpy(), y_test.cpu().numpy(), color='red', label='Test Data', alpha=0.5)\n",
        "\n",
        "    # Sort test points for smooth plotting\n",
        "    x_test_np = x_test.cpu().numpy().flatten()\n",
        "    mean_np = mean.cpu().numpy().flatten()\n",
        "    lower_np = lower.cpu().numpy().flatten()\n",
        "    upper_np = upper.cpu().numpy().flatten()\n",
        "\n",
        "    sort_idx = np.argsort(x_test_np)\n",
        "    x_test_sorted = x_test_np[sort_idx]\n",
        "    mean_sorted = mean_np[sort_idx]\n",
        "    lower_sorted = lower_np[sort_idx]\n",
        "    upper_sorted = upper_np[sort_idx]\n",
        "\n",
        "    # Plot sorted predictions\n",
        "    plt.plot(x_test_sorted, mean_sorted, 'k', label='Predicted Mean', linewidth=2)\n",
        "    plt.fill_between(x_test_sorted, lower_sorted, upper_sorted,\n",
        "                    alpha=0.2, color='k', label='95% Confidence')\n",
        "\n",
        "    plt.xlabel('X')\n",
        "    plt.ylabel('Y')\n",
        "    plt.title('SVGP Predictions')\n",
        "    plt.legend()\n",
        "    plt.show()"
      ],
      "metadata": {
        "id": "nDNffQwAMFyE"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Train the model\n",
        "num_epochs = 1000\n",
        "losses = train(num_epochs)"
      ],
      "metadata": {
        "id": "pNqc8g_0MF0I"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "epochs = list(range(1, num_epochs+1))\n",
        "\n",
        "plt.figure(figsize=(8, 5))\n",
        "plt.plot(epochs, losses, marker='o', linestyle='-')\n",
        "plt.xlabel('Epoch')\n",
        "plt.ylabel('Loss')\n",
        "plt.title('Training Loss per Epoch')\n",
        "plt.grid(True)\n",
        "plt.show()"
      ],
      "metadata": {
        "id": "RNfWilnmhZ9v"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Make predictions\n",
        "mean, lower, upper = predict(X_test_tensor)\n",
        "\n",
        "# Plot results\n",
        "plot_results(X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, mean, lower, upper)"
      ],
      "metadata": {
        "id": "EQ4HI9lwMF2W"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "pred_dist = likelihood(model(X_test_tensor))\n",
        "mean = pred_dist.mean\n",
        "std = pred_dist.stddev\n",
        "\n",
        "mean_np = mean.detach().cpu().numpy()\n",
        "std_np = std.detach().cpu().numpy()\n",
        "\n",
        "# Get learned noise variance\n",
        "noise_variance = likelihood.noise.detach().cpu().numpy()\n",
        "print(f\"Noise Variance: {noise_variance}\")"
      ],
      "metadata": {
        "id": "5KidhZWqe6f-"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def crps_gaussian(y, mu, sigma):\n",
        "    z = (y - mu) / sigma\n",
        "    crps = sigma * (z * (2 * norm.cdf(z) - 1) + 2 * norm.pdf(z) - 1 / np.sqrt(np.pi))\n",
        "    return crps\n",
        "\n",
        "rmse = np.sqrt(np.mean((mean_np - y_test)**2))\n",
        "print(f\"RMSE: {rmse}\")\n",
        "\n",
        "crps_values = crps_gaussian(y_test, mean_np, std_np)\n",
        "mean_crps = np.mean(crps_values)\n",
        "print(f\"Mean CRPS: {mean_crps}\")"
      ],
      "metadata": {
        "id": "-WAEDJroIG7G"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "HzRkMOBUc_zv"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}