{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "74a30921-2ec1-4620-83af-80402cda5b6f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Importing the libraries\n",
    "%run SWC.ipynb\n",
    "%run LDP_BO.ipynb\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import datetime as dt\n",
    "from geopy.distance import geodesic\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "import warnings \n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7e21216f-5d7f-4c30-8e85-5f9ff954b301",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ===== Load the data =====\n",
    "# The CSV location defaults to the original hard-coded path. Set the\n",
    "# UBER_CSV_PATH environment variable to point at your own copy of uber.csv\n",
    "# instead of editing this cell.\n",
    "import os\n",
    "\n",
    "file_path = os.environ.get(\"UBER_CSV_PATH\", \"C:\\\\Users\\\\123\\\\Desktop\\\\uber.csv\")\n",
    "df = pd.read_csv(file_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4cdd6a4f-8f3a-414f-932d-eaa6b1f21534",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ===== Data cleaning =====\n",
    "# Drop rows containing missing values, then remove the two identifier columns.\n",
    "# Rebinding `df` (instead of mutating it in place) together with errors='ignore'\n",
    "# lets this cell be re-run without a KeyError on the already-removed columns.\n",
    "df = df.dropna(axis=0).drop(columns=['Unnamed: 0', 'key'], errors='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d31d578b-d7c1-4c61-8e65-374cc89eb555",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ===== Extraction of time variables =====\n",
    "# The keyword arguments of assign() are applied in order, so the weekday and\n",
    "# hour columns are derived from the already-parsed pickup timestamp.\n",
    "df = df.assign(\n",
    "    pickup_datetime=lambda frame: pd.to_datetime(frame[\"pickup_datetime\"]),\n",
    "    weekday=lambda frame: frame[\"pickup_datetime\"].dt.weekday,\n",
    "    hour=lambda frame: frame[\"pickup_datetime\"].dt.hour,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "004c169d-81d3-4b79-83e6-d8ef7e2cff12",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ===== Extraction of distance variables =====\n",
    "# Keep only the rows whose pickup and drop-off coordinates are valid\n",
    "# latitudes (-90..90) and longitudes (-180..180). The explicit .copy() makes the\n",
    "# filtered frame independent of the original, so adding the new column below does\n",
    "# not write into a slice (SettingWithCopyWarning).\n",
    "valid_coords = (\n",
    "    df[\"pickup_latitude\"].between(-90, 90)\n",
    "    & df[\"dropoff_latitude\"].between(-90, 90)\n",
    "    & df[\"pickup_longitude\"].between(-180, 180)\n",
    "    & df[\"dropoff_longitude\"].between(-180, 180)\n",
    ")\n",
    "df = df[valid_coords].copy()\n",
    "\n",
    "# Geodesic pickup -> drop-off distance (geopy's `.m` attribute), rounded to two\n",
    "# decimals. Iterating over the zipped columns avoids one label lookup per row\n",
    "# and per column.\n",
    "distances = [\n",
    "    round(geodesic((pickup_lat, pickup_lon), (dropoff_lat, dropoff_lon)).m, 2)\n",
    "    for pickup_lat, pickup_lon, dropoff_lat, dropoff_lon in zip(\n",
    "        df[\"pickup_latitude\"],\n",
    "        df[\"pickup_longitude\"],\n",
    "        df[\"dropoff_latitude\"],\n",
    "        df[\"dropoff_longitude\"],\n",
    "    )\n",
    "]\n",
    "\n",
    "df[\"distance\"] = distances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53ae9d85-3c1e-48ef-9b7a-0828a00f9c71",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Feature matrix and regression target.\n",
    "# .copy() makes X independent of df, so the in-place scaling below does not\n",
    "# operate on a slice of df (avoids SettingWithCopyWarning).\n",
    "X = df[[\"distance\", \"passenger_count\", \"weekday\", \"hour\"]].copy()\n",
    "y = df[\"fare_amount\"]\n",
    "\n",
    "\n",
    "# ===== Data standardization =====\n",
    "# Only the first column (distance) is standardized; the remaining features\n",
    "# keep their raw scale.\n",
    "std = StandardScaler()\n",
    "first_col_scaled = std.fit_transform(X.iloc[:, [0]])\n",
    "X.iloc[:, 0] = first_col_scaled.flatten()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7d0e6110-b3ec-48cd-979d-8907910b0e41",
   "metadata": {},
   "outputs": [],
   "source": [
    "from gpytorch.mlls import ExactMarginalLogLikelihood\n",
    "import torch\n",
    "import gpytorch\n",
    "\n",
    "# ===== Constructing the Gaussian process regression model to be optimized =====\n",
    "class ExactGPModel(gpytorch.models.ExactGP):\n",
    "    \"\"\"Exact GP with a constant mean and a scaled ARD RBF kernel.\n",
    "\n",
    "    The kernel lengthscales and output scale are initialised from the\n",
    "    constructor arguments.\n",
    "\n",
    "    Args:\n",
    "        train_x: Training inputs; the size of the last dimension is used as the\n",
    "            number of ARD lengthscales.\n",
    "        train_y: Training targets.\n",
    "        likelihood: Likelihood forwarded to ``gpytorch.models.ExactGP``.\n",
    "        lengthscales: Value assigned to the RBF kernel's ``lengthscale``.\n",
    "        outputscale: Value assigned to the ``ScaleKernel``'s ``outputscale``.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, train_x, train_y, likelihood, lengthscales, outputscale):\n",
    "        super().__init__(train_x, train_y, likelihood)\n",
    "        self.mean_module = gpytorch.means.ConstantMean()\n",
    "        self.covar_module = gpytorch.kernels.ScaleKernel(\n",
    "            gpytorch.kernels.RBFKernel(ard_num_dims=train_x.shape[-1])\n",
    "        )\n",
    "        self.covar_module.base_kernel.lengthscale = lengthscales\n",
    "        self.covar_module.outputscale = outputscale\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Return the multivariate normal defined by the mean and kernel at ``x``.\"\"\"\n",
    "        mean_x = self.mean_module(x)\n",
    "        covar_x = self.covar_module(x)\n",
    "        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)\n",
    "\n",
    "    def set_train_data(self, x, y, strict=False):\n",
    "        \"\"\"Replace the stored training inputs and targets.\n",
    "\n",
    "        ``strict`` is accepted for signature compatibility but is not used.\n",
    "        \"\"\"\n",
    "        self.train_inputs = (x,)\n",
    "        self.train_targets = y\n",
    "        # Drop any cached prediction strategy so later predictions are not\n",
    "        # computed from caches built on the previous training data (the\n",
    "        # base-class set_train_data performs the same reset).\n",
    "        self.prediction_strategy = None\n",
    "\n",
    "\n",
    "class ObjectiveWithSlidingWindow:\n",
    "    \"\"\"Online GP objective evaluated on a sliding window of recent observations.\n",
    "\n",
    "    Calling the object with log-lengthscales and one new observation\n",
    "      1. predicts the new target with a GP built on the current window,\n",
    "      2. appends the observation to the window (only the latest ``max_size``\n",
    "         rows are kept), and\n",
    "      3. evaluates the negative marginal log likelihood of the updated window.\n",
    "    \"\"\"\n",
    "\n",
    "    # Noise value assigned to every GaussianLikelihood built by this class.\n",
    "    NOISE = 1e-4\n",
    "\n",
    "    def __init__(self, initial_x=None, initial_y=None, max_size=1000):\n",
    "        \"\"\"Create the window, optionally pre-filled with ``initial_x`` / ``initial_y``.\n",
    "\n",
    "        Args:\n",
    "            initial_x: Optional tensor of initial inputs (copied).\n",
    "            initial_y: Optional tensor of initial targets (copied).\n",
    "            max_size: Maximum number of most recent rows kept after an update.\n",
    "        \"\"\"\n",
    "        self.max_size = max_size\n",
    "\n",
    "        if initial_x is not None and initial_y is not None:\n",
    "            self.all_train_x = initial_x.clone().detach()\n",
    "            self.all_train_y = initial_y.clone().detach()\n",
    "        else:\n",
    "            self.all_train_x = torch.empty(0)\n",
    "            self.all_train_y = torch.empty(0)\n",
    "\n",
    "    def update_and_get_data(self, new_x, new_y):\n",
    "        \"\"\"Append ``new_x`` / ``new_y`` to the window and return the trimmed window.\n",
    "\n",
    "        A 1-D ``new_x`` gets a trailing dimension added. After appending, only\n",
    "        the last ``max_size`` rows are kept.\n",
    "\n",
    "        Returns:\n",
    "            Tuple ``(all_train_x, all_train_y)`` holding the current window.\n",
    "        \"\"\"\n",
    "        new_x = new_x.clone().detach()\n",
    "        if new_x.dim() == 1:\n",
    "            new_x = new_x.unsqueeze(-1)\n",
    "        new_y = new_y.clone().detach()\n",
    "\n",
    "        if len(self.all_train_x) == 0:\n",
    "            self.all_train_x = new_x\n",
    "            self.all_train_y = new_y\n",
    "        else:\n",
    "            self.all_train_x = torch.cat([self.all_train_x, new_x], dim=0)\n",
    "            self.all_train_y = torch.cat([self.all_train_y, new_y], dim=0)\n",
    "        if len(self.all_train_x) > self.max_size:\n",
    "            self.all_train_x = self.all_train_x[-self.max_size:]\n",
    "            self.all_train_y = self.all_train_y[-self.max_size:]\n",
    "\n",
    "        return self.all_train_x, self.all_train_y\n",
    "\n",
    "    def _build_model(self, train_x, train_y, lengthscales, outputscale):\n",
    "        \"\"\"Build a GP on the given data with noise ``NOISE`` and mean constant ``train_y.mean()``.\"\"\"\n",
    "        likelihood = gpytorch.likelihoods.GaussianLikelihood()\n",
    "        likelihood.noise = self.NOISE\n",
    "        model = ExactGPModel(\n",
    "            train_x=train_x,\n",
    "            train_y=train_y,\n",
    "            likelihood=likelihood,\n",
    "            lengthscales=lengthscales,\n",
    "            outputscale=outputscale\n",
    "        )\n",
    "        model.mean_module.constant.data.fill_(train_y.mean().item())\n",
    "        return model, likelihood\n",
    "\n",
    "    def __call__(self, hyperparams, new_x, new_y, fixed_outputscale=1.0):\n",
    "        \"\"\"Score ``hyperparams`` on the window and absorb the new observation.\n",
    "\n",
    "        Args:\n",
    "            hyperparams: Log-lengthscales (they are exponentiated before use); a\n",
    "                leading dimension of size one is squeezed away.\n",
    "            new_x: Inputs of the incoming observation(s); a 1-D tensor gets a\n",
    "                trailing dimension added.\n",
    "            new_y: Targets of the incoming observation(s).\n",
    "            fixed_outputscale: Output scale given to the kernel.\n",
    "\n",
    "        Returns:\n",
    "            Tuple ``(loss, pred_mean_new, mse)``:\n",
    "              * ``loss`` - negative marginal log likelihood of the updated\n",
    "                window, with one leading dimension added;\n",
    "              * ``pred_mean_new`` - predictive mean at ``new_x`` from the window\n",
    "                before the update (zeros when the window was empty);\n",
    "              * ``mse`` - mean squared error between that prediction and\n",
    "                ``new_y`` (``0.0`` when the window was empty).\n",
    "        \"\"\"\n",
    "        new_x = new_x.clone().detach()\n",
    "        new_y = new_y.clone().detach()\n",
    "        if new_x.dim() == 1:\n",
    "            new_x = new_x.unsqueeze(-1)\n",
    "\n",
    "        lengthscales = torch.exp(hyperparams)\n",
    "        if lengthscales.dim() > 1:\n",
    "            lengthscales = lengthscales.squeeze(0)\n",
    "\n",
    "        # ---- 1. Predict the incoming observation from the current window ----\n",
    "        if len(self.all_train_x) > 0:\n",
    "            window_x = self.all_train_x\n",
    "            if window_x.dim() == 1:\n",
    "                window_x = window_x.unsqueeze(-1)\n",
    "            window_y = self.all_train_y\n",
    "\n",
    "            eval_model, eval_likelihood = self._build_model(\n",
    "                window_x, window_y, lengthscales, fixed_outputscale\n",
    "            )\n",
    "            eval_model.eval()\n",
    "            eval_likelihood.eval()\n",
    "\n",
    "            with torch.no_grad():\n",
    "                pred_mean_new = eval_model(new_x).mean\n",
    "                if pred_mean_new.shape != new_y.shape:\n",
    "                    pred_mean_new = pred_mean_new.squeeze()\n",
    "                mse = torch.nn.functional.mse_loss(pred_mean_new, new_y)\n",
    "        else:\n",
    "            # Nothing to predict from yet.\n",
    "            pred_mean_new = torch.zeros_like(new_y)\n",
    "            mse = torch.tensor(0.0)\n",
    "\n",
    "        # ---- 2. Absorb the observation into the window ----\n",
    "        train_x, train_y = self.update_and_get_data(new_x, new_y)\n",
    "        if train_x.dim() == 1:\n",
    "            train_x = train_x.unsqueeze(-1)\n",
    "\n",
    "        # ---- 3. Negative marginal log likelihood of the updated window ----\n",
    "        train_model, train_likelihood = self._build_model(\n",
    "            train_x, train_y, lengthscales, fixed_outputscale\n",
    "        )\n",
    "        # The marginal likelihood must be computed from the GP prior, which the\n",
    "        # model returns in train mode. In eval mode the model would return the\n",
    "        # posterior conditioned on the very data being scored.\n",
    "        train_model.train()\n",
    "        train_likelihood.train()\n",
    "        with torch.no_grad():\n",
    "            output = train_model(train_x)\n",
    "            mll = ExactMarginalLogLikelihood(train_likelihood, train_model)\n",
    "            loss = -mll(output, train_y)\n",
    "\n",
    "        return loss.unsqueeze(0), pred_mean_new, mse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8bdc0c5f-2574-4854-9f6c-16d23f835289",
   "metadata": {},
   "outputs": [],
   "source": [
    "def ldp_bo(task_id, num, delta_0, B, epsilon, delta, kappa):\n",
    "    \"\"\"Run one replication of the online LDP-BO loop and return the per-step MSE.\n",
    "\n",
    "    Relies on names defined outside this cell: ``dim``, ``online_yx``,\n",
    "    ``online_yy`` and the helpers ``DerivativeExactGPSEModel``,\n",
    "    ``GradientInformation``, ``optimize_acqf_custom_bo`` and ``dkmppf_sw``\n",
    "    (presumably provided by the notebooks loaded with %run -- confirm).\n",
    "\n",
    "    Args:\n",
    "        task_id: Replication index supplied by the caller; not used inside\n",
    "            the function.\n",
    "        num: Number of online iterations; iteration ``i`` consumes row ``i``\n",
    "            of ``online_yx`` / ``online_yy``.\n",
    "        delta_0: Enters the Gaussian noise scale through\n",
    "            ``sqrt(2 * log(1.25 / delta_0))``.\n",
    "        B: Clipping threshold applied to the norm of the gradient estimate.\n",
    "        epsilon: Divides the noise scale (``2 * B / epsilon``).\n",
    "        delta: Half-width of the acquisition search box around the current\n",
    "            parameters.\n",
    "        kappa: Forwarded to ``dkmppf_sw`` when selecting the training points\n",
    "            to keep.\n",
    "\n",
    "    Returns:\n",
    "        Tensor of shape ``(num, 1)`` holding the MSE reported by the objective\n",
    "        at each iteration.\n",
    "    \"\"\"\n",
    "    import math  # local import: math is not imported at the top of this notebook\n",
    "\n",
    "    params = torch.ones(1, dim)\n",
    "    params_bar = params\n",
    "    model = DerivativeExactGPSEModel(dim)\n",
    "    objective_fri = ObjectiveWithSlidingWindow(max_size=100)\n",
    "\n",
    "    # Initialise the surrogate with the objective evaluated on the first observation.\n",
    "    res = objective_fri(params, online_yx[0,].unsqueeze(0), online_yy[0].unsqueeze(0))\n",
    "    f_params = res[0]\n",
    "    model.append_train_data(params, f_params)\n",
    "    acquisition_fcn = GradientInformation(model)\n",
    "    model.posterior(params)\n",
    "    acquisition_fcn.update_theta_i(params)\n",
    "    bounds = torch.tensor([[-delta], [delta]]) + torch.ones(dim)\n",
    "    bayes_matrix = torch.ones(num, dim)  # running parameter averages (recorded, not returned)\n",
    "    mse_bo = torch.ones(num, 1)\n",
    "\n",
    "    for i in range(num):\n",
    "        new_x, acq_value = optimize_acqf_custom_bo(acquisition_fcn, bounds=bounds, q=1, num_restarts=2, raw_samples=32)\n",
    "        # NOTE(review): the objective is evaluated at `params` but stored below as\n",
    "        # the observation for `new_x` -- confirm this is intended.\n",
    "        res = objective_fri(params, online_yx[i,].unsqueeze(0), online_yy[i].unsqueeze(0))\n",
    "        new_y = res[0]\n",
    "        model.append_train_data(new_x, new_y)\n",
    "        model.posterior(params)\n",
    "        acquisition_fcn.update_K_xX_dx()\n",
    "\n",
    "        # Keep only the training points selected by dkmppf_sw.\n",
    "        index = dkmppf_sw(model, params, kappa)\n",
    "        train_x = model.train_xs[index,]\n",
    "        train_y = model.train_ys[index]\n",
    "        model.update_train_data(train_x, train_y)\n",
    "        model.posterior(params)\n",
    "        acquisition_fcn.update_K_xX_dx()\n",
    "\n",
    "        with torch.no_grad():\n",
    "            # Gradient estimate from the surrogate, clipped to norm at most B.\n",
    "            params_grad = model._get_KxX_dx(params) @ model.get_KXX_inv() @ model.train_ys\n",
    "            params_grad_clipped = params_grad * min(1, B / torch.norm(params_grad))\n",
    "            # Gaussian noise scaled by the caller-supplied epsilon (previously a\n",
    "            # hard-coded `epsilon = 1` here silently overrode the argument).\n",
    "            noise = torch.randn(dim) * (2 * B / epsilon) * math.sqrt(2 * math.log(1.25 / delta_0))\n",
    "            # Noisy gradient step with step size 0.15 * (i + 1) ** (-0.505).\n",
    "            params = params - 0.15 * (i + 1) ** (-0.505) * (params_grad_clipped + noise)\n",
    "            acquisition_fcn.update_theta_i(params)\n",
    "            params_bar = (i * params_bar + params) / (i + 1)\n",
    "            bayes_matrix[i,] = params_bar\n",
    "            bounds = torch.tensor([[-delta], [delta]]) + params\n",
    "            mse_bo[i] = res[2]\n",
    "    return mse_bo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae1961c0-feba-4c01-88d3-899788e0996b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ===== Experiment configuration =====\n",
    "B = 1  # Gradient clipping threshold\n",
    "epsilon = 1  # Privacy parameter (divides the injected noise scale)\n",
    "delta = 0.2  # Acquisition function exploration bound\n",
    "delta_0 = 0.2  # Privacy parameter (enters the Gaussian noise scale)\n",
    "kappa = 0.1  # Compression budget\n",
    "n_replications = 100  # Number of replications\n",
    "n_jobs = -1  # joblib: -1 uses all available CPU cores\n",
    "\n",
    "# NOTE(review): `num`, `dim`, `Parallel` and `delayed` are not defined in this\n",
    "# notebook; presumably they come from the notebooks loaded with %run -- confirm.\n",
    "\n",
    "online_yx = torch.tensor(X.values, dtype=torch.float32)\n",
    "online_yy = torch.tensor(y.values, dtype=torch.float32)\n",
    "# The target is modelled on the log scale, so rows with a non-positive fare\n",
    "# (whose log would be NaN or -inf) are removed from both tensors first.\n",
    "positive_fare = online_yy > 0\n",
    "online_yx = online_yx[positive_fare]\n",
    "online_yy = torch.log(online_yy[positive_fare])\n",
    "\n",
    "# The keyword must be `delta_0` to match the signature of ldp_bo.\n",
    "result_dp_bayes = Parallel(n_jobs=n_jobs)(\n",
    "    delayed(ldp_bo)(x, num=num, delta_0=delta_0, B=B, epsilon=epsilon, delta=delta, kappa=kappa)\n",
    "    for x in range(n_replications)\n",
    ")\n",
    "results_dp_bayes = torch.stack(result_dp_bayes)\n",
    "\n",
    "\n",
    "# Calculate the estimated mean and standard deviation\n",
    "def compute_mean_and_std(results, iter):\n",
    "    \"\"\"Element-wise mean and population standard deviation over replications.\n",
    "\n",
    "    Args:\n",
    "        results: Stacked replication results; the first dimension indexes the\n",
    "            replication.\n",
    "        iter: Number of leading replications to aggregate.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(mean_result, std_result)``, each broadcast to shape\n",
    "        ``(num, dim)`` using the globals ``num`` and ``dim``.\n",
    "    \"\"\"\n",
    "    stacked = results[:iter]\n",
    "    mean_result = stacked.mean(dim=0)\n",
    "    # Population standard deviation (divides by the number of replications).\n",
    "    std_result = ((stacked - mean_result) ** 2).mean(dim=0).sqrt()\n",
    "    # NOTE(review): ldp_bo returns a (num, 1) tensor, so after broadcasting to\n",
    "    # (num, dim) the `dim` columns of each output are identical.\n",
    "    return mean_result.expand(num, dim).clone(), std_result.expand(num, dim).clone()\n",
    "\n",
    "\n",
    "mean_dp_bayes, sd_dp_bayes = compute_mean_and_std(results_dp_bayes, n_replications)\n",
    "# Convert to a NumPy array so the DataFrame holds plain floats rather than tensor objects.\n",
    "final_dp_bayes = pd.DataFrame(torch.cat([mean_dp_bayes, sd_dp_bayes], dim=1).numpy())\n",
    "final_dp_bayes.to_excel('dp_bayes.xlsx', index=False)  # Output and save the results"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
