{
  "cells": [
    {
      "metadata": {
        "id": "jLQ02YT7nyrj"
      },
      "cell_type": "code",
      "source": [
        "#@title License\n",
        "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
        "# you may not use this file except in compliance with the License.\n",
        "# You may obtain a copy of the License at\n",
        "#\n",
        "#     http://www.apache.org/licenses/LICENSE-2.0\n",
        "#\n",
        "# Unless required by applicable law or agreed to in writing, software\n",
        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
        "# See the License for the specific language governing permissions and\n",
        "# limitations under the License."
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "2b-iwumSKjgH"
      },
      "cell_type": "code",
      "source": [
        "# @title Imports\n",
        "import numpy as np\n",
        "from scipy.spatial.distance import cdist\n",
        "from scipy.spatial.distance import pdist\n",
        "from sklearn.metrics.pairwise import rbf_kernel\n",
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import pandas as pd"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "pZ8K_GXzH66I"
      },
      "cell_type": "code",
      "source": [
        "seed=7 # @param {isTemplate: true}\n",
        "\n",
        "_RNG = np.random.default_rng(seed=seed)"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "HMYDSTcblN7m"
      },
      "cell_type": "markdown",
      "source": [
        "# MMD Utils"
      ]
    },
    {
      "metadata": {
        "id": "Gf6cEEFt8p2h"
      },
      "cell_type": "code",
      "source": [
        "def ensure_list(input_data):\n",
        "  if not isinstance(input_data, list):\n",
        "    return [input_data]\n",
        "  return input_data\n",
        "\n",
        "\n",
        "def rbf_kernel(samples_x, samples_y, bw=1.0):\n",
        "  \"\"\"Returns squared exponential (or RBF) kernel matrix.\n",
        "\n",
        "  Adapted from Ben Chugg: https://github.com/bchugg/testing-by-betting.\n",
        "\n",
        "  Args:\n",
        "    samples_x: Input data.\n",
        "    samples_y: Input data.\n",
        "    bw: Kernel bandwidth.\n",
        "  \"\"\"\n",
        "  samples_y = samples_x if samples_y is None else samples_y\n",
        "  samples_x = ensure_list(samples_x)\n",
        "  samples_y = ensure_list(samples_y)\n",
        "  samples_x = np.vstack(samples_x)\n",
        "  samples_y = np.vstack(samples_y)\n",
        "\n",
        "  # Matrix euclidean pairwise distances.\n",
        "  distances = cdist(samples_x, samples_y, 'euclidean')\n",
        "  sq_distances = distances * distances\n",
        "  kernel_matrix = np.exp(-sq_distances / (2 * bw * bw))\n",
        "  return kernel_matrix\n",
        "\n",
        "def get_first_crossing_index(wealth_process, significance):\n",
        "  \"\"\"Returns the first index on the wealth_process to exceed 1/significance.\n",
        "\n",
        "  Args:\n",
        "    wealth_process: List of wealth values.\n",
        "    significance: Significance level.\n",
        "  \"\"\"\n",
        "\n",
        "  idx = np.where(np.array(wealth_process) >= 1 / significance)[0]\n",
        "  if len(idx)>0:\n",
        "    # Return the first time step that the process crosses the threshold.\n",
        "    return idx[0]\n",
        "  else:\n",
        "    return len(wealth_process)"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "_qpcfagZltQH"
      },
      "cell_type": "markdown",
      "source": [
        "# Main classes for sequential testing\n",
        "\n",
        "Below we include different classes for the Online Newton Step algorithm, Online Gradient Ascent, and a generic One-sided Sequential Two sample test that we then use for privacy audits."
      ]
    },
    {
      "metadata": {
        "id": "XNxKyvPuRXBI"
      },
      "cell_type": "code",
      "source": [
        "class OnlineNewtonStep:\n",
        "  \"\"\"Online Newton Step betting strategy.\n",
        "\n",
        "  See Shekhar and Ramdas (2023), \"Nonparametric Two-Sample Testing by Betting\"\n",
        "  https://arxiv.org/pdf/2112.09162.pdf for more details. For clarity we use\n",
        "  the notation of the manuscript.\n",
        "\n",
        "  Attributes:\n",
        "    tau: Threshold value. See Theorem 3.1. in the manuscript.\n",
        "    scaling_constant: Scaling constant, see Definition 5. in\n",
        "      https://arxiv.org/pdf/2112.09162\n",
        "    sum_grads_squared: Sum of squared z values.\n",
        "    previous_lambda: Previous lambda value.\n",
        "  \"\"\"\n",
        "\n",
        "  def __init__(self, tau) -> None:\n",
        "    self.scaling_constant = 2 / (2 - np.log(3))\n",
        "    self.sum_grads_squared = 1\n",
        "    self.previous_lambda = 0\n",
        "    self.tau = tau\n",
        "\n",
        "  def next_bet(self, payoff_history):\n",
        "    \"\"\"Returns the next bet based on the payoff history.\n",
        "\n",
        "    Args:\n",
        "      payoff_history: List of previous payoffs.\n",
        "    \"\"\"\n",
        "\n",
        "    if not payoff_history:\n",
        "      # Bet 0 for the first time.\n",
        "      return 0\n",
        "    else:\n",
        "      previous_payoff = payoff_history[-1]\n",
        "      z = -previous_payoff / (1 + self.previous_lambda * previous_payoff)\n",
        "      self.sum_grads_squared += z**2\n",
        "      lower_limit = 0\n",
        "      upper_limit = 1 / (8 + 4 * self.tau)\n",
        "      lambd = max(\n",
        "          min(\n",
        "              self.previous_lambda\n",
        "              - self.scaling_constant * z / self.sum_grads_squared,\n",
        "              upper_limit,\n",
        "          ),\n",
        "          lower_limit,\n",
        "      )\n",
        "      self.previous_lambda = lambd\n",
        "      return lambd\n",
        "\n",
        "\n",
        "class OnlineGradientAscent:\n",
        "  \"\"\"Online Gradient Ascent betting strategy.\n",
        "\n",
        "  Attributes:\n",
        "    m_t: Current estimate of the second moment of the gradient.\n",
        "    gradient_second_moments: History of the second moment of the gradient.\n",
        "    history_products: History of the products of the betting strategy.\n",
        "    history_auxiliaryterm: History of the auxiliary term.\n",
        "  \"\"\"\n",
        "\n",
        "  def __init__(self) -> None:\n",
        "\n",
        "    # OGA parameters\n",
        "    self.m_t = 0\n",
        "    self.gradient_second_moments = []\n",
        "    self.history_products = []\n",
        "    self.history_auxiliaryterm = []\n",
        "\n",
        "  def step(self, x_hist, y_hist, bw):\n",
        "    \"\"\"Updates the betting strategy and returns the current MMD estimate.\n",
        "\n",
        "    Args:\n",
        "      x_hist: History of the first set of samples.\n",
        "      y_hist: History of the second set of samples.\n",
        "      bw: Kernel bandwidth.\n",
        "\n",
        "    Returns:\n",
        "      The current MMD estimate.\n",
        "    \"\"\"\n",
        "\n",
        "    x = x_hist[-1]\n",
        "    y = y_hist[-1]\n",
        "    steps = len(x_hist)\n",
        "\n",
        "    # First, update 2nd moments history.\n",
        "    increment = (\n",
        "        rbf_kernel(x, x, bw)[0, 0]\n",
        "        + rbf_kernel(y, y, bw)[0, 0]\n",
        "        - 2 * rbf_kernel(x, y, bw)[0, 0]\n",
        "    )\n",
        "    self.m_t += increment\n",
        "    self.gradient_second_moments.append(self.m_t)\n",
        "\n",
        "    if len(x_hist) == 1:\n",
        "      v_t = 0\n",
        "    else:\n",
        "      kernel_matrix = (\n",
        "          rbf_kernel(x_hist[:-1], x, bw)\n",
        "          - rbf_kernel(x_hist[:-1], y, bw)\n",
        "          + rbf_kernel(y_hist[:-1], y, bw)\n",
        "          - rbf_kernel(y_hist[:-1], x, bw)\n",
        "      ).flatten()\n",
        "      v_t = np.sum(\n",
        "          kernel_matrix\n",
        "          * np.array(self.history_products)\n",
        "          / (2 * np.sqrt(self.gradient_second_moments[:-1]))\n",
        "      )\n",
        "\n",
        "    # auxiliary term\n",
        "    aux_term = v_t / np.sqrt(self.m_t) + increment / (4 * self.m_t)\n",
        "    self.history_auxiliaryterm.append(aux_term)\n",
        "\n",
        "    # update the products of normalizations\n",
        "    s_t = sum(\n",
        "        self.history_auxiliaryterm[i] * self.history_products[i] ** 2\n",
        "        for i in range(steps - 1)\n",
        "    )\n",
        "    gamma_t = np.min([1, 1 / (2 * np.sqrt(s_t + aux_term))])\n",
        "    self.history_products.append(1)\n",
        "    self.history_products = [x * gamma_t for x in self.history_products]\n",
        "\n",
        "    return v_t\n",
        "\n",
        "\n",
        "class OneSidedTwoSampleSequentialTest:\n",
        "  \"\"\"Two-sample test using betting.\n",
        "\n",
        "  Attributes:\n",
        "    wealth: Current wealth.\n",
        "    wealth_hist: History of wealth values.\n",
        "    tau: Threshold value.\n",
        "    online_newton_step: Online Newton Step betting strategy.\n",
        "    online_gradient_ascent: Online Gradient Ascent betting strategy.\n",
        "    bandwidth: Kernel bandwidth.\n",
        "    lambd: Current lambda value.\n",
        "    payoff_history: History of payoffs.\n",
        "    x_hist: History of first set of samples.\n",
        "    y_hist: History of second set of samples.\n",
        "  \"\"\"\n",
        "\n",
        "  def __init__(self, epsilon: float, delta: float, bw: float) -> None:\n",
        "\n",
        "    self.wealth = 1\n",
        "    self.wealth_hist = [1]\n",
        "    self.tau = 2 * (\n",
        "        (1 + delta * np.exp(-epsilon))\n",
        "        * (1 - np.exp(-epsilon))\n",
        "        / (1 + np.exp(-epsilon))\n",
        "        + np.exp(-epsilon) * delta\n",
        "    )\n",
        "    self.online_newton_step = OnlineNewtonStep(self.tau)\n",
        "    self.online_gradient_ascent = OnlineGradientAscent()\n",
        "\n",
        "    self.bandwidth = bw\n",
        "    self.lambd = 0\n",
        "    self.payoff_history = []\n",
        "\n",
        "    # Histories of observed samples.\n",
        "    self.x_hist = []\n",
        "    self.y_hist = []\n",
        "\n",
        "  def step(self, x, y):\n",
        "    \"\"\"Performs a step in the two-sample test.\n",
        "\n",
        "    Args:\n",
        "      x: New sample from the first distribution.\n",
        "      y: New sample from the second distribution.\n",
        "\n",
        "    Returns:\n",
        "      The updated wealth value.\n",
        "    \"\"\"\n",
        "\n",
        "    # Update samples history.\n",
        "    self.x_hist.append(x)\n",
        "    self.y_hist.append(y)\n",
        "\n",
        "    mmd = self.online_gradient_ascent.step(\n",
        "        self.x_hist, self.y_hist, self.bandwidth\n",
        "    )\n",
        "    payoff = mmd - self.tau\n",
        "    self.wealth *= 1 + self.lambd * payoff\n",
        "    self.wealth_hist.append(self.wealth)\n",
        "\n",
        "    self.payoff_history.append(payoff)\n",
        "    self.lambd = self.online_newton_step.next_bet(self.payoff_history)\n",
        "\n",
        "    return self.wealth\n"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "mqRsXsG4DYXp"
      },
      "cell_type": "markdown",
      "source": [
        "# Paper experiments\n",
        "\n"
      ]
    },
    {
      "metadata": {
        "id": "oXAF7BTETUcc"
      },
      "cell_type": "markdown",
      "source": [
        "## Benchmark experiments with Gaussian distributions."
      ]
    },
    {
      "metadata": {
        "id": "uqsR73DIDX20"
      },
      "cell_type": "code",
      "source": [
        "def plot_gaussians_experiments(\n",
        "    epsilon: float,\n",
        "    delta: float,\n",
        "    location: float,\n",
        "    significance: float,\n",
        "    max_observations: int,\n",
        "    initial_seed: int,\n",
        "):\n",
        "  \"\"\"Plots wealth process for a test with one dimensional gaussians.\n",
        "\n",
        "  Args:\n",
        "    epsilon: Privacy parameter.\n",
        "    delta: Privacy parameter.\n",
        "    location: Mean of the first gaussian distribution. The second gaussian\n",
        "      distribution has mean 0.\n",
        "    significance: Significance level.\n",
        "    max_observations: Maximum number of observations to use.\n",
        "    initial_seed: Initial seed for the random number generator.\n",
        "\n",
        "  Returns:\n",
        "    A dictionary containing the rejection probability and the average number of\n",
        "    observations to reject.\n",
        "  \"\"\"\n",
        "  num_observations_to_reject = []\n",
        "\n",
        "  for i in range(10):\n",
        "    seed = initial_seed + i\n",
        "    _RNG = np.random.default_rng(seed=seed)\n",
        "    kernel_mmd_dp = OneSidedTwoSampleSequentialTest(epsilon, delta, bw=1.0)\n",
        "\n",
        "    for j in range(max_observations):\n",
        "\n",
        "      samples_x = _RNG.normal(loc=0, scale=1, size=1)\n",
        "      samples_y = _RNG.normal(loc=location, scale=1, size=1)\n",
        "\n",
        "      # Perform the sequential test\n",
        "      kernel_mmd_dp.step(samples_x, samples_y)\n",
        "\n",
        "    curve = kernel_mmd_dp.wealth_hist\n",
        "    num_observations_to_reject.append(\n",
        "        get_first_crossing_index(curve, significance)\n",
        "    )\n",
        "\n",
        "    plt.plot(np.arange(len(curve)), curve, alpha=0.3, color='k')\n",
        "\n",
        "    plt.axhline(y=1 / significance, color='r', linestyle='--')\n",
        "    plt.text(\n",
        "        max_observations + 8,\n",
        "        1 / significance + 1,\n",
        "        f'Rejection threshold $1 / \\\\alpha = {1/significance:.0f}$',\n",
        "        fontsize=14,\n",
        "        ha='right',\n",
        "    )\n",
        "    plt.xlabel('Number of observations', fontsize=16)\n",
        "    plt.ylabel('Auditing process $\\mathcal{K}_t$', fontsize=16)\n",
        "    plt.xticks(fontsize=14)\n",
        "    plt.yticks(fontsize=14)\n",
        "    plt.ylim(0, 100)\n",
        "\n",
        "  return {\n",
        "      'rejection_rate': np.mean(\n",
        "          np.array(num_observations_to_reject) < max_observations\n",
        "      ),\n",
        "      'avg_obs_to_reject': np.mean(num_observations_to_reject),\n",
        "  }"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "U5SkKP-LIEGe"
      },
      "cell_type": "code",
      "source": [
        "plot_gaussians_experiments(0, 0, location = 0,  significance=0.05, max_observations = 200, initial_seed=7)"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "Ni8UCcnCeUZY"
      },
      "cell_type": "code",
      "source": [
        "plot_gaussians_experiments(0, 0, location = 1.0,  significance=0.05, max_observations = 200, initial_seed=7)"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "3uvkHQNHlaZn"
      },
      "cell_type": "code",
      "source": [
        "# Power in different dimensions\n",
        "alpha = 0.05\n",
        "mean_distances = [0, 0.25, 0.5, 0.75, 1]\n",
        "dimensions = np.arange(1, 6)\n",
        "num_samples = 1000\n",
        "num_simulations = 10\n",
        "\n",
        "mean_observations = [[] for _ in range(len(mean_distances))]\n",
        "mean_proportions = [[] for _ in range(len(mean_distances))]\n",
        "ci_observations = [[] for _ in range(len(mean_distances))]\n",
        "\n",
        "for i in range(len(mean_distances)):\n",
        "  mean_distance = mean_distances[i]\n",
        "  observations = mean_observations[i]\n",
        "  proportions = mean_proportions[i]\n",
        "  ci_obs = ci_observations[i]\n",
        "\n",
        "  for dim in dimensions:\n",
        "    obs_to_reject = []\n",
        "    for ii in range(num_simulations):\n",
        "        _RNG = np.random.default_rng(seed=7 + ii)\n",
        "        kernel_mmd_dp = OneSidedTwoSampleSequentialTest(epsilon = 0, delta = 0, bw = np.sqrt(dim))\n",
        "        for jj in range(num_samples):\n",
        "            X1 =  _RNG.normal(loc=0,scale=1, size=dim)\n",
        "            X2 =  _RNG.normal(loc=mean_distance/np.sqrt(dim),scale=1, size=dim)\n",
        "\n",
        "            # Perform the sequential test\n",
        "            kernel_mmd_dp.step(X1, X2)\n",
        "\n",
        "        curve = kernel_mmd_dp.wealth_hist\n",
        "        obs_to_reject.append(get_first_crossing_index(curve, alpha))\n",
        "    observations.append(np.mean(obs_to_reject))\n",
        "    proportions.append(np.mean(np.array(obs_to_reject) < num_samples))\n",
        "    ci_obs.append(1.96*np.std(obs_to_reject)/np.sqrt(num_simulations))"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "N5X2tQOhnPO0"
      },
      "cell_type": "code",
      "source": [
        "markers = ['o', 's', 'd', '^', '*', 'x']\n",
        "\n",
        "for i in range(len(mean_distances)):\n",
        "  plt.errorbar(dimensions, mean_observations[i], yerr=ci_observations[i],\n",
        "                 marker=markers[i], capsize=5, capthick=1,\n",
        "                 label=f\"$||\\mu||_2 = {mean_distances[i]}$\")\n",
        "\n",
        "plt.xticks(range(1,6), range(1,6), fontsize = 14); plt.yticks(fontsize = 14)\n",
        "plt.xlabel('Dimension of $\\mathcal{N}(\\mu, I_d)$ distribution', fontsize = 16)\n",
        "plt.ylabel('Mean size to reject', fontsize = 16)\n",
        "plt.grid(True, linestyle='--', alpha=0.4)\n",
        "plt.legend(loc='right', bbox_to_anchor=(1, 0.65), fontsize = 14, framealpha=0.5)\n",
        "plt.show()"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "L1y2zIL3nSK_"
      },
      "cell_type": "code",
      "source": [
        "for i in range(len(mean_distances)):\n",
        "  ci_props = [1.96*np.sqrt(p*(1 - p)/ num_simulations) for p in mean_proportions[i]]\n",
        "  plt.errorbar(dimensions, mean_proportions[i], yerr=ci_props,\n",
        "                 marker=markers[i], capsize=5, capthick=1,\n",
        "                 label=f\"$||\\mu||_2 = {mean_distances[i]}$\")\n",
        "\n",
        "plt.xticks(range(1,6), range(1,6), fontsize = 14); plt.yticks(fontsize = 14)\n",
        "plt.xlabel(r'Dimension of $\\mathcal{N}(\\mu, I_d)$ distribution', fontsize=16)\n",
        "plt.ylabel('Rejection rate', fontsize = 16)\n",
        "plt.grid(True, linestyle='--', alpha=0.4)\n",
        "plt.legend(loc='right', bbox_to_anchor=(1, 0.6), fontsize = 14)\n",
        "plt.axhline(y=alpha, color='k', linestyle='--', alpha = 0.6)\n",
        "plt.text(4.8, 0.065, r'$\\alpha = 0.05$', fontsize=13, ha='center')\n",
        "plt.show()"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "1zAHWjESWL9p"
      },
      "cell_type": "markdown",
      "source": [
        "## Sequential Test for mean mechanism\n",
        "\n",
        "Please install the dp_auditorium library, following instructions in [their repository](https://github.com/google/differential-privacy/tree/main/python/dp_auditorium)"
      ]
    },
    {
      "metadata": {
        "id": "Ai_8bsXaWLrT"
      },
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "from dp_auditorium.mechanisms import mean\n",
        "from dp_auditorium.mechanisms import mechanisms_utils"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "q4dlOePPWLgD"
      },
      "cell_type": "code",
      "source": [
        "#sequential test for mean mechanisms\n",
        "def mean_mechanisms_experiments(mechanisms_test, epsilon, delta, alpha = 0.05,\n",
        "                                max_observations = 500, bw_obs = 20, num_experiments = 20, show_plot = True):\n",
        "\n",
        "  results = {}\n",
        "  labels = {0: {\"private_mean\": \"DPLaplace\", \"non_private_mean_v1\": \"NonDPLaplace1\", \"non_private_mean_v2\": \"NonDPLaplace2\"},\n",
        "            1e-5: {\"private_mean\": \"DPGaussian\", \"non_private_mean_v1\": \"NonDPGaussian1\", \"non_private_mean_v2\": \"NonDPGaussian2\"}}\n",
        "\n",
        "  for mechanism_name in mechanisms_test:\n",
        "    config = mechanisms_utils.default_mean_mechanism_config_generator(mechanism_name, epsilon=epsilon, delta=delta)\n",
        "    av_curve = np.zeros(max_observations+1)\n",
        "    obs_to_reject = []\n",
        "\n",
        "    for i in range(num_experiments):\n",
        "      _SEED = 7+i\n",
        "      _RNG = np.random.default_rng(seed=_SEED)\n",
        "      mechanism = mean.MeanMechanism(config, _RNG)\n",
        "\n",
        "      bw_samples1 = mechanism(np.array([0]), bw_obs)\n",
        "      bw_samples2 = mechanism(np.array([0, 1]), bw_obs)\n",
        "      med = np.median(pdist(np.concatenate((bw_samples1, bw_samples2), axis=0)))\n",
        "\n",
        "      kernel = OneSidedTwoSampleSequentialTest(epsilon = epsilon, delta = delta, bw = med)\n",
        "\n",
        "      for _ in range(max_observations):\n",
        "          X1 = mechanism(np.array([0]), 1)[0]\n",
        "          X2 = mechanism(np.array([0, 1]), 1)[0]\n",
        "          kernel.step(X1, X2)\n",
        "\n",
        "      curve = kernel.wealth_hist\n",
        "      obs_to_reject.append(get_first_crossing_index(curve, alpha))\n",
        "      av_curve += np.array(curve)/num_experiments\n",
        "\n",
        "    if show_plot:\n",
        "      plt.plot(np.arange(len(av_curve)), av_curve, label = labels[delta][mechanism_name])\n",
        "\n",
        "    # Calculate average crossing time\n",
        "    p = np.mean(np.array(obs_to_reject) < max_observations)\n",
        "    results[mechanism_name] = {'reject_prop': p,\n",
        "                               'ci_reject_prop': np.sqrt(p*(1-p)/num_experiments),\n",
        "                               'avg_obs_to_reject': np.mean(obs_to_reject),\n",
        "                               'ci_obs_to_reject': np.std(obs_to_reject)/np.sqrt(num_experiments)}\n",
        "\n",
        "  if show_plot:\n",
        "    plt.axhline(y=1/alpha, color='r', linestyle='--')\n",
        "    plt.text(max_observations+8, 1/alpha + 1, f\"Rejection threshold $1 / \\\\alpha = {1/alpha:.0f}$\", fontsize=14, ha='right')\n",
        "    plt.xlabel('Number of observations', fontsize = 16); plt.ylabel('Wealth process', fontsize = 16)\n",
        "    plt.legend(loc = 'upper right', fontsize = 14);\n",
        "    plt.ylim(0, 100); plt.xticks(fontsize = 14); plt.yticks(fontsize = 14)\n",
        "    plt.show()\n",
        "\n",
        "  return(results)"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "vKelURbzXedv"
      },
      "cell_type": "code",
      "source": [
        "mechanisms_test = (\"private_mean\", \"non_private_mean_v1\", \"non_private_mean_v2\")"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "-KTvGQOrXaO6"
      },
      "cell_type": "code",
      "source": [
        "test_gaussian_high_privacy = mean_mechanisms_experiments(mechanisms_test=mechanisms_test, epsilon=0.01, delta = 1e-5, max_observations=2000)\n",
        "pd.DataFrame(test_gaussian_high_privacy)"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "puLBpNl7YlBp"
      },
      "cell_type": "code",
      "source": [
        "test_gaussian_low_privacy = mean_mechanisms_experiments(mechanisms_test=mechanisms_test, epsilon=0.1, delta = 1e-5, max_observations=5000)\n",
        "pd.DataFrame(test_gaussian_low_privacy)"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "0Bp_RL_GYk4W"
      },
      "cell_type": "code",
      "source": [
        "test_laplace_high_privacy = mean_mechanisms_experiments(mechanisms_test=mechanisms_test, epsilon=0.01, delta = 0, max_observations=1000)\n",
        "pd.DataFrame(test_laplace_high_privacy)"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "l9cUkrMzSut6"
      },
      "cell_type": "code",
      "source": [
        "test_laplace_low_privacy = mean_mechanisms_experiments(mechanisms_test=mechanisms_test, epsilon=0.1, delta = 0, max_observations=5000)\n",
        "pd.DataFrame(test_laplace_low_privacy)"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "HDo_C6VWMHzR"
      },
      "cell_type": "markdown",
      "source": [
        "## DP-SGD Sequential Test\n",
        "\n",
        "For these experiments:\n",
        "\n",
        "1. From the root directory of the cloned repository, run [`python run_experiment_jaxline.py --config=mnist_audit.py`](https://github.com/google-deepmind/jax_privacy/blob/main/experiments/image_classification/run_experiment_jaxline.py)\n",
        "2.   The config file can be found [here](https://github.com/google-deepmind/jax_privacy/blob/main/experiments/image_classification/configs/mnist_audit.py). Please update the epsilon value (line 64) to your desired target for the audit. The noise added will per iteration will be adjusted to meet this epsilon over the number of iterations.\n",
        "3. Logged train metrics will have a \"`canary_count`\" field and a `dot_product` field. To form the two distributions for analysis:\n",
        "  - The first distribution consists of `dot_product` values where `canary_count=0`.\n",
        "   - The second distribution consists of `dot_product` values where `canary_count=1`.\n",
        "\n"
      ]
    },
    {
      "metadata": {
        "id": "A_gs0bYgAfTL"
      },
      "cell_type": "code",
      "source": [
        "def empirical_epsilon(samples1, samples2, test_epsilons):\n",
        "\n",
        "  alpha = 0.05; delta = 1e-5; bw_obs = 20\n",
        "  lower_bound = []\n",
        "  med = np.median(pdist(np.concatenate((np.array(samples1[:bw_obs]).reshape(-1, 1),\n",
        "                                        np.array(samples2[:bw_obs]).reshape(-1, 1)), axis=0)))\n",
        "\n",
        "  n = min(len(samples1), len(samples2))\n",
        "  testers = {eps: OneSidedTwoSampleSequentialTest(epsilon = eps, delta = delta, bw = med) for eps in test_epsilons}\n",
        "\n",
        "  for jj in range(bw_obs, n):\n",
        "    lb = 0\n",
        "    X1 = samples1[jj]\n",
        "    X2 = samples2[jj]\n",
        "\n",
        "    for eps in test_epsilons:\n",
        "      testers[eps].step(X1, X2)\n",
        "\n",
        "    rejected_eps = [eps for eps in test_epsilons if testers[eps].wealth_hist[-1] > 1/alpha]\n",
        "    if len(rejected_eps) > 0:\n",
        "      lb = np.max(rejected_eps)\n",
        "\n",
        "    lower_bound.append(lb)\n",
        "\n",
        "  return lower_bound"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {
        "id": "BO-bV0gMaNft"
      },
      "cell_type": "code",
      "source": [
        "# load file with samples.\n",
        "# We use two gaussians here.\n",
        "samples1 = np.random.normal(size=1000)\n",
        "samples2 = np.random.normal(mean=1.0, size=1000)\n",
        "\n",
        "n = min(len(samples1), len(samples2))\n",
        "test_epsilons = np.arange(0.001, 0.02, 0.002)\n",
        "\n",
        "lower_bound_001 = empirical_epsilon(samples1, samples2, test_epsilons)\n",
        "\n",
        "plt.plot(np.arange(len(lower_bound_001)), lower_bound_001, alpha=0.4, color='blue')\n",
        "\n",
        "plt.xlabel('Number of observations', fontsize = 18)\n",
        "plt.ylabel('Empirical lower bound $\\epsilon$', fontsize = 18)\n",
        "plt.axhline(y=0.01, color='r', linestyle='--')\n",
        "plt.text(0, 0.05, \"Theoretical $\\epsilon^{ub} = 0.01$\", fontsize=15, ha='left')\n",
        "plt.ylim(-0.09, 1.09); plt.xticks(fontsize = 14); plt.yticks(fontsize = 14)\n",
        "plt.show()"
      ],
      "outputs": [],
      "execution_count": null
    }
  ],
  "metadata": {
    "colab": {
      "private_outputs": true,
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}