#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.

"""
Command-line script for computing privacy of a model trained with DP-SGD.
The script applies the RDP accountant to estimate privacy budget of an iterated
Sampled Gaussian Mechanism.

The code is mainly based on Google's TF Privacy:
https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py


Example
-------

To call this script from command line, you can enter:

>>>  python compute_dp_sgd_privacy.py --dataset-size=60000 --batch-size=256 --noise_multiplier=1.12 --epochs=60 --delta=1e-5 --a 10 20 100

The training process with these parameters satisfies (epsilon,delta)-DP of (2.95, 1e-5).
"""
import argparse
import math
from typing import List, Tuple

from opacus import privacy_analysis as tf_privacy


def _apply_dp_sgd_analysis(
    sample_rate: float,
    noise_multiplier: float,
    steps: int,
    alphas: List[float],
    delta: float,
    verbose: bool = True,
) -> Tuple[float, float]:
    """
    Performs the DP-SGD privacy analysis based on sample rate and steps.

    Performs the DP-SGD privacy analysis and computes privacy loss epsilon
    and optimal order alpha.

    Parameters
    ----------
    sample_rate : float
        The sample rate in SGD
    noise_multiplier : float
        The ratio of the standard deviation of the Gaussian noise to
        the L2-sensitivity of the function to which the noise is added
    steps : int
        The number of steps
    alphas : List[float]
        A list of RDP orders
    delta : float
        Target delta
    verbose : bool, optional
        If enabled, will print the results of DP-SGD analysis; by default True

    Returns
    -------
    Tuple[float, float]
        Pair of privacy loss epsilon and optimal order alpha
    """
    rdp = tf_privacy.compute_rdp(sample_rate, noise_multiplier, steps, alphas)
    eps, opt_alpha = tf_privacy.get_privacy_spent(alphas, rdp, delta=delta)

    if verbose:
        print(
            f"DP-SGD with\n\tsampling rate = {100 * sample_rate:.3g}%,"
            f"\n\tnoise_multiplier = {noise_multiplier},"
            f"\n\titerated over {steps} steps,\nsatisfies "
            f"differential privacy with\n\tepsilon = {eps:.3g},"
            f"\n\tdelta = {delta}."
            f"\nThe optimal alpha is {opt_alpha}."
        )

        if opt_alpha == max(alphas) or opt_alpha == min(alphas):
            print(
                "The privacy estimate is likely to be improved by expanding "
                "the set of alpha orders."
            )
    return eps, opt_alpha


def compute_dp_sgd_privacy(
    sample_size: int,
    batch_size: int,
    noise_multiplier: float,
    epochs: int,
    delta: float,
    alphas: List[float],
    verbose: bool = True,
) -> Tuple[float, float]:
    """
    Performs the DP-SGD privacy analysis.

    Finds sample rate and number of steps based on the input parameters, and calls
    DP-SGD privacy analysis to find the privacy loss epsilon and optimal order alpha.

    Parameters
    ----------
    sample_size : int
        The size of the sample (dataset)
    batch_size : int
        Batch size
    noise_multiplier : float
        The ratio of the standard deviation of the Gaussian noise to
        the L2-sensitivity of the function to which the noise is added
    epochs : int
        Number of epochs
    delta : float
        Target delta.
    alphas : List[float]
        A list of RDP orders
    verbose : bool, optional
         If enabled, will print the results of DP-SGD analysis; by default True

    Returns
    -------
    Tuple[float, float]
        Pair of privacy loss epsilon and optimal order alpha

    Raises
    ------
    ValueError
        When batch size is greater than sample size
    """
    sample_rate = batch_size / sample_size
    if sample_rate > 1:
        raise ValueError("sample_size must be larger than the batch size.")
    steps = epochs * math.ceil(sample_size / batch_size)

    return _apply_dp_sgd_analysis(
        sample_rate, noise_multiplier, steps, alphas, delta, verbose
    )


def main():
    parser = argparse.ArgumentParser(description="RDP computation")
    parser.add_argument(
        "-s",
        "--dataset-size",
        type=int,
        default=60000,
        help="Training dataset size (default: 60_000)",
    )
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=256,
        help="Input batch size (default: 256)",
    )
    parser.add_argument(
        "-n",
        "--noise_multiplier",
        type=float,
        default=1.12,
        help="Noise multiplier (default: 1.12)",
    )
    parser.add_argument(
        "-e",
        "--epochs",
        type=int,
        default=60,
        help="Number of epochs to train (default: 60)",
    )
    parser.add_argument(
        "-d", "--delta", type=float, default=1e-5, help="Targeted delta (default: 1e-5)"
    )
    parser.add_argument(
        "-a",
        "--alphas",
        action="store",
        dest="alphas",
        type=float,
        nargs="+",
        default=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
        help="List of alpha values (alpha orders of Renyi-DP evaluation). "
        "A default list is provided. Else, space separated numbers. E.g.,"
        "-a 10 100",
    )

    args = parser.parse_args()

    compute_dp_sgd_privacy(
        args.dataset_size,
        args.batch_size,
        args.noise_multiplier,
        args.epochs,
        args.delta,
        args.alphas,
    )


if __name__ == "__main__":
    main()
