library(ggplot2)
library(ExtDist)
library(dplyr)
library(tidyr)
library(latex2exp)

# Figure 1: MCMC results

# seed set by date of original submission
set.seed(210522)

mcmc_unif_log10delta = function(n, ep, m, d) {
  #' delta cost for MCMC implementation with uniform proposals
  #' @param n sample size
  #' @param ep epsilon 
  #' @param m chain length
  #' @param d dimension
  b = (2 * d / (n * ep) * (1 - exp(-n * ep / (2 * d))))**d
  res = m * log10(1 - b) + log10(1 + exp(ep))
  ifelse(res >= -308, res, -308)
}

mur = expand.grid(
  ns=c(100, 200, 500),
  eps=c(.01, .02, .05, .1),
  ms=10**(2:5),
  ds=c(1)
)

mur$del = mcmc_unif_log10delta(mur$ns, mur$eps, mur$ms, mur$ds)
mur$nlab = paste("n=", mur$ns, sep="")
mur$dlab = paste("d=", mur$ds, sep="")
mur$epsilon = factor(mur$eps)

ggplot(mur) + 
  geom_point(aes(log10(ms), del, color=epsilon)) +
  geom_line(aes(log10(ms), del, color=epsilon)) + 
  geom_hline(yintercept=-308, linetype="dashed", alpha=.5) +
  facet_grid(cols=vars(nlab)) + 
  ylim(-309, 1) + 
  xlab("log10(ChainLength)") + 
  ylab("log10(Delta)")


mcmc_lap_log10delta = function(n, ep, m, d, a=1) {
  #' delta cost for MCMC implementation with Laplace proposals
  #' @param n sample size
  #' @param ep epsilon 
  #' @param m chain length
  #' @param d dimension
  #' @param a alpha scale factor 
  b = (2 * a)**d * exp(- (a*d + ep * n / 2)) * 
    (1 / a * (1 - exp(-a)))**d
  res = m * log10(1 - b) * log10(1 + exp(ep))
  ifelse(res >= -308, res, -308)
}

mlr = expand.grid(
  ns=c(100, 200, 500),
  eps=c(.01, .02, .05, .1),
  ms=10**(2:15),
  ds=c(1),
  afs=c(1)
)
mlr$as = mlr$ns * mlr$eps / (2 * mlr$ds) * mlr$afs
mlr$del = mcmc_lap_log10delta(mlr$ns, mlr$eps, mlr$ms, mlr$ds, mlr$as)
mlr$nlab = paste("n=", mlr$ns, sep="")
mlr$dlab = paste("d=", mlr$ds, sep="")
mlr$epsilon = factor(mlr$eps)

ggplot(mlr) + 
  geom_point(aes(log10(ms), del, color=epsilon)) +
  geom_line(aes(log10(ms), del, color=epsilon)) + 
  geom_hline(yintercept=-308, linetype="dashed", alpha=.5) +
  facet_grid(cols=vars(nlab)) + 
  ylim(-309, 1) + 
  xlab("log10(ChainLength)") + 
  ylab("log10(Delta)")

# Figure 2: perfect sampling results

sim_dlap = function(n, ep, d, x) { 
  #' draws one sample from N_prop using Laplace proposals
  #' @param n sample size
  #' @param ep epsilon 
  #' @param d dimension 
  #' @param x confidential mean value
  n_tot = 0
  outer_accept = 0
  while (outer_accept == 0) {
    outr = rgeom(1, 1/4) + 1
    y_curr = x
    for (ix in 1:outr) {
      # bernoulli draw
      bfn = rbinom(1, 1, 2/3) 
      # if sampling from remainder
      if (bfn == 1) { 
        # sample from chain
        accept = 0
        while (accept == 0) { 
          n_tot = n_tot + 1
          prop_pert = rLaplace(d, b=ep*n/2)
          y_prop = y_curr + prop_pert 
          if (all(y_prop <= 1 & y_prop >= 0)) {
            log_p_acc = sum(dLaplace(y_prop, x, ep*n/2, log=TRUE) - 
                              dLaplace(y_curr, x, ep*n/2, log=TRUE))
            if (rbinom(1, 1, min(exp(log_p_acc), 1)) * rbinom(1, 1, 1/2) == 1) {
              y_curr = y_prop
              accept = 1
            }
          }
        }
      } else {
        # regenerate
        n_tot = n_tot + 1
        y_curr = x
      }
    }
    if (y_curr != x) { 
      outer_accept = 1
    }
  }
  n_tot
}

# Figure 2: distribution of N_prop

# seed set by date of revisions with reviewers
set.seed(20211015)
ep_vs = c(.01, .02, .05, .1)
n_vs = c(100, 200, 500)
x_vs = runif(10000)

# Experimental runtimes
exp_vs = expand.grid(n_vs, ep_vs, x_vs)
exp_out_lap = mapply(
  function(n, ep, x) { sim_dlap(n, ep, 1, x) }, 
  exp_vs$Var1, exp_vs$Var2, exp_vs$Var3
)

exp_res_lap = cbind(exp_vs, exp_out_lap)
names(exp_res_lap) <- c("n", "epsilon", "x", "n_prop")
exp_res_lap$prop_type = "Laplace proposals"

exp_res_lap$`X ~ Beta(.1,.1)` = dbeta(exp_res_lap$x, .1, .1)
exp_res_lap$`X ~ Beta(1,1)`= dbeta(exp_res_lap$x, 1., 1.)
exp_res_lap$`X ~ Beta(10,10)` = dbeta(exp_res_lap$x, 10, 10)

exp_res = pivot_longer(exp_res_lap, c("X ~ Beta(.1,.1)", "X ~ Beta(1,1)", "X ~ Beta(10,10)"),
                       names_to="betas",
                       values_to="weight")
exp_res$n = paste("n=", exp_res$n, sep="")


ggplot(exp_res) + 
  geom_violin(mapping=aes(x=factor(epsilon), y=log10(n_prop), weight=weight, color=betas),
              draw_quantiles=c(.5)) + 
  scale_y_continuous(name = "log10(Nprop)", labels = scales::math_format(10^.x)) + 
  facet_grid(~ n) + 
  annotation_logticks(side="l") + 
  xlab("Epsilon")

# Figure 3: discrete approximation results 

# seed set by date of original submission
set.seed(210522)
d = 5
n = 100
xs = matrix(rbeta(n*d, 5, 5), nrow=n)
bs = c(.1, .2, -.3, 0, 0)
ys = xs %*% bs + rbeta(n, 20, 20) * 2 - 1
lam = 1
sens = 8*d
A = t(xs) %*% xs + diag(d) * lam
B = as.vector(t(ys) %*% xs)

# simulate within L_1 ball of radius 1

sim_params = expand.grid(
  nsim=10**(1:4), 
  nrep=1:1000,
  eps=c(.1, 1, 10)
)

tveps = seq(5, 125, 5)

ci_discrete = function(nsim, eps) {
  tps = matrix(rLaplace(nsim*d), ncol=d)
  tps = sweep(tps, 1, rexp(nsim) + rowSums(abs(tps)), FUN="/")
  l1ns = rowSums(abs(sweep(tps %*% A, 1, B)))
  tpmf = exp(-eps/sens * l1ns) / sum(exp(-eps/sens * l1ns))
  tdf = data.frame(
    l1=l1ns,
    pmf=tpmf
  )
  sapply(tveps, function(v) { sum(tdf[tdf$l1 <= v, "pmf"]) })
}

ci_res = mapply(
  ci_discrete,
  nsim=sim_params$nsim,
  eps=sim_params$eps
)

ci_vs = data.frame(t(ci_res))
names(ci_vs) = paste("v", 1:25, sep="")
ci_res_g = ci_vs %>%
  bind_cols(sim_params) %>%
  group_by(nsim, eps) %>%
  summarise_all(quantile, probs=.05) %>%
  pivot_longer(paste("v", 1:25, sep="")) %>%
  merge(data.frame(name=paste("v", 1:25, sep=""),
                   tveps=tveps),
        by="name")

ci_res_g$epsf = paste("epsilon=", ci_res_g$eps, sep="")
ci_res_g$ell = factor(ci_res_g$nsim)
ggplot(ci_res_g) + 
  geom_line(aes(tveps/n, value, color=ell)) +
  facet_grid(~ epsf) + 
  ylab("Err") + 
  xlab(expression(epsilon*"/n"))


