rm(list=ls())
timestart<-Sys.time()
library(MASS)

gendata <- function(n, sigma_x, sigma_e, beta_0){
  #beta_0: coefficients contain intercept term
  #n: sample size
  #sigma_x: variance for x
  #sigma_e: variance for error term
  p <- length(beta_0) - 1
  x <- mvrnorm(n, rep(0,p), sigma_x*diag(1,p))
  if(n == 1){
    z <- c(1, x)
    y <- as.numeric(z%*%beta_0) + rnorm(1, 0, sigma_e)
  }else{
    z <- cbind(1, x)
    y <- z%*%beta_0 + rnorm(n, 0, sigma_e)
  }
  dat <- list(x = x, z = z, y = y)
  return(dat)
}


# auxiliary functions
# first derivative for huber loss
psiHuber <- function(u, k) 
{
  return(u*pmin(1, k/abs(u)))
} 

############ Mallows weights ##############
####### min(1, 2/||x_i||_2^2) ############# 
###########################################
weightfn <- function(x,max.norm=sqrt(2)){
  return(min(1,(max.norm/norm(x,type="2"))^2))
}


Fisher.constant <- function(k){ # computes Fisher consistency constant for Huber's scale estimator under normality
  fct.Huber <- function(r) # central part
  {r^2*exp(-r^2/2)/(sqrt(2*pi))}    
  beta1 <- integrate(fct.Huber, lower=-k, upper=k)$value
  beta2 <- k^2*(pnorm(-k)+1-pnorm(k))
  beta <- beta1 + beta2
  return(beta)
}

Noisysgd <- function(n, x, y, k=1.345, fisher_beta=0.7101645, scale, private, mu, s0, beta0, stepsize)
 {
## (x, y): one data point: 
## x: covariate that does not contain intercept term
## y: response variable
## n: sample size, if we use sgd, n=1
## k: truncation parameter
## scale: whether estimate sigma_e for error term
## private: whether add privacy
## mu: privacy budget
## s0: variance for error term
## bta0: coefficients.
   p <- length(x)
   if(k!=1.345){fisher_beta <- Fisher.constant(k)}
   # gets updated further down if private = T
   ##########################################
   grad_noise <- 0 
   priv_grad_traj <- NA
   if(n == 1){
    z <- c(1, x)
    r <- (y - as.numeric(z%*%beta0))/s0
   }else{
    z <- cbind(1, x)
    r <- (y - z%*%beta0)/s0 
   }
   weightfun <- weightfn(z, max.norm = sqrt(2))
   psi.vec <- psiHuber(r,k)
   ##divide by 2 so psi and chi come from same objective function
   ##objective function 
   sum.chi <- mean(((psiHuber(r,k)^2)-fisher_beta)*weightfun)/2 
  # Location estimation with known scale
   if(scale == F){ 
    if(private == T){     
     grad_noise <- 2*sqrt(2)*k/mu
    }
    s <- s0
    noisy_grad <- psi.vec*weightfun*z + grad_noise*rnorm(p+1)
    #tracks the evolution of the noisy gradient (in L2 norm)
    priv_grad_traj <- sqrt(sum(noisy_grad^2)) 
 # this performs stochastic gradient descent to estimate beta
 ###############################################
    beta <- beta0 + stepsize*noisy_grad  
 }
 ###Joint location and scale estimation
  
  if(scale == T){
    theta0 <- c(beta0, s0)
    GS_sigmagrad <- min((k^2)/2,(k^2-fisher_beta))
    GS_grad <- sqrt(8*k^2 + GS_sigmagrad^2)
    if(private == T){
      grad_noise <- GS_grad/mu
    }
  #  noisy_grad <- c(colMeans(psi.vec*weightfun*z), sum.chi) + grad_noise*rnorm(p+2)
    noisy_grad <- c((psi.vec*weightfun*z), sum.chi) + grad_noise*rnorm(p+2)
    priv_grad_traj <- sqrt(sum(noisy_grad^2))
 # this performs stochastic gradient descent to estimate beta
 ###############################################
    theta <- theta0 + stepsize*noisy_grad
    beta <- theta[1:(p+1)]
    s <- theta[(p+2)]   
  }
   out <- NULL
   out$beta <- beta
   out$s <- s
   out$grad_traj <- priv_grad_traj
   return(out)
}



# ########################
# ########################
# ##### run ##############
# ##### true value #######
# beta_true <- c(1,1,1,1)
# sigma_x <- 2
# sigma_e <- 2
# MAX <- 1
# n <- 50000
# mu <- 1
# ##### null value #######
# p <- length(beta_true) - 1
# beta_all <- array(0, dim = c((p+1), n, MAX))
# s_all <- array(0, dim = c(n, MAX))

# beta <- array(0, dim = c(p+1, MAX))
# s <- rep(0, MAX)
# beta_bar <- array(0, dim = c(p+1, MAX))
# s_bar <- rep(0, MAX)
# ##### initial value ####
# beta_0 <- rep(0, p+1)
# s0 <- 1
# stepsize <- n^(-1/2)
# n_drop <- 0

# for(tt in 1:MAX){
#  set.seed(20230907 + tt)
#  for(i in 1:n){
#         data <- gendata(1, sigma_x, sigma_e, beta_true)
#         x <- data$x
#         y <- data$y
     
     
#         ####### calculate noisy sgd ########
#         Noisysgd_out <- Noisysgd(1, x, y, k=1.345, fisher_beta=0.7101645, scale=T, private=T, mu, s0, beta_0, stepsize)
#         beta_all[, i, tt] <- Noisysgd_out$beta
#         s_all[i, tt] <- Noisysgd_out$s
#         ####### iterate ####################
#         beta_0 <- beta_all[, i, tt] 
#         s0 <- s_all[i, tt]
#         if(i>n_drop){
#           s_bar[tt] <- (i-n_drop-1)*s_bar[tt]/(i-n_drop) + s_all[i, tt]/(i-n_drop)
#           beta_bar[,tt] <- (i-n_drop-1)*beta_bar[,tt]/(i-n_drop) + beta_all[, i, tt]/(i-n_drop)
#         }

#       }
#   beta[, tt] <- beta_all[, n, tt]
#   s[tt] <- s_all[n, tt] 
#   print(tt)
# }






