library(MASS)
library(ncvreg)
library(glmnet)
library(Matrix) # block diagonal matrix
library(SIS)
library(scalreg)
library(parallel)
library(hdi)

ncores <- 8
SIM_global <- function(n=200, p=400, sparsity=10, beta.type=1, 
                       delta0=1, delta1=0.1, delta2=2, 
                       x.type=1, rho=0.1, err.type=1, model=1, 
                       outlier.prop=0, outlier.multi=10,
                       penalty="lasso", nfolds=10, alpha=0.05, 
                       method="proposed", loops=3, M=500) {
  
  # n=200; p=400; sparsity=10; beta.type=1;
  # delta0=1; delta1=0.1; delta2=1;
  # x.type=2; rho=0.1; err.type=1; model=1;
  # outlier.prop=0; outlier.multi=10;
  # penalty="lasso"; nfolds=10; alpha=0.05;
  # method="proposed"; loops=3;
  
  ## generate data
  # beta.true
  if(beta.type == 1) {
    beta.true <- c(rep(delta0, sparsity), rep(0, p-sparsity))
  }
  if(beta.type == 2) {
    beta.true <- c(seq(delta1, delta2, length.out=sparsity), rep(0, p-sparsity))
  }
  
  # x
  genedata <- function(i) {
    y <- Inf
    while (length(which(y == Inf)) != 0) {
      if(x.type == 1) {
        p.sub <- p/10
        list.temp <- NULL
        for (ii in 1:10) {
          list.temp[[ii]] <- toeplitz((0.1*ii-0.1)^seq(0, p.sub-1))
        }
        Sig <- as.matrix(bdiag(list.temp))
        x <- mvrnorm(n, mu=rep(0, p), Sigma=Sig)
      }
      if(x.type == 2) {
        p.sub <- p/10
        list.temp <- NULL
        for (ii in 1:10) {
          list.temp[[ii]] <- toeplitz((0.1*ii-0.1)^seq(0, p.sub-1))
        }
        Sig <- as.matrix(bdiag(list.temp))
        for (i in c(1:sparsity, (sparsity+1):(sparsity+5))) {
          for (j in c(1:sparsity, (sparsity+1):(sparsity+5))) {
            Sig[i,j] <- 0.8
          }
        }
        diag(Sig) <- 1
        x <- mvrnorm(n, mu=rep(0, p), Sigma=Sig)
      }
      
      # error
      if(err.type == 1) { error <- rnorm(n) }
      if(err.type == 2) { error <- rt(n, 1) }
      
      # linear model
      if(model == 1) { y <- x%*%beta.true + 1*error } 
      # nonlinear model
      if(model == 2) { 
        y <- exp(x%*%beta.true + error)
      }
      
      # add outliers
      outlier.pos <- sample(1:n, n*outlier.prop)
      y[outlier.pos] <- y[outlier.pos] + outlier.multi*max(y)
      Fy <- (rank(y))/length(y)
    }
    return(list(x=x, y=y, Fy=Fy))
  }
  
  ## penalized estimator
  fit_func <- function(penalty, est="theta") {
    if(est == "theta") {
      model.x <- cv.ncvreg(z, xj, penalty=penalty, nfolds=nfolds)
      lambda.x <- model.x$lambda.min
      model.x <- ncvfit(z, xj, penalty=penalty, lambda=lambda.x)
      theta.hat <- as.numeric(model.x$beta)
      
      return(theta.hat)
    }
    if(est == "beta") {
      model.Fy <- cv.ncvreg(x, Fy-1/2, penalty=penalty, nfolds=nfolds)
      lambda.Fy <- model.Fy$lambda.min
      model.Fy <- ncvfit(x, Fy-1/2, penalty=penalty, lambda=lambda.Fy)
      beta.hat <- as.numeric(model.Fy$beta)
      
      return(beta.hat)
    }
    if(est == "beta.y") {
      model.y <- cv.ncvreg(x, y, penalty=penalty, nfolds=nfolds)
      lambda.y <- model.y$lambda.min
      model.y <- ncvfit(x, y, penalty=penalty, lambda=lambda.y)
      beta.y.hat <- as.numeric(model.y$beta)
      
      return(beta.y.hat)
    }
  }
  
  if(method == "proposed") {
    temp <- mclapply(1:loops, function(i) {
      cat(i, "\r")
      set.seed(proc.time()[1]*1000 + i)
      outdata <- genedata(i)
      x <<- outdata$x
      y <<- outdata$y
      Fy <<- outdata$Fy
      
      beta.hat <- fit_func(penalty, est="beta")
      interest <- c(ceiling(2*sparsity/5):sparsity, (sparsity+4):(sparsity+53), (p-149):p)
      stat.record <- c(); M.G <- matrix(NA, M, length(interest))
      for (l in 1:length(interest)) {
        j <- interest[l]
        xj <<- x[,j]
        z <<- x[,-j]
        theta.hat <- fit_func(penalty, est="theta")
        gamma.hat <- beta.hat[-j]
        e.hat <- Fy-1/2-z%*%gamma.hat
        my.hat.func <- function(yi) {
          1/n * sum((xj-z%*%theta.hat)*(ifelse(y>=yi, 1, 0)-Fy))
        }
        stat.T <- 1/sqrt(n) * sum(e.hat*(xj-z%*%theta.hat))
        my.hat <- c()
        for (k in 1:n) { my.hat[k] <- my.hat.func(y[k]) }
        Sig.hat <- 1/n * sum(((xj-z%*%theta.hat)*e.hat + my.hat)^2)
        
        stat.S <- stat.T/sqrt(Sig.hat)
        stat.record[l] <- stat.S
        
        ## Gaussian approximate
        hj.hat_func <- function(o) {
          hj.hat.oo <- c(); k.oo <- 1;
          for (oo in 1:n) {
            if(oo != o) {
              hj.hat.oo[k.oo] <- 1/2*((ifelse(y[oo]<=y[o], 1, 0)-1/2-z[o,]%*%gamma.hat)*(x[o,j]-z[o,]%*%theta.hat) +
                                        (ifelse(y[o]<=y[oo], 1, 0)-1/2-x[oo,-j]%*%gamma.hat)*(x[oo,j]-x[oo,-j]%*%theta.hat))
              k.oo <- k.oo + 1
            }
          }
          hj.hat.o <- sum(hj.hat.oo)
          return(hj.hat.o)
        }
        hj.hat <- c()
        for (o in 1:n) {hj.hat[o] <- hj.hat_func(o)}
        
        for (m in 1:M) {
          set.seed(m+0417)
          e <- rnorm(n)
          M.G[m,l] <- (2/(sqrt(Sig.hat)*sqrt(n))*sum(((1/(n-1))*hj.hat-(1/sqrt(n))*stat.T)*e))^2
        }
      }
      
      G1 <- c(5:6, 200:204); T1.inf <- max(stat.record[G1]^2)
      G2 <- c(1:4, G1); T2.inf <- max(stat.record[G2]^2)
      G4 <- c(5:204); T4.inf <- max(stat.record[G4]^2)
      G5 <- 1:204; T5.inf <- max(stat.record[G5]^2)
      
      stat.boot1 <- apply(M.G[,G1], 1, max)
      stat.boot2 <- apply(M.G[,G2], 1, max)
      stat.boot4 <- apply(M.G[,G4], 1, max)
      stat.boot5 <- apply(M.G[,G5], 1, max)
      
      reject.boot <- c(T1.inf>=quantile(stat.boot1, 1-alpha),
                       T2.inf>=quantile(stat.boot2, 1-alpha), 
                       T4.inf>=quantile(stat.boot4, 1-alpha),
                       T5.inf>=quantile(stat.boot5, 1-alpha))
      return(reject.boot)
    }, mc.cores=ncores)
    
    reject.inf <- Reduce("rbind", temp)
    res <- colMeans(reject.inf)
    return(res)
  }
}


## Simulation example
method="proposed"
beta.type=2;
p=800; rho=0.5;
outlier.multi=10;
penalty="lasso"; nfolds=10; alpha=0.05;
loops=500; M=500
global.df <- data.frame(method=NA, model=NA, n=NA, p=NA, x_type=NA, sparsity=NA,
                        err_type=NA, outprop=NA, G1=NA, G2=NA, G4=NA, G5=NA)
row <- 1
for (n in c(200, 500)) {
  for (model in c(2, 1)) {
    cat("#--------model (n, p) =", model, c(n, p), "--------#", "\n")
    for (x.type in 1) {
      for (sparsity in 6) {
        # cat("sparsity =", sparsity, "\n")
        for (err.type in (1:2)) {
          # cat("err.type =", err.type, "\n")
          if(err.type == 1) {
            for (outlier.prop in c(0.1, 0)) {
              # cat("outlier.prop =", outlier.prop, "\n")
              out.res <- SIM_global(n, p, sparsity, beta.type, delta0=1, delta1=0.1, delta2=2,
                                    x.type, rho, err.type, model,
                                    outlier.prop, outlier.multi,
                                    penalty, nfolds, alpha,
                                    method, loops, M)
              global.df[row,] <- c(method, model, n, p, x.type, sparsity, err.type,
                                   outlier.prop, out.res)
              cat("row =", row, "\n")
              row <- row + 1
            }
          }
          if(err.type == 2) {
            outlier.prop <- 0
            out.res <- SIM_global(n, p, sparsity, beta.type, delta0=1, delta1=0.1, delta2=2,
                                  x.type, rho, err.type, model,
                                  outlier.prop, outlier.multi,
                                  penalty, nfolds, alpha,
                                  method, loops, M)
            global.df[row,] <- c(method, model, n, p, x.type, sparsity, err.type,
                                 outlier.prop, out.res)
            cat("row =", row, "\n")
            row <- row + 1
          }
        }
      }
    }
  }
}