## Seed the RNG for the whole script.
## The random draws of this study happen inside forked mclapply() workers.
## Under the default Mersenne-Twister generator those workers discard the
## parent's seed and re-seed themselves, so set.seed() alone does not make the
## results reproducible.  With L'Ecuyer-CMRG selected, mclapply() hands every
## worker its own stream derived from this seed (results are then reproducible
## for a fixed number of cores).
RNGkind("L'Ecuyer-CMRG")
set.seed(2)

library(ggplot2)
library(parallel)

## Load methods and data
source("estimators.R")
source("k_class.R")
source("simulate_data.R")
source("check_assumptions.R")


## Simple simulation to illustrate assumptions
##
## Global settings of the study.  d, m, p_connect and sparsity are forwarded
## to linear_DAG_model_sparse(); their exact meaning is defined in
## simulate_data.R (presumably d = number of predictors and m = number of
## instruments -- confirm there).
B         <- 2000   # number of simulation replications
m         <- 10
d         <- 20
p_connect <- 1
sparsity  <- 2

## Sample sizes to evaluate.  One replication draws sum(n_grid) observations
## and gives every sample size its own consecutive, non-overlapping block of
## rows.
n_grid <- c(50, 100, 200, 400, 800, 1600)

## ncum[k] is the last row used by the blocks before block k (ncum[1] is 0);
## n_index[[k]] holds the row numbers of block k.
ncum    <- cumsum(c(0, n_grid))
n_index <- Map(function(first, last) first:last,
               ncum[-length(ncum)] + 1,
               ncum[-1])


## Run one replication of the simulation study.
##
## Draws a single data set with ncum[length(ncum)] observations from
## linear_DAG_model_sparse(), evaluates check_assumptions_A() once on the
## generated model, and then, for every sample size in n_grid, fits four
## estimators on that sample size's own block of rows (n_index[[k]]):
##   oracle-|PA| : spaceIV_estimator_inner() on all predictors, size=sparsity
##   oracle-PA   : spaceIV_estimator_inner() restricted to the non-zero
##                 entries of data$beta_star
##   spaceIV     : spaceIV_estimator() with max_size=3, alpha=0.1
##   OLS-sparse  : l0_OLS_estimator() with max_size=3
## The loss of each estimate is its Euclidean distance to data$beta_star.
##
## Reads the globals d, m, p_connect, sparsity, n_grid, ncum and n_index.
##
## Arguments:
##   i  replication index; only printed as a progress marker.
##
## Returns a data.frame with four rows (one per method) for every entry of
## n_grid and the columns loss, method, valid_ass, assA1, assA3, n (sample
## size as a string) and estimated_sparsity (number of non-zero spaceIV
## coefficients, repeated on all four rows of the same sample size).
single_simulation <- function(i){
  ## Zero-row template: fixes the column layout and is what is returned when
  ## n_grid is empty.
  empty_res <- data.frame(loss=numeric(0),
                          method=character(0),
                          valid_ass=numeric(0),
                          assA1=numeric(0),
                          assA3=numeric(0),
                          n=numeric(0),
                          estimated_sparsity=numeric(0))
  method_names <- c("oracle-|PA|", "oracle-PA", "spaceIV", "OLS-sparse")

  print(i)
  ptm <- proc.time()
  data <- linear_DAG_model_sparse(d, m, ncum[length(ncum)], p_connect, sparsity,
                                  noise_sd=1, noise_H=1, noise_I=1)
  check_ass_A <- check_assumptions_A(data$A, data$B, data$parents)
  valid_ass_A <- check_ass_A$assA1 & check_ass_A$assA3

  ## Support of the true coefficient vector; identical for every sample size.
  true_S <- which(data$beta_star != 0)

  ## Euclidean distance between an estimate and the true coefficients.
  l2_loss <- function(beta_hat) sqrt(sum((data$beta_star - beta_hat)^2))

  ## One small data.frame per sample size, bound together once at the end
  ## instead of growing a data.frame inside the loop.
  per_n <- lapply(seq_along(n_grid), function(k){
    rows <- n_index[[k]]
    XX <- data$X[rows, , drop=FALSE]
    II <- data$I[rows, , drop=FALSE]
    YY <- data$Y[rows]

    # oracle-|PA|
    beta1 <- spaceIV_estimator_inner(XX, II, YY, size=sparsity, use_liml=TRUE)
    # oracle-PA: estimate on the true support only, zero elsewhere
    beta2 <- matrix(0, ncol(XX), 1)
    beta2[true_S] <- spaceIV_estimator_inner(XX[, true_S, drop=FALSE],
                                             II, YY, size=sparsity,
                                             use_liml=TRUE)
    # spaceIV
    beta3 <- spaceIV_estimator(XX, II, YY, max_size=3,
                               alpha=0.1, use_liml=TRUE)$beta_hat
    # OLS-sparse
    beta4 <- l0_OLS_estimator(XX, YY, max_size=3)$beta_hat

    data.frame(loss=c(l2_loss(beta1), l2_loss(beta2),
                      l2_loss(beta3), l2_loss(beta4)),
               method=method_names,
               valid_ass=rep(valid_ass_A, 4),
               assA1=rep(check_ass_A$assA1, 4),
               assA3=rep(check_ass_A$assA3, 4),
               n=rep(toString(n_grid[k]), 4),
               estimated_sparsity=rep(sum(beta3 != 0), 4))
  })

  print(proc.time() - ptm)
  do.call(rbind, c(list(empty_res), per_n))
}

## Run simulation
## The B replications are distributed over forked workers.  Note that
## mclapply() only gives the workers reproducible RNG streams when
## RNGkind("L'Ecuyer-CMRG") is active, and the assignment of streams to
## replications then depends on mc.cores.
ptm <- proc.time()
res <- mclapply(seq_len(B), single_simulation, mc.cores=36)
print(proc.time() - ptm)

## mclapply() does not stop when a worker fails: the affected entries come
## back as "try-error" objects (or NULL if the worker died).  Binding those
## into the results would corrupt them silently, so detect and report them.
failed <- vapply(res, function(r) !is.data.frame(r), logical(1))
if (any(failed)) {
  if (all(failed)) {
    stop("all ", B, " replications failed; nothing to save", call. = FALSE)
  }
  errs <- Filter(function(r) inherits(r, "try-error"), res[failed])
  warning(sum(failed), " of ", B,
          " replications did not return a data.frame and were dropped",
          if (length(errs) > 0) {
            paste0("; first error: ", trimws(as.character(errs[[1]])))
          },
          call. = FALSE)
  res <- res[!failed]
}

dfres <- do.call(rbind, res)
## Save results
full_results <- list(parameters=list(m=m,
                                     d=d,
                                     B=B,
                                     p_connect=p_connect,
                                     sparsity=sparsity,
                                     n_grid=n_grid),
                     results=dfres)
## Make sure the output directory exists so the finished run is not lost
## to a failing save().
dir.create("results", showWarnings = FALSE, recursive = TRUE)
save(full_results, file="results/full_results.Rda")
