# Run from the project directory so that the relative "samples/" and
# "results/" paths used further down resolve. The location can be overridden
# on another machine by setting the CBMA_PROJECT_DIR environment variable;
# when it is unset or empty the original hard-coded path is used.
getwd()  # Check current working directory
project_dir <- Sys.getenv("CBMA_PROJECT_DIR", unset = NA)
if (is.na(project_dir) || !nzchar(project_dir)) {
  project_dir <- "C:/Users/bhap2601/OneDrive - University of Alberta/UAlberta2024/Projects/ICLR2024_CBMA_Revision/RebuttalExperiments/CBMA_finalcode/QuadraticModel/n50"
}
setwd(project_dir)
getwd()

library(rstan)
library(matrixStats)
library(Matrix)
library(BayesianTools)
library(brms)
library(bridgesampling)
suppressPackageStartupMessages(library(mlbench))
suppressPackageStartupMessages(library(rstanarm))
suppressPackageStartupMessages(library(bayestestR))
suppressPackageStartupMessages(library(bayesplot))
suppressPackageStartupMessages(library(insight))
suppressPackageStartupMessages(library(broom))



# ---- Experiment configuration (globals read by the functions below) ----

# Total number of simulated observations per replication; load_traintest()
# splits it into floor(n_origin * train_frac) training points and the rest
# as test points.
n_origin <- 50

# Value returned as `d` by load_traintest().
d <- 2
# Number of points in the response grid `y_plot` built by load_traintest().
n_plot <- 100
# Miscoverage level: the Bayes band uses the alpha/2 and 1 - alpha/2 CDF
# levels, and the conformal region keeps ranks above alpha * (n + 1).
alpha <- 0.2
# Fraction of n_origin used for training.
train_frac <- 0.6
# Post-warmup iterations per chain (the fit_mcmc_* functions run 4 chains
# with iter = B + 1500 and warmup = 1500).
B <- 2000

nsize <- n_origin
# Label embedded in the names of all saved sample/result files.
dataset <- sprintf("ToyEx_%d",nsize)
# Number of independent replications (seeds 101, 102, ...).
rep_exp <- 50
# NOTE: load_traintest() calls set.seed() itself on every invocation.
set.seed(123)

# Simulate one train/test split from a quadratic regression model.
#
# The response is y = beta0 + beta1 * x + beta2 * x^2 + N(0, 0.2^2) noise,
# with the three coefficients themselves drawn from normal distributions
# after seeding the RNG with `seed`. Training inputs are uniform on [0, 1];
# test inputs are an even grid on [0, 1].
#
# Arguments:
#   train_frac  Fraction of `n_origin` used for training. When NULL, the
#               script-level `train_frac` is used.
#   seed        RNG seed; set.seed(seed) is called, so the global RNG state
#               is reset as a side effect.
#   n_origin    Total number of observations. When NULL, the script-level
#               `n_origin` is used.
#
# Returns a list with
#   x_train, x_test  two-column design matrices (x, x^2),
#   y_train, y_test  responses,
#   y_plot           grid of `n_plot` (script-level) values spanning
#                    [min(y_train) - 2, max(y_train) + 2],
#   dy               spacing of y_plot,
#   n                total sample size (n_origin),
#   d                number of design columns.
#
# The previous defaults (`train_frac = train_frac`, `n_origin = n_origin`)
# referred to themselves and raised "promise already under evaluation" as
# soon as an argument was omitted; NULL sentinels resolved against the
# enclosing environment give the intended behaviour.
load_traintest <- function(train_frac = NULL, seed, n_origin = NULL) {
  script_env <- parent.env(environment())
  if (is.null(train_frac)) {
    train_frac <- get("train_frac", envir = script_env)
  }
  if (is.null(n_origin)) {
    n_origin <- get("n_origin", envir = script_env)
  }

  set.seed(seed)
  n_train <- floor(n_origin * train_frac)
  n_test <- n_origin - n_train

  # The order of the random draws below determines the simulated data for a
  # given seed; do not reorder.
  x1_train <- runif(n_train)
  x1_test <- seq(0, 1, length.out = n_test)

  beta0 <- rnorm(1, 0, 0.25)
  beta1 <- rnorm(1, 1, 0.25)
  beta2 <- rnorm(1, 0.5, 0.25)
  y_train <- beta0 + beta1 * x1_train + beta2 * x1_train^2 +
    rnorm(n_train, mean = 0, sd = 0.2)
  y_test <- beta0 + beta1 * x1_test + beta2 * x1_test^2 +
    rnorm(n_test, mean = 0, sd = 0.2)

  x_train <- cbind(x1_train, x1_train^2)
  x_test <- cbind(x1_test, x1_test^2)

  y_plot <- seq(min(y_train) - 2, max(y_train) + 2, length.out = n_plot)
  dy <- y_plot[2] - y_plot[1]

  # n and d are taken from the arguments/data rather than from globals so the
  # returned list is always consistent with the generated split.
  list(x_train = x_train, y_train = y_train,
       x_test = x_test, y_test = y_test,
       y_plot = y_plot, n = n_origin, d = ncol(x_train), dy = dy)
}




# Importance-sampling conformity scores and ranks for one test input.
#
# Arguments:
#   logp_samp_n  B x n matrix of log-likelihoods of the n training points
#                under each of the B posterior draws.
#   logwjk       n_plot x B matrix of log-likelihoods of each candidate
#                response (grid row) under each posterior draw; these act as
#                log importance weights.
#
# Returns a list with
#   rank_cp   length-n_plot vector: for every grid row, how many of the
#             n + 1 scores are <= the candidate's own score,
#   pred_tot  n_plot x (n + 1) matrix of scores; columns 1..n are the
#             re-weighted predictive densities of the training points and
#             column n + 1 is the candidate's own predictive density.
#
# The weights are exponentiated after subtracting each row's maximum. The
# normalised weights are unchanged by this shift, and the shift is undone for
# the candidate's own score, so results match the direct computation while
# rows whose log weights are all very negative no longer produce 0/0 = NaN.
compute_rank_IS <- function(logp_samp_n, logwjk) {
  n <- ncol(logp_samp_n)

  # Row-wise shift for numerical stability; rows that are entirely -Inf are
  # left unshifted to avoid -Inf - (-Inf).
  row_max <- apply(logwjk, 1, max)
  row_max[!is.finite(row_max)] <- 0

  # Shifted importance weights and their row normalisers.
  wjk <- exp(logwjk - row_max)
  Zjk <- rowSums(wjk)

  # Predictive densities for the training points (p_cp) and for the candidate
  # itself (p_new); exp(row_max) restores the original scale of p_new.
  p_cp <- (wjk / Zjk) %*% exp(logp_samp_n)
  p_new <- exp(row_max) * rowSums(wjk^2) / Zjk

  # Conformity scores and the rank of the candidate among them.
  pred_tot <- cbind(p_cp, p_new)
  rank_cp <- rowSums(pred_tot <= pred_tot[, n + 1])
  list(rank_cp = rank_cp, pred_tot = pred_tot)
}

# Conformal Bayes region membership on the response grid for one test input.
#
# Delegates the scoring to compute_rank_IS() and keeps every grid row whose
# rank exceeds alpha * (n + 1), where n is the number of columns of
# `logp_samp_n`. Returns a list with the logical membership vector
# `region_true` and the score matrix `pred_tot` from compute_rank_IS().
compute_cb_region_IS <- function(alpha, logp_samp_n, logwjk) {
  ranked <- compute_rank_IS(logp_samp_n, logwjk)
  n_obs <- ncol(logp_samp_n)  # logp_samp_n is B x n
  rank_threshold <- alpha * (n_obs + 1)
  list(region_true = ranked$rank_cp > rank_threshold,
       pred_tot = ranked$pred_tot)
}

# Diagnostics for the importance weights used by the conformal procedure.
#
# Arguments:
#   logp_samp_n  Accepted for signature compatibility; not used here.
#   logwjk       n_plot x B matrix of log importance weights (one row per
#                candidate response, one column per posterior draw).
#
# Returns a list with, per grid row,
#   ESS  effective sample size 1 / sum(w^2) of the row-normalised weights,
#   var  sum(w^2 * (w - p_new)^2), where p_new = sum(exp(2 * logw)) /
#        sum(exp(logw)) for that row.
#
# The normaliser must be computed separately for every row. The previous
# code called logSumExp() on the whole matrix, which collapses it to a single
# scalar and therefore normalised all rows by one global constant.
diagnose_is_weights <- function(logp_samp_n, logwjk) {
  # Row-wise log-sum-exp, shifted by the row maximum for stability.
  row_log_sum_exp <- function(log_mat) {
    row_max <- apply(log_mat, 1, max)
    row_max[!is.finite(row_max)] <- 0
    row_max + log(rowSums(exp(log_mat - row_max)))
  }

  # Per-row log normaliser of the importance weights.
  logZjk <- row_log_sum_exp(logwjk)

  # Log predictive density of the candidate itself.
  logp_new <- row_log_sum_exp(2 * logwjk) - logZjk

  # Effective sample size from the normalised weights.
  wjk <- exp(logwjk - logZjk)
  ESS <- 1 / rowSums(wjk^2)

  # Variance term for p_new.
  var <- rowSums(wjk^2 * (wjk - exp(logp_new))^2)
  list(ESS = ESS, var = var)
}


# Central (1 - alpha) Bayesian predictive band read off a gridded CDF.
#
# `cdf_pred` holds the predictive CDF evaluated at the grid `y_plot`. The
# band end points are the grid values whose CDF is closest to alpha / 2 and
# to 1 - alpha / 2. Returns c(lower, upper).
compute_bayes_band_MCMC <- function(alpha, y_plot, cdf_pred) {
  lower_level <- alpha / 2
  upper_level <- 1 - alpha / 2

  lower_idx <- which.min(abs(cdf_pred - lower_level))
  upper_idx <- which.min(abs(cdf_pred - upper_level))

  band <- numeric(2)
  band[1:2] <- c(y_plot[lower_idx], y_plot[upper_idx])
  band
}

# Model 1: linear regression on both design columns (x and x^2).
#
# Fits the Stan program below with 4 chains, 1500 warmup iterations and B
# post-warmup iterations per chain, and returns the resulting stanfit.
# The priors address beta[1] and beta[2] explicitly, so `x` must have two
# columns.
fit_mcmc_m1 <- function(y, x, B, seed = 100) {
  # NOTE: the variable name `model` is kept on purpose; rstan can derive the
  # model name from the name of the object passed as model_code.
  model <- "
  data {
    int<lower=0> n;          // number of observations
    int<lower=0> d;          // number of predictors
    vector[n] y;             // response variable
    matrix[n, d] x;          // design matrix
  }
  parameters {
    vector[d] beta;           // regression coefficients
    real intercept;           // intercept
    real<lower=0> sigma;      // standard deviation
  }
  model {
    beta[1] ~ normal(1,0.25);
    beta[2] ~ normal(0.5,0.25);
    intercept ~ normal(0, 0.25); 
    sigma ~ normal(0, 0.2)T[0,];    // half-normal prior for sigma
    y ~ normal(x * beta + intercept, sigma); // likelihood
  }
  "
  n_warmup <- 1500
  stan_inputs <- list(n = nrow(x), d = ncol(x), y = y, x = x)
  sampler_control <- list(adapt_delta = 0.90)
  diag_path <- file.path(tempdir(), "df1.csv")

  stan(model_code = model, data = stan_inputs, chains = 4,
       warmup = n_warmup, iter = B + n_warmup, seed = seed,
       control = sampler_control, refresh = 0,
       diagnostic_file = diag_path)
}

# Model 2: linear regression on x only.
#
# `x` is coerced to a matrix, so a plain vector is treated as a single
# design column. Fits the Stan program below with 4 chains, 1500 warmup
# iterations and B post-warmup iterations per chain, and returns the stanfit.
fit_mcmc_m2 <- function(y, x, B, seed = 100) {
  design <- as.matrix(x)
  # NOTE: the variable name `model` is kept on purpose; rstan can derive the
  # model name from the name of the object passed as model_code.
  model <- "
  data {
    int<lower=0> n;          // number of observations
    int<lower=0> d;          // number of predictors
    vector[n] y;             // response variable
    matrix[n, d] x;          // design matrix
  }
  parameters {
    vector[d] beta;           // regression coefficients
    real intercept;           // intercept
    real<lower=0> sigma;      // standard deviation
  }
  model {
    beta ~ normal(1,0.25);
    intercept ~ normal(0, 0.25); 
    sigma ~ normal(0, 0.2)T[0,];    // half-normal prior for sigma
    y ~ normal(x * beta + intercept, sigma); // likelihood
  }
  "
  n_warmup <- 1500
  stan_inputs <- list(n = nrow(design), d = ncol(design), y = y, x = design)
  sampler_control <- list(adapt_delta = 0.90)
  diag_path <- file.path(tempdir(), "df1.csv")

  stan(model_code = model, data = stan_inputs, chains = 4,
       warmup = n_warmup, iter = B + n_warmup, seed = seed,
       control = sampler_control, refresh = 0,
       diagnostic_file = diag_path)
}

# Model 3: linear regression on x^2 only.
#
# `x` is coerced to a matrix, so a plain vector is treated as a single
# design column. Fits the Stan program below with 4 chains, 1500 warmup
# iterations and B post-warmup iterations per chain, and returns the stanfit.
fit_mcmc_m3 <- function(y, x, B, seed = 100) {
  design <- as.matrix(x)
  # NOTE: the variable name `model` is kept on purpose; rstan can derive the
  # model name from the name of the object passed as model_code.
  model <- "
  data {
    int<lower=0> n;          // number of observations
    int<lower=0> d;          // number of predictors
    vector[n] y;             // response variable
    matrix[n, d] x;          // design matrix
  }
  parameters {
    vector[d] beta;           // regression coefficients
    real intercept;           // intercept
    real<lower=0> sigma;      // standard deviation
  }
  model {
    beta ~ normal(0.5,0.25);
    intercept ~ normal(0, 0.25); 
    sigma ~ normal(0, 0.2)T[0,];    // half-normal prior for sigma
    y ~ normal(x * beta + intercept, sigma); // likelihood
  }
  "
  n_warmup <- 1500
  stan_inputs <- list(n = nrow(design), d = ncol(design), y = y, x = design)
  sampler_control <- list(adapt_delta = 0.90)
  diag_path <- file.path(tempdir(), "df1.csv")

  stan(model_code = model, data = stan_inputs, chains = 4,
       warmup = n_warmup, iter = B + n_warmup, seed = seed,
       control = sampler_control, refresh = 0,
       diagnostic_file = diag_path)
}





# Fit model 1 (x and x^2) on `rep_exp` simulated data sets and store the
# posterior draws.
#
# For replication j the data are generated with seed 100 + j, the model is
# fitted with fit_mcmc_m1(), and the log marginal likelihood is estimated
# with bridge_sampler(). The following files are written under "samples/"
# (the directory is created if missing), each tagged with
# "<dataset>_m1":
#   beta_post       rep_exp x (4 * B) x d array of coefficient draws,
#   intercept_post  rep_exp x (4 * B) matrix,
#   sigma_post      rep_exp x (4 * B) matrix,
#   logml1          log marginal likelihood per replication,
#   times           wall-clock seconds per replication (fit + bridge sampling).
#
# Reads the script-level settings train_frac, n_origin, B and rep_exp.
run_mcmc_m1 <- function(dataset) {
  # A reference split is only needed for the number of design columns.
  ref_split <- load_traintest(train_frac, seed = 100, n_origin = n_origin)
  d <- ncol(ref_split$x_train)
  n_draws <- 4 * B  # 4 chains x B post-warmup iterations

  beta_post <- array(0, dim = c(rep_exp, n_draws, d))
  intercept_post <- matrix(0, rep_exp, n_draws)
  sigma_post <- matrix(0, rep_exp, n_draws)
  times <- numeric(rep_exp)
  logml1 <- numeric(rep_exp)

  for (j in seq_len(rep_exp)) {
    seed <- 100 + j
    data_j <- load_traintest(train_frac, seed, n_origin = n_origin)

    start <- Sys.time()
    fit <- fit_mcmc_m1(data_j$y_train, data_j$x_train, B, seed)
    samples_fit <- as.matrix(fit)
    bridge_fit <- bridge_sampler(fit, silent = TRUE)

    # Columns follow the Stan parameters block: beta[1..d], intercept, sigma.
    beta_post[j, , ] <- samples_fit[, seq_len(d)]
    intercept_post[j, ] <- samples_fit[, d + 1]
    sigma_post[j, ] <- samples_fit[, d + 2]
    logml1[j] <- bridge_fit$logml

    # `end - start` lets difftime pick seconds or minutes automatically; fix
    # the unit so all stored timings are comparable.
    times[j] <- as.numeric(difftime(Sys.time(), start, units = "secs"))
    print(j)
  }

  # Save posterior samples
  suffix <- paste0(dataset, "_m1")
  cat(sprintf("%s: %f (%f)\n", suffix, mean(times), sd(times) / sqrt(rep_exp)))

  dir.create("samples", showWarnings = FALSE)
  saveRDS(beta_post, file = sprintf("samples/beta_post_%s.RDS", suffix))
  saveRDS(intercept_post, file = sprintf("samples/intercept_post_%s.RDS", suffix))
  saveRDS(sigma_post, file = sprintf("samples/sigma_post_%s.RDS", suffix))
  saveRDS(logml1, file = sprintf("samples/logml1_%s.RDS", suffix))
  saveRDS(times, file = sprintf("samples/times_%s.RDS", suffix))
}

# Fit model 2 (x only) on `rep_exp` simulated data sets and store the
# posterior draws.
#
# For replication j the data are generated with seed 100 + j, the first
# design column is passed to fit_mcmc_m2(), and the log marginal likelihood
# is estimated with bridge_sampler(). The following files are written under
# "samples/" (the directory is created if missing), each tagged with
# "<dataset>_m2":
#   beta_post       rep_exp x (4 * B) x 1 array of coefficient draws,
#   intercept_post  rep_exp x (4 * B) matrix,
#   sigma_post      rep_exp x (4 * B) matrix,
#   logml1          log marginal likelihood per replication,
#   times           wall-clock seconds per replication (fit + bridge sampling).
#
# Reads the script-level settings train_frac, n_origin, B and rep_exp.
run_mcmc_m2 <- function(dataset) {
  # Single predictor. (Previously `d` was overwritten inside the loop with
  # ncol() of a plain vector, i.e. NULL.)
  d <- 1
  n_draws <- 4 * B  # 4 chains x B post-warmup iterations

  beta_post <- array(0, dim = c(rep_exp, n_draws, d))
  intercept_post <- matrix(0, rep_exp, n_draws)
  sigma_post <- matrix(0, rep_exp, n_draws)
  times <- numeric(rep_exp)
  logml1 <- numeric(rep_exp)

  for (j in seq_len(rep_exp)) {
    seed <- 100 + j
    data_j <- load_traintest(train_frac, seed, n_origin = n_origin)
    x <- data_j$x_train[, 1]
    y <- data_j$y_train

    start <- Sys.time()
    fit <- fit_mcmc_m2(y, x, B, seed)
    samples_fit <- as.matrix(fit)
    bridge_fit <- bridge_sampler(fit, silent = TRUE)

    # Columns follow the Stan parameters block: beta[1], intercept, sigma.
    beta_post[j, , ] <- samples_fit[, 1]
    intercept_post[j, ] <- samples_fit[, 2]
    sigma_post[j, ] <- samples_fit[, 3]
    logml1[j] <- bridge_fit$logml

    # `end - start` lets difftime pick seconds or minutes automatically; fix
    # the unit so all stored timings are comparable.
    times[j] <- as.numeric(difftime(Sys.time(), start, units = "secs"))
    print(j)
  }

  # Save posterior samples
  suffix <- paste0(dataset, "_m2")
  cat(sprintf("%s: %f (%f)\n", suffix, mean(times), sd(times) / sqrt(rep_exp)))

  dir.create("samples", showWarnings = FALSE)
  saveRDS(beta_post, file = sprintf("samples/beta_post_%s.RDS", suffix))
  saveRDS(intercept_post, file = sprintf("samples/intercept_post_%s.RDS", suffix))
  saveRDS(sigma_post, file = sprintf("samples/sigma_post_%s.RDS", suffix))
  saveRDS(logml1, file = sprintf("samples/logml1_%s.RDS", suffix))
  saveRDS(times, file = sprintf("samples/times_%s.RDS", suffix))
}

# Fit model 3 (x^2 only) on `rep_exp` simulated data sets and store the
# posterior draws.
#
# For replication j the data are generated with seed 100 + j, the second
# design column is passed to fit_mcmc_m3(), and the log marginal likelihood
# is estimated with bridge_sampler(). The following files are written under
# "samples/" (the directory is created if missing), each tagged with
# "<dataset>_m3":
#   beta_post       rep_exp x (4 * B) x 1 array of coefficient draws,
#   intercept_post  rep_exp x (4 * B) matrix,
#   sigma_post      rep_exp x (4 * B) matrix,
#   logml1          log marginal likelihood per replication,
#   times           wall-clock seconds per replication (fit + bridge sampling).
#
# Reads the script-level settings train_frac, n_origin, B and rep_exp.
run_mcmc_m3 <- function(dataset) {
  # Single predictor. (Previously `d` was overwritten inside the loop with
  # ncol() of a plain vector, i.e. NULL.)
  d <- 1
  n_draws <- 4 * B  # 4 chains x B post-warmup iterations

  beta_post <- array(0, dim = c(rep_exp, n_draws, d))
  intercept_post <- matrix(0, rep_exp, n_draws)
  sigma_post <- matrix(0, rep_exp, n_draws)
  times <- numeric(rep_exp)
  logml1 <- numeric(rep_exp)

  for (j in seq_len(rep_exp)) {
    seed <- 100 + j
    data_j <- load_traintest(train_frac, seed, n_origin = n_origin)
    x <- data_j$x_train[, 2]
    y <- data_j$y_train

    start <- Sys.time()
    fit <- fit_mcmc_m3(y, x, B, seed)
    samples_fit <- as.matrix(fit)
    bridge_fit <- bridge_sampler(fit, silent = TRUE)

    # Columns follow the Stan parameters block: beta[1], intercept, sigma.
    beta_post[j, , ] <- samples_fit[, 1]
    intercept_post[j, ] <- samples_fit[, 2]
    sigma_post[j, ] <- samples_fit[, 3]
    logml1[j] <- bridge_fit$logml

    # `end - start` lets difftime pick seconds or minutes automatically; fix
    # the unit so all stored timings are comparable.
    times[j] <- as.numeric(difftime(Sys.time(), start, units = "secs"))
    print(j)
  }

  # Save posterior samples
  suffix <- paste0(dataset, "_m3")
  cat(sprintf("%s: %f (%f)\n", suffix, mean(times), sd(times) / sqrt(rep_exp)))

  dir.create("samples", showWarnings = FALSE)
  saveRDS(beta_post, file = sprintf("samples/beta_post_%s.RDS", suffix))
  saveRDS(intercept_post, file = sprintf("samples/intercept_post_%s.RDS", suffix))
  saveRDS(sigma_post, file = sprintf("samples/sigma_post_%s.RDS", suffix))
  saveRDS(logml1, file = sprintf("samples/logml1_%s.RDS", suffix))
  saveRDS(times, file = sprintf("samples/times_%s.RDS", suffix))
}




# Posterior predictive CDF of a normal regression model at one input.
#
# Arguments:
#   y_plot          grid of response values at which the CDF is evaluated,
#   x               covariate vector of the input (length d),
#   beta_post       rep x draws x d array of coefficient draws,
#   intercept_post  rep x draws matrix of intercept draws,
#   sigma_post      rep x draws matrix of standard-deviation draws,
#   j               replication index selecting the first dimension.
#
# Returns a vector with one value per element of y_plot: the normal CDF
# averaged over all posterior draws of replication j.
#
# The number of draws is taken from `sigma_post` rather than from the global
# `4 * B`, so the function works for any stored sample size.
normal_likelihood_cdf <- function(y_plot, x, beta_post, intercept_post, sigma_post, j) {
  sd_p <- sigma_post[j, ]
  n_draws <- length(sd_p)

  # Re-shape explicitly so a single coefficient or a single draw (where the
  # array subset drops to a vector) is still a draws x d matrix.
  beta_j <- matrix(beta_post[j, , ], nrow = n_draws)
  mu <- as.vector(beta_j %*% x) + intercept_post[j, ]

  # Rows: grid values; columns: posterior draws.
  cdf_matrix <- outer(y_plot, seq_len(n_draws),
                      function(y, k) pnorm(y, mean = mu[k], sd = sd_p[k]))

  rowMeans(cdf_matrix)
}

# Posterior predictive CDF for the single-predictor models at one input.
#
# Arguments:
#   y_plot          grid of response values at which the CDF is evaluated,
#   x               scalar covariate value,
#   beta_post       rep x draws x 1 array of slope draws,
#   intercept_post  rep x draws matrix of intercept draws,
#   sigma_post      rep x draws matrix of standard-deviation draws,
#   j               replication index selecting the first dimension.
#
# Returns a vector with one value per element of y_plot: the normal CDF
# averaged over all posterior draws of replication j.
#
# The number of draws is taken from `sigma_post` rather than from the global
# `4 * B`, so the function works for any stored sample size.
normal_likelihood_cdf_m2 <- function(y_plot, x, beta_post, intercept_post, sigma_post, j) {
  sd_p <- sigma_post[j, ]
  n_draws <- length(sd_p)
  mu <- as.vector(beta_post[j, , ]) * x + intercept_post[j, ]

  # Rows: grid values; columns: posterior draws.
  cdf_matrix <- outer(y_plot, seq_len(n_draws),
                      function(y, k) pnorm(y, mean = mu[k], sd = sd_p[k]))

  rowMeans(cdf_matrix)
}




# Bayesian and conformal Bayes predictive regions for model 1 (x and x^2).
#
# For every replication j (data regenerated with seed 100 + j) and every test
# point, this computes
#   * the central (1 - alpha) Bayes band from the posterior predictive CDF,
#   * the conformal Bayes region on the grid y_plot via compute_cb_region_IS(),
# together with coverage indicators, region lengths, the posterior predictive
# density on the grid and timings in seconds. Posterior draws are read from
# "samples/" and all outputs are written to "results/" (created if missing),
# tagged with "<dataset>_m1".
#
# Reads the script-level settings train_frac, n_origin, n_plot, alpha and
# rep_exp.
#
# Fixes relative to the earlier version:
#   * log-likelihoods are now computed with dnorm(..., log = TRUE); the
#     quantities are exponentiated downstream, so densities must not be
#     passed in their place;
#   * the standard deviation is taken per posterior draw (sd_p), not indexed
#     by the observation number;
#   * test-point means are computed once instead of once per grid value;
#   * the unused `logwjk_test` computation was removed;
#   * timings are stored in seconds regardless of duration.
run_conformal_m1 <- function(dataset) {
  suffix <- paste0(dataset, "_m1")

  # Load posterior samples
  beta_post <- readRDS(sprintf("samples/beta_post_%s.RDS", suffix))
  intercept_post <- readRDS(sprintf("samples/intercept_post_%s.RDS", suffix))
  sigma_post <- readRDS(sprintf("samples/sigma_post_%s.RDS", suffix))
  n_draws <- ncol(sigma_post)

  # The test-set size is the same for every seed; take it from a reference
  # split to size the result containers.
  ref_split <- load_traintest(train_frac, seed = 100, n_origin = n_origin)
  n_test <- nrow(ref_split$x_test)

  coverage_cb <- matrix(0, nrow = rep_exp, ncol = n_test)
  coverage_bayes <- matrix(0, nrow = rep_exp, ncol = n_test)
  length_cb <- matrix(0, nrow = rep_exp, ncol = n_test)
  length_bayes <- matrix(0, nrow = rep_exp, ncol = n_test)
  band_bayes <- array(0, dim = c(rep_exp, n_test, 2))
  region_cb <- array(0, dim = c(rep_exp, n_test, n_plot))
  times_bayes <- numeric(rep_exp)
  times_cb <- numeric(rep_exp)
  cdfs_test <- array(0, dim = c(rep_exp, n_plot, n_test))
  PostpredMat <- array(0, dim = c(rep_exp, n_plot, n_test))

  dir.create("results", showWarnings = FALSE)

  for (j in seq_len(rep_exp)) {
    seed <- 100 + j
    data <- load_traintest(train_frac, seed, n_origin = n_origin)
    x <- data$x_train
    y <- data$y_train
    x_test <- data$x_test
    y_test <- data$y_test
    y_plot <- data$y_plot
    dy <- data$dy

    # Bayes
    start <- Sys.time()
    cdf_test <- sapply(seq_len(n_test), function(i) {
      normal_likelihood_cdf(y_plot, x_test[i, ], beta_post, intercept_post,
                            sigma_post, j)
    })
    cdfs_test[j, , ] <- cdf_test
    for (i in seq_len(n_test)) {
      band_bayes[j, i, ] <- compute_bayes_band_MCMC(alpha, y_plot, cdf_test[, i])
      coverage_bayes[j, i] <- (y_test[i] >= band_bayes[j, i, 1]) &
        (y_test[i] <= band_bayes[j, i, 2])
      length_bayes[j, i] <- abs(band_bayes[j, i, 2] - band_bayes[j, i, 1])
    }
    times_bayes[j] <- as.numeric(difftime(Sys.time(), start, units = "secs"))

    # Conformal Bayes
    start <- Sys.time()
    beta_j <- matrix(beta_post[j, , ], nrow = n_draws)
    sd_p <- sigma_post[j, ]

    # Means per posterior draw (rows) and data point (columns); the intercept
    # vector recycles down the rows, i.e. per draw.
    mu <- beta_j %*% t(x) + intercept_post[j, ]
    mu_test <- beta_j %*% t(x_test) + intercept_post[j, ]

    # n_draws x n_train log-likelihoods of the training responses.
    logp_samp_n <- sapply(seq_len(nrow(x)), function(k) {
      dnorm(y[k], mean = mu[, k], sd = sd_p, log = TRUE)
    })

    conf_scoes_j <- array(0, dim = c(n_test, n_plot, nrow(x) + 1))

    for (i in seq_len(n_test)) {
      # n_plot x n_draws log-likelihoods of each grid value at test point i.
      logw_i <- outer(y_plot, seq_len(n_draws), function(y_grid, k) {
        dnorm(y_grid, mean = mu_test[k, i], sd = sd_p[k], log = TRUE)
      })

      # Posterior predictive density: likelihood averaged over the draws.
      PostpredMat[j, , i] <- rowMeans(exp(logw_i))

      comp_rank_pred <- compute_cb_region_IS(alpha, logp_samp_n, logw_i)
      conf_scoes_j[i, , ] <- comp_rank_pred$pred_tot
      region_cb[j, i, ] <- comp_rank_pred$region_true
      # Grid coverage: membership of the grid value closest to y_test[i].
      coverage_cb[j, i] <- region_cb[j, i, which.min(abs(y_plot - y_test[i]))]
      length_cb[j, i] <- sum(region_cb[j, i, ]) * dy
    }

    times_cb[j] <- as.numeric(difftime(Sys.time(), start, units = "secs"))
    saveRDS(conf_scoes_j, sprintf("results/conf_scoes_%d_%s.RDS", j, suffix))

    print(j)
  }

  saveRDS(cdfs_test, sprintf("results/cdfs_test_%s.RDS", suffix))
  saveRDS(PostpredMat, sprintf("results/PostpredMat_%s.RDS", suffix))

  # Save regions
  saveRDS(region_cb, sprintf("results/region_cb_%s.RDS", suffix))
  saveRDS(band_bayes, sprintf("results/band_bayes_%s.RDS", suffix))

  saveRDS(coverage_cb, sprintf("results/coverage_cb_%s.RDS", suffix))
  saveRDS(coverage_bayes, sprintf("results/coverage_bayes_%s.RDS", suffix))

  saveRDS(length_cb, sprintf("results/length_cb_%s.RDS", suffix))
  saveRDS(length_bayes, sprintf("results/length_bayes_%s.RDS", suffix))

  saveRDS(times_cb, sprintf("results/times_cb_%s.RDS", suffix))
  saveRDS(times_bayes, sprintf("results/times_bayes_%s.RDS", suffix))
}

# Bayesian and conformal Bayes predictive regions for model 2 (x only).
#
# For every replication j (data regenerated with seed 100 + j) and every test
# point, this computes
#   * the central (1 - alpha) Bayes band from the posterior predictive CDF,
#   * the conformal Bayes region on the grid y_plot via compute_cb_region_IS(),
# together with coverage indicators, region lengths, the posterior predictive
# density on the grid and timings in seconds. Only the first design column is
# used. Posterior draws are read from "samples/" and all outputs are written
# to "results/" (created if missing), tagged with "<dataset>_m2".
#
# Reads the script-level settings train_frac, n_origin, n_plot, alpha and
# rep_exp.
#
# Fixes relative to the earlier version:
#   * log-likelihoods are now computed with dnorm(..., log = TRUE); the
#     quantities are exponentiated downstream, so densities must not be
#     passed in their place;
#   * the standard deviation is taken per posterior draw (sd_p), not indexed
#     by the observation number;
#   * test-point means are computed once instead of once per grid value;
#   * the unused `logwjk_test` computation was removed;
#   * timings are stored in seconds regardless of duration.
run_conformal_m2 <- function(dataset) {
  suffix <- paste0(dataset, "_m2")

  # Load posterior samples
  beta_post <- readRDS(sprintf("samples/beta_post_%s.RDS", suffix))
  intercept_post <- readRDS(sprintf("samples/intercept_post_%s.RDS", suffix))
  sigma_post <- readRDS(sprintf("samples/sigma_post_%s.RDS", suffix))
  n_draws <- ncol(sigma_post)

  # The test-set size is the same for every seed; take it from a reference
  # split to size the result containers.
  ref_split <- load_traintest(train_frac, seed = 100, n_origin = n_origin)
  n_test <- nrow(ref_split$x_test)

  coverage_cb <- matrix(0, nrow = rep_exp, ncol = n_test)
  coverage_bayes <- matrix(0, nrow = rep_exp, ncol = n_test)
  length_cb <- matrix(0, nrow = rep_exp, ncol = n_test)
  length_bayes <- matrix(0, nrow = rep_exp, ncol = n_test)
  band_bayes <- array(0, dim = c(rep_exp, n_test, 2))
  region_cb <- array(0, dim = c(rep_exp, n_test, n_plot))
  times_bayes <- numeric(rep_exp)
  times_cb <- numeric(rep_exp)
  cdfs_test <- array(0, dim = c(rep_exp, n_plot, n_test))
  PostpredMat <- array(0, dim = c(rep_exp, n_plot, n_test))

  dir.create("results", showWarnings = FALSE)

  for (j in seq_len(rep_exp)) {
    seed <- 100 + j
    data <- load_traintest(train_frac, seed, n_origin = n_origin)
    x <- data$x_train
    y <- data$y_train
    x_test <- data$x_test
    y_test <- data$y_test
    y_plot <- data$y_plot
    dy <- data$dy

    # Bayes
    start <- Sys.time()
    cdf_test <- sapply(seq_len(n_test), function(i) {
      normal_likelihood_cdf_m2(y_plot, x_test[i, 1], beta_post, intercept_post,
                               sigma_post, j)
    })
    cdfs_test[j, , ] <- cdf_test
    for (i in seq_len(n_test)) {
      band_bayes[j, i, ] <- compute_bayes_band_MCMC(alpha, y_plot, cdf_test[, i])
      coverage_bayes[j, i] <- (y_test[i] >= band_bayes[j, i, 1]) &
        (y_test[i] <= band_bayes[j, i, 2])
      length_bayes[j, i] <- abs(band_bayes[j, i, 2] - band_bayes[j, i, 1])
    }
    times_bayes[j] <- as.numeric(difftime(Sys.time(), start, units = "secs"))

    # Conformal Bayes
    start <- Sys.time()
    beta_j <- as.matrix(beta_post[j, , ])  # n_draws x 1
    sd_p <- sigma_post[j, ]

    # Means per posterior draw (rows) and data point (columns); the intercept
    # vector recycles down the rows, i.e. per draw.
    mu <- beta_j %*% t(x[, 1]) + intercept_post[j, ]
    mu_test <- beta_j %*% t(x_test[, 1]) + intercept_post[j, ]

    # n_draws x n_train log-likelihoods of the training responses.
    logp_samp_n <- sapply(seq_len(nrow(x)), function(k) {
      dnorm(y[k], mean = mu[, k], sd = sd_p, log = TRUE)
    })

    conf_scoes_j <- array(0, dim = c(n_test, n_plot, nrow(x) + 1))

    for (i in seq_len(n_test)) {
      # n_plot x n_draws log-likelihoods of each grid value at test point i.
      logw_i <- outer(y_plot, seq_len(n_draws), function(y_grid, k) {
        dnorm(y_grid, mean = mu_test[k, i], sd = sd_p[k], log = TRUE)
      })

      # Posterior predictive density: likelihood averaged over the draws.
      PostpredMat[j, , i] <- rowMeans(exp(logw_i))

      comp_rank_pred <- compute_cb_region_IS(alpha, logp_samp_n, logw_i)
      conf_scoes_j[i, , ] <- comp_rank_pred$pred_tot
      region_cb[j, i, ] <- comp_rank_pred$region_true
      # Grid coverage: membership of the grid value closest to y_test[i].
      coverage_cb[j, i] <- region_cb[j, i, which.min(abs(y_plot - y_test[i]))]
      length_cb[j, i] <- sum(region_cb[j, i, ]) * dy
    }

    times_cb[j] <- as.numeric(difftime(Sys.time(), start, units = "secs"))
    saveRDS(conf_scoes_j, sprintf("results/conf_scoes_%d_%s.RDS", j, suffix))

    print(j)
  }

  saveRDS(cdfs_test, sprintf("results/cdfs_test_%s.RDS", suffix))
  saveRDS(PostpredMat, sprintf("results/PostpredMat_%s.RDS", suffix))

  # Save regions
  saveRDS(region_cb, sprintf("results/region_cb_%s.RDS", suffix))
  saveRDS(band_bayes, sprintf("results/band_bayes_%s.RDS", suffix))

  saveRDS(coverage_cb, sprintf("results/coverage_cb_%s.RDS", suffix))
  saveRDS(coverage_bayes, sprintf("results/coverage_bayes_%s.RDS", suffix))

  saveRDS(length_cb, sprintf("results/length_cb_%s.RDS", suffix))
  saveRDS(length_bayes, sprintf("results/length_bayes_%s.RDS", suffix))

  saveRDS(times_cb, sprintf("results/times_cb_%s.RDS", suffix))
  saveRDS(times_bayes, sprintf("results/times_bayes_%s.RDS", suffix))
}

# Bayesian and conformal Bayes predictive regions for model 3 (x^2 only).
#
# For every replication j (data regenerated with seed 100 + j) and every test
# point, this computes
#   * the central (1 - alpha) Bayes band from the posterior predictive CDF,
#   * the conformal Bayes region on the grid y_plot via compute_cb_region_IS(),
# together with coverage indicators, region lengths, the posterior predictive
# density on the grid and timings in seconds. Only the second design column
# is used. Posterior draws are read from "samples/" and all outputs are
# written to "results/" (created if missing), tagged with "<dataset>_m3".
#
# Reads the script-level settings train_frac, n_origin, n_plot, alpha and
# rep_exp.
#
# Fixes relative to the earlier version:
#   * log-likelihoods are now computed with dnorm(..., log = TRUE); the
#     quantities are exponentiated downstream, so densities must not be
#     passed in their place;
#   * the standard deviation is taken per posterior draw (sd_p), not indexed
#     by the observation number;
#   * test-point means are computed once instead of once per grid value;
#   * the unused `logwjk_test` computation was removed;
#   * timings are stored in seconds regardless of duration.
run_conformal_m3 <- function(dataset) {
  suffix <- paste0(dataset, "_m3")

  # Load posterior samples
  beta_post <- readRDS(sprintf("samples/beta_post_%s.RDS", suffix))
  intercept_post <- readRDS(sprintf("samples/intercept_post_%s.RDS", suffix))
  sigma_post <- readRDS(sprintf("samples/sigma_post_%s.RDS", suffix))
  n_draws <- ncol(sigma_post)

  # The test-set size is the same for every seed; take it from a reference
  # split to size the result containers.
  ref_split <- load_traintest(train_frac, seed = 100, n_origin = n_origin)
  n_test <- nrow(ref_split$x_test)

  coverage_cb <- matrix(0, nrow = rep_exp, ncol = n_test)
  coverage_bayes <- matrix(0, nrow = rep_exp, ncol = n_test)
  length_cb <- matrix(0, nrow = rep_exp, ncol = n_test)
  length_bayes <- matrix(0, nrow = rep_exp, ncol = n_test)
  band_bayes <- array(0, dim = c(rep_exp, n_test, 2))
  region_cb <- array(0, dim = c(rep_exp, n_test, n_plot))
  times_bayes <- numeric(rep_exp)
  times_cb <- numeric(rep_exp)
  cdfs_test <- array(0, dim = c(rep_exp, n_plot, n_test))
  PostpredMat <- array(0, dim = c(rep_exp, n_plot, n_test))

  dir.create("results", showWarnings = FALSE)

  for (j in seq_len(rep_exp)) {
    seed <- 100 + j
    data <- load_traintest(train_frac, seed, n_origin = n_origin)
    x <- data$x_train
    y <- data$y_train
    x_test <- data$x_test
    y_test <- data$y_test
    y_plot <- data$y_plot
    dy <- data$dy

    # Bayes
    start <- Sys.time()
    cdf_test <- sapply(seq_len(n_test), function(i) {
      normal_likelihood_cdf_m2(y_plot, x_test[i, 2], beta_post, intercept_post,
                               sigma_post, j)
    })
    cdfs_test[j, , ] <- cdf_test
    for (i in seq_len(n_test)) {
      band_bayes[j, i, ] <- compute_bayes_band_MCMC(alpha, y_plot, cdf_test[, i])
      coverage_bayes[j, i] <- (y_test[i] >= band_bayes[j, i, 1]) &
        (y_test[i] <= band_bayes[j, i, 2])
      length_bayes[j, i] <- abs(band_bayes[j, i, 2] - band_bayes[j, i, 1])
    }
    times_bayes[j] <- as.numeric(difftime(Sys.time(), start, units = "secs"))

    # Conformal Bayes
    start <- Sys.time()
    beta_j <- as.matrix(beta_post[j, , ])  # n_draws x 1
    sd_p <- sigma_post[j, ]

    # Means per posterior draw (rows) and data point (columns); the intercept
    # vector recycles down the rows, i.e. per draw.
    mu <- beta_j %*% t(x[, 2]) + intercept_post[j, ]
    mu_test <- beta_j %*% t(x_test[, 2]) + intercept_post[j, ]

    # n_draws x n_train log-likelihoods of the training responses.
    logp_samp_n <- sapply(seq_len(nrow(x)), function(k) {
      dnorm(y[k], mean = mu[, k], sd = sd_p, log = TRUE)
    })

    conf_scoes_j <- array(0, dim = c(n_test, n_plot, nrow(x) + 1))

    for (i in seq_len(n_test)) {
      # n_plot x n_draws log-likelihoods of each grid value at test point i.
      logw_i <- outer(y_plot, seq_len(n_draws), function(y_grid, k) {
        dnorm(y_grid, mean = mu_test[k, i], sd = sd_p[k], log = TRUE)
      })

      # Posterior predictive density: likelihood averaged over the draws.
      PostpredMat[j, , i] <- rowMeans(exp(logw_i))

      comp_rank_pred <- compute_cb_region_IS(alpha, logp_samp_n, logw_i)
      conf_scoes_j[i, , ] <- comp_rank_pred$pred_tot
      region_cb[j, i, ] <- comp_rank_pred$region_true
      # Grid coverage: membership of the grid value closest to y_test[i].
      coverage_cb[j, i] <- region_cb[j, i, which.min(abs(y_plot - y_test[i]))]
      length_cb[j, i] <- sum(region_cb[j, i, ]) * dy
    }

    times_cb[j] <- as.numeric(difftime(Sys.time(), start, units = "secs"))
    saveRDS(conf_scoes_j, sprintf("results/conf_scoes_%d_%s.RDS", j, suffix))

    print(j)
  }

  saveRDS(cdfs_test, sprintf("results/cdfs_test_%s.RDS", suffix))
  saveRDS(PostpredMat, sprintf("results/PostpredMat_%s.RDS", suffix))

  # Save regions
  saveRDS(region_cb, sprintf("results/region_cb_%s.RDS", suffix))
  saveRDS(band_bayes, sprintf("results/band_bayes_%s.RDS", suffix))

  saveRDS(coverage_cb, sprintf("results/coverage_cb_%s.RDS", suffix))
  saveRDS(coverage_bayes, sprintf("results/coverage_bayes_%s.RDS", suffix))

  saveRDS(length_cb, sprintf("results/length_cb_%s.RDS", suffix))
  saveRDS(length_bayes, sprintf("results/length_bayes_%s.RDS", suffix))

  saveRDS(times_cb, sprintf("results/times_cb_%s.RDS", suffix))
  saveRDS(times_bayes, sprintf("results/times_bayes_%s.RDS", suffix))
}








# ---- Run the experiment ----
# Stage 1: fit each model on every replication and write the posterior draws
# to "samples/".
run_mcmc_m1(dataset)
run_mcmc_m2(dataset)
run_mcmc_m3(dataset)


# Stage 2: read those draws back and write Bayes / conformal Bayes regions,
# coverage, lengths and timings to "results/". Must run after stage 1.
run_conformal_m1(dataset)
run_conformal_m2(dataset)
run_conformal_m3(dataset)


# ---- Load saved results for model 1 (x and x^2) ----
# `suffix` is reassigned for each model below; every loaded object carries the
# model tag in its name.
model_id <- "_m1"
suffix <- paste0(dataset,model_id)

# region_cb is rep_exp x n_test x n_plot; show the first replication as a
# test-point-by-grid image.
loaded_region_cb_m1 <- readRDS(sprintf("results/region_cb_%s.RDS", suffix))
dim(loaded_region_cb_m1)
image(loaded_region_cb_m1[1,,])

loaded_coverage_cb_m1 <- readRDS(sprintf("results/coverage_cb_%s.RDS", suffix))
loaded_coverage_bayes_m1 <- readRDS(sprintf("results/coverage_bayes_%s.RDS", suffix))
loaded_length_bayes_m1 <- readRDS(sprintf("results/length_bayes_%s.RDS", suffix))
loaded_length_cb_m1 <- readRDS(sprintf("results/length_cb_%s.RDS", suffix))
# MCMC timings and log marginal likelihoods live under "samples/".
loaded_times_m1 <- readRDS(sprintf("samples/times_%s.RDS", suffix))
loaded_logml1_m1 <- readRDS(sprintf("samples/logml1_%s.RDS", suffix))
loaded_times_bayes_m1 <- readRDS(sprintf("results/times_bayes_%s.RDS", suffix))
loaded_times_cb_m1 <- readRDS(sprintf("results/times_cb_%s.RDS", suffix))
loaded_cdfs_test_m1 <- readRDS(sprintf("results/cdfs_test_%s.RDS", suffix))
loaded_PostpredMat_m1 <- readRDS(sprintf("results/PostpredMat_%s.RDS", suffix))



# ---- Load saved results for model 2 (x only) ----
model_id <- "_m2"
suffix <- paste0(dataset,model_id)

loaded_region_cb_m2 <- readRDS(sprintf("results/region_cb_%s.RDS", suffix))
dim(loaded_region_cb_m2)
image(loaded_region_cb_m2[1,,])

loaded_coverage_cb_m2 <- readRDS(sprintf("results/coverage_cb_%s.RDS", suffix))
loaded_coverage_bayes_m2 <- readRDS(sprintf("results/coverage_bayes_%s.RDS", suffix))
loaded_length_bayes_m2 <- readRDS(sprintf("results/length_bayes_%s.RDS", suffix))
loaded_length_cb_m2 <- readRDS(sprintf("results/length_cb_%s.RDS", suffix))
loaded_times_m2 <- readRDS(sprintf("samples/times_%s.RDS", suffix))
loaded_logml1_m2 <- readRDS(sprintf("samples/logml1_%s.RDS", suffix))
loaded_times_bayes_m2 <- readRDS(sprintf("results/times_bayes_%s.RDS", suffix))
loaded_times_cb_m2 <- readRDS(sprintf("results/times_cb_%s.RDS", suffix))
loaded_cdfs_test_m2 <- readRDS(sprintf("results/cdfs_test_%s.RDS", suffix))
loaded_PostpredMat_m2 <- readRDS(sprintf("results/PostpredMat_%s.RDS", suffix))




# ---- Load saved results for model 3 (x^2 only) ----
model_id <- "_m3"
suffix <- paste0(dataset,model_id)

loaded_region_cb_m3 <- readRDS(sprintf("results/region_cb_%s.RDS", suffix))
dim(loaded_region_cb_m3)
image(loaded_region_cb_m3[1,,])

loaded_coverage_cb_m3 <- readRDS(sprintf("results/coverage_cb_%s.RDS", suffix))
loaded_coverage_bayes_m3 <- readRDS(sprintf("results/coverage_bayes_%s.RDS", suffix))
loaded_length_bayes_m3 <- readRDS(sprintf("results/length_bayes_%s.RDS", suffix))
loaded_length_cb_m3 <- readRDS(sprintf("results/length_cb_%s.RDS", suffix))
loaded_times_m3 <- readRDS(sprintf("samples/times_%s.RDS", suffix))
loaded_logml1_m3 <- readRDS(sprintf("samples/logml1_%s.RDS", suffix))
loaded_times_bayes_m3 <- readRDS(sprintf("results/times_bayes_%s.RDS", suffix))
loaded_times_cb_m3 <- readRDS(sprintf("results/times_cb_%s.RDS", suffix))
loaded_cdfs_test_m3 <- readRDS(sprintf("results/cdfs_test_%s.RDS", suffix))
loaded_PostpredMat_m3 <- readRDS(sprintf("results/PostpredMat_%s.RDS", suffix))








# Labels used in the printed summaries: [1] Bayes band, [2] conformal Bayes.
methods_tot = c('bayes', 'cb')


# Mean log marginal likelihood (bridge sampling) across replications, per
# model.
mean(loaded_logml1_m1)

mean(loaded_logml1_m2)

mean(loaded_logml1_m3)

# MCMC times: mean and standard error across replications, per model.
# Model 1
rep_times_m1 <- length(loaded_times_m1)
mean_times_m1 <- mean(loaded_times_m1)
se_times_m1 <- sd(loaded_times_m1) / sqrt(rep_times_m1)
cat(sprintf("%s times for m1 is %.3f (%.3f)\n\n", methods_tot[1], mean_times_m1, se_times_m1))

# Model 2
rep_times_m2 <- length(loaded_times_m2)
mean_times_m2 <- mean(loaded_times_m2)
se_times_m2 <- sd(loaded_times_m2) / sqrt(rep_times_m2)
cat(sprintf("%s times for m2 is %.3f (%.3f)\n\n", methods_tot[1], mean_times_m2, se_times_m2))


# Model 3
rep_times_m3 <- length(loaded_times_m3)
mean_times_m3 <- mean(loaded_times_m3)
se_times_m3 <- sd(loaded_times_m3) / sqrt(rep_times_m3)
cat(sprintf("%s times for m3 is %.3f (%.3f)\n\n", methods_tot[1], mean_times_m3, se_times_m3))



# Coverage of the Bayes band.
# The coverage matrices are rep_exp x n_test, so rowMeans() gives the average
# coverage over test points within each replication; the reported mean and
# standard error are then taken across replications.
# Model 1
coverage_m1 <- rowMeans(loaded_coverage_bayes_m1, na.rm = TRUE)
rep_coverage_m1 <- length(coverage_m1)
mean_coverage_m1 <- mean(coverage_m1)
se_coverage_m1 <- sd(coverage_m1) / sqrt(rep_coverage_m1)
cat(sprintf("%s coverage for m1 is %.3f (%.3f)\n", methods_tot[1], mean_coverage_m1, se_coverage_m1))
# Model 2
coverage_m2 <- rowMeans(loaded_coverage_bayes_m2 , na.rm = TRUE)
rep_coverage_m2 <- length(coverage_m2)
mean_coverage_m2 <- mean(coverage_m2)
se_coverage_m2 <- sd(coverage_m2) / sqrt(rep_coverage_m2)
cat(sprintf("%s coverage for m2 is %.3f (%.3f)\n", methods_tot[1], mean_coverage_m2, se_coverage_m2))
# Model 3
coverage_m3 <- rowMeans(loaded_coverage_bayes_m3, na.rm = TRUE)
rep_coverage_m3 <- length(coverage_m3)
mean_coverage_m3 <- mean(coverage_m3)
se_coverage_m3 <- sd(coverage_m3) / sqrt(rep_coverage_m3)
cat(sprintf("%s coverage for m3 is %.3f (%.3f)\n", methods_tot[1], mean_coverage_m3, se_coverage_m3))




# Coverage of the conformal Bayes region (same aggregation as above).
# Model 1
coverage_cb_m1 <- rowMeans(loaded_coverage_cb_m1, na.rm = TRUE)
rep_coverage_cb_m1 <- length(coverage_cb_m1)
mean_coverage_cb_m1 <- mean(coverage_cb_m1)
se_coverage_cb_m1 <- sd(coverage_cb_m1) / sqrt(rep_coverage_cb_m1)
cat(sprintf("%s cb coverage for m1 is %.3f (%.3f)\n", methods_tot[2], mean_coverage_cb_m1, se_coverage_cb_m1))
# Model 2
coverage_cb_m2 <- rowMeans(loaded_coverage_cb_m2, na.rm = TRUE)
rep_coverage_cb_m2 <- length(coverage_cb_m2)
mean_coverage_cb_m2 <- mean(coverage_cb_m2)
se_coverage_cb_m2 <- sd(coverage_cb_m2) / sqrt(rep_coverage_cb_m2)
cat(sprintf("%s cb coverage for m2 is %.3f (%.3f)\n", methods_tot[2], mean_coverage_cb_m2, se_coverage_cb_m2))
# Model 3
coverage_cb_m3 <- rowMeans(loaded_coverage_cb_m3, na.rm = TRUE)
rep_coverage_cb_m3 <- length(coverage_cb_m3)
mean_coverage_cb_m3 <- mean(coverage_cb_m3)
se_coverage_cb_m3 <- sd(coverage_cb_m3) / sqrt(rep_coverage_cb_m3)
cat(sprintf("%s cb coverage for m3 is %.3f (%.3f)\n", methods_tot[2], mean_coverage_cb_m3, se_coverage_cb_m3))


# Length
# Summaries of loaded_length_bayes_m1..m3: row means (NAs ignored), then the
# mean of those row means and its standard error, reported per model.

#1
length_m1 <- rowMeans(loaded_length_bayes_m1, na.rm = TRUE)
rep_length_m1 <- length(length_m1)
mean_length_m1 <- mean(length_m1)
se_length_m1 <- sd(length_m1) / sqrt(rep_length_m1)
cat(sprintf("%s length for m1 is %.3f (%.3f)\n", methods_tot[1], mean_length_m1, se_length_m1))
#2
length_m2 <- rowMeans(loaded_length_bayes_m2, na.rm = TRUE)
rep_length_m2 <- length(length_m2)
mean_length_m2 <- mean(length_m2)
se_length_m2 <- sd(length_m2) / sqrt(rep_length_m2)
cat(sprintf("%s length for m2 is %.3f (%.3f)\n", methods_tot[1], mean_length_m2, se_length_m2))
#3
length_m3 <- rowMeans(loaded_length_bayes_m3, na.rm = TRUE)
rep_length_m3 <- length(length_m3)
mean_length_m3 <- mean(length_m3)
# Use model 3's own spread here; the previous code took sd(length_m1), which
# reported model 1's standard error next to model 3's mean.
se_length_m3 <- sd(length_m3) / sqrt(rep_length_m3)
cat(sprintf("%s length for m3 is %.3f (%.3f)\n", methods_tot[1], mean_length_m3, se_length_m3))



# Length summaries of loaded_length_cb_m1..m3, grouped by statistic.
# The report lines are printed in model order 1, 2, 3.

# Row means of each stored length matrix, ignoring NAs.
length_cb_m1 <- rowMeans(loaded_length_cb_m1, na.rm = TRUE)
length_cb_m2 <- rowMeans(loaded_length_cb_m2, na.rm = TRUE)
length_cb_m3 <- rowMeans(loaded_length_cb_m3, na.rm = TRUE)

# Number of row means per model.
rep_length_cb_m1 <- length(length_cb_m1)
rep_length_cb_m2 <- length(length_cb_m2)
rep_length_cb_m3 <- length(length_cb_m3)

# Mean of the row means.
mean_length_cb_m1 <- mean(length_cb_m1)
mean_length_cb_m2 <- mean(length_cb_m2)
mean_length_cb_m3 <- mean(length_cb_m3)

# Standard error of that mean.
se_length_cb_m1 <- sd(length_cb_m1) / sqrt(rep_length_cb_m1)
se_length_cb_m2 <- sd(length_cb_m2) / sqrt(rep_length_cb_m2)
se_length_cb_m3 <- sd(length_cb_m3) / sqrt(rep_length_cb_m3)

cat(
  sprintf(
    "%s cb length for m1 is %.3f (%.3f)\n",
    methods_tot[2], mean_length_cb_m1, se_length_cb_m1
  )
)
cat(
  sprintf(
    "%s cb length for m2 is %.3f (%.3f)\n",
    methods_tot[2], mean_length_cb_m2, se_length_cb_m2
  )
)
cat(
  sprintf(
    "%s cb length for m3 is %.3f (%.3f)\n",
    methods_tot[2], mean_length_cb_m3, se_length_cb_m3
  )
)




# Times
# Mean and standard error of loaded_times_bayes_m1..m3, grouped by statistic.
# The report lines are printed in model order 1, 2, 3.

# Number of stored timings per model.
rep_times_m1 <- length(loaded_times_bayes_m1)
rep_times_m2 <- length(loaded_times_bayes_m2)
rep_times_m3 <- length(loaded_times_bayes_m3)

# Mean run time.
mean_times_m1 <- mean(loaded_times_bayes_m1)
mean_times_m2 <- mean(loaded_times_bayes_m2)
mean_times_m3 <- mean(loaded_times_bayes_m3)

# Standard error of the mean run time.
se_times_m1 <- sd(loaded_times_bayes_m1) / sqrt(rep_times_m1)
se_times_m2 <- sd(loaded_times_bayes_m2) / sqrt(rep_times_m2)
se_times_m3 <- sd(loaded_times_bayes_m3) / sqrt(rep_times_m3)

cat(
  sprintf(
    "%s times for m1 is %.3f (%.3f)\n\n",
    methods_tot[1], mean_times_m1, se_times_m1
  )
)
cat(
  sprintf(
    "%s times for m2 is %.3f (%.3f)\n\n",
    methods_tot[1], mean_times_m2, se_times_m2
  )
)
cat(
  sprintf(
    "%s times for m3 is %.3f (%.3f)\n\n",
    methods_tot[1], mean_times_m3, se_times_m3
  )
)



# Mean and standard error of loaded_times_cb_m1..m3, grouped by statistic.
# The report lines are printed in model order 1, 2, 3.

# Number of stored timings per model.
rep_times_cb_m1 <- length(loaded_times_cb_m1)
rep_times_cb_m2 <- length(loaded_times_cb_m2)
rep_times_cb_m3 <- length(loaded_times_cb_m3)

# Mean run time.
mean_times_cb_m1 <- mean(loaded_times_cb_m1)
mean_times_cb_m2 <- mean(loaded_times_cb_m2)
mean_times_cb_m3 <- mean(loaded_times_cb_m3)

# Standard error of the mean run time.
se_times_cb_m1 <- sd(loaded_times_cb_m1) / sqrt(rep_times_cb_m1)
se_times_cb_m2 <- sd(loaded_times_cb_m2) / sqrt(rep_times_cb_m2)
se_times_cb_m3 <- sd(loaded_times_cb_m3) / sqrt(rep_times_cb_m3)

cat(
  sprintf(
    "%s cb times for m1 is %.3f (%.3f)\n\n",
    methods_tot[2], mean_times_cb_m1, se_times_cb_m1
  )
)
cat(
  sprintf(
    "%s cb times for m2 is %.3f (%.3f)\n\n",
    methods_tot[2], mean_times_cb_m2, se_times_cb_m2
  )
)
cat(
  sprintf(
    "%s cb times for m3 is %.3f (%.3f)\n\n",
    methods_tot[2], mean_times_cb_m3, se_times_cb_m3
  )
)




# Inspect the dimensions of the stored per-model arrays.
dim(loaded_cdfs_test_m1)
dim(loaded_cdfs_test_m2)
dim(loaded_cdfs_test_m3)

dim(loaded_PostpredMat_m1)
dim(loaded_PostpredMat_m2)
dim(loaded_PostpredMat_m3)

# Experiment sizes are read off model 1's stored CDF array, whose three
# dimensions are used as (replicates, grid points, test points).
cdf_dims <- dim(loaded_cdfs_test_m1)
rep_exp <- cdf_dims[1]
n_plot <- cdf_dims[2]
n_test <- cdf_dims[3]

# Preallocate the BMA result containers (all zero-filled).
# Per-replicate, per-test-point coverage indicators and interval lengths.
coverage_cb_bma <- array(0, dim = c(rep_exp, n_test))
coverage_bayes_bma <- array(0, dim = c(rep_exp, n_test))
length_cb_bma <- array(0, dim = c(rep_exp, n_test))
length_bayes_bma <- array(0, dim = c(rep_exp, n_test))

# Two band endpoints per test point; one region flag per grid point.
band_bayes_bma <- array(0, dim = c(rep_exp, n_test, 2))
region_cb_bma <- array(0, dim = c(rep_exp, n_test, n_plot))

# One timing per replicate for each method.
times_bayes_bma <- rep(0, rep_exp)
times_cb_bma <- rep(0, rep_exp)

# Model-averaged predictive CDFs, same layout as the per-model arrays.
cdfs_test_bma <- array(0, dim = c(rep_exp, n_plot, n_test))


# Bayesian model averaging (BMA) and conformal BMA, one replicate at a time,
# built from the stored per-model results of models 1-3.
for (j in seq_len(rep_exp)) {
  # Regenerate the data for replicate j with seed 100 + j.
  # NOTE(review): assumes the per-model results loaded above were produced
  # with the same seeds -- confirm against the fitting code.
  seed <- 100 + j
  data <- load_traintest(train_frac, seed, n_origin = n_origin)
  x <- data$x_train
  y <- data$y_train
  x_test <- data$x_test
  y_test <- data$y_test
  y_plot <- data$y_plot
  dy <- data$dy
  n <- nrow(x)
  d <- ncol(x)
  n_plot <- length(y_plot)
  n_test <- nrow(x_test)
  n_train <- nrow(x)

  # Posterior model probabilities from the log marginal likelihoods.
  # Subtracting the maximum before exponentiating gives the same normalised
  # weights as exp(logml) / sum(exp(logml)) but cannot underflow to 0/0 or
  # overflow to Inf/Inf when the log marginal likelihoods are large in size.
  logml <- c(loaded_logml1_m1[j], loaded_logml1_m2[j], loaded_logml1_m3[j])
  pmps <- exp(logml - max(logml))
  pmps <- pmps / sum(pmps)

  # Bayes
  start <- Sys.time()
  # PMP-weighted mixture of the three per-model predictive CDFs.
  cdf_test_bma <- pmps[1] * loaded_cdfs_test_m1[j, , ] +
    pmps[2] * loaded_cdfs_test_m2[j, , ] +
    pmps[3] * loaded_cdfs_test_m3[j, , ]

  cdfs_test_bma[j, , ] <- cdf_test_bma
  for (i in seq_len(n_test)) {
    band_bayes_bma[j, i, ] <- compute_bayes_band_MCMC(alpha, y_plot, cdf_test_bma[, i])
    coverage_bayes_bma[j, i] <- (y_test[i] >= band_bayes_bma[j, i, 1]) &
      (y_test[i] <= band_bayes_bma[j, i, 2])
    length_bayes_bma[j, i] <- abs(band_bayes_bma[j, i, 2] - band_bayes_bma[j, i, 1])
  }

  end <- Sys.time()
  # Store elapsed time in seconds explicitly: `end - start` picks its units
  # automatically (secs, mins, ...) and the unit is lost once the value is
  # assigned into a plain numeric vector.
  times_bayes_bma[j] <- as.numeric(difftime(end, start, units = "secs"))

  # Conformal Bayes BMA

  # Per-model conformity scores for this replicate (read outside the timer).
  conf_scoes_j_m1 <- readRDS(sprintf("results/conf_scoes_%d_%s.RDS", j, paste0(dataset, "_m1")))
  conf_scoes_j_m2 <- readRDS(sprintf("results/conf_scoes_%d_%s.RDS", j, paste0(dataset, "_m2")))
  conf_scoes_j_m3 <- readRDS(sprintf("results/conf_scoes_%d_%s.RDS", j, paste0(dataset, "_m3")))

  start <- Sys.time()
  conf_scoes_j_bma <- array(0, dim = c(n_test, n_plot, n_train + 1))
  for (i in seq_len(n_test)) {
    # Each model's scores are scaled by its PMP and by loaded_PostpredMat_*
    # for this replicate and test point, then summed over the three models.
    conf_scoes_j_bma[i, , ] <-
      pmps[1] * loaded_PostpredMat_m1[j, , i] * conf_scoes_j_m1[i, , ] +
      pmps[2] * loaded_PostpredMat_m2[j, , i] * conf_scoes_j_m2[i, , ] +
      pmps[3] * loaded_PostpredMat_m3[j, , i] * conf_scoes_j_m3[i, , ]

    # For each grid value, count the scores that do not exceed the score in
    # the last slice (index n + 1).
    rank_cp <- rowSums(conf_scoes_j_bma[i, , ] <= conf_scoes_j_bma[i, , n + 1])

    # A grid value is in the region when its rank exceeds alpha * (n + 1).
    region_cb_bma[j, i, ] <- rank_cp > alpha * (n + 1)
    coverage_cb_bma[j, i] <- region_cb_bma[j, i, which.min(abs(y_plot - y_test[i]))] # Grid coverage
    length_cb_bma[j, i] <- sum(region_cb_bma[j, i, ]) * dy
  }

  end <- Sys.time()
  times_cb_bma[j] <- as.numeric(difftime(end, start, units = "secs"))

  # NOTE(review): the file name says "diabetes" although `dataset` names a
  # different experiment; kept unchanged so existing readers of this path
  # keep working -- confirm whether it should use `dataset` instead.
  saveRDS(conf_scoes_j_bma, sprintf("results/conf_scoes_%d_bma_diabetes.RDS", j))

  print(j)
}
# Persist the BMA / conformal-BMA results under results/ and read the summary
# inputs back in. All file names are suffixed with `dataset`.

# NOTE(review): this bare sprintf() only builds a path string; it does not
# save or load anything. It looks like a leftover check -- confirm.
sprintf("results/coverage_cb_bma_%s.RDS",dataset)

# Model-averaged predictive CDFs.
saveRDS(cdfs_test_bma, sprintf("results/cdfs_test_bma_%s.RDS",dataset))

# Save regions
saveRDS(region_cb_bma, sprintf("results/region_cb_bma_%s.RDS",dataset))
saveRDS(band_bayes_bma, sprintf("results/band_bayes_bma_%s.RDS",dataset))

# Coverage indicators.
saveRDS(coverage_cb_bma, sprintf("results/coverage_cb_bma_%s.RDS",dataset))
saveRDS(coverage_bayes_bma, sprintf("results/coverage_bayes_bma_%s.RDS",dataset))

# Interval / region lengths.
saveRDS(length_cb_bma, sprintf("results/length_cb_bma_%s.RDS",dataset))
saveRDS(length_bayes_bma, sprintf("results/length_bayes_bma_%s.RDS",dataset))

# Per-replicate run times.
saveRDS(times_cb_bma, sprintf("results/times_cb_bma_%s.RDS",dataset))
saveRDS(times_bayes_bma, sprintf("results/times_bayes_bma_%s.RDS",dataset))


# Reload the files written above into loaded_* objects, which the summary
# code further down reads.
loaded_coverage_cb_bma <- readRDS(sprintf("results/coverage_cb_bma_%s.RDS",dataset))
loaded_coverage_bayes_bma <- readRDS(sprintf("results/coverage_bayes_bma_%s.RDS",dataset))
loaded_length_cb_bma <- readRDS(sprintf("results/length_cb_bma_%s.RDS",dataset))
loaded_length_bayes_bma <- readRDS(sprintf("results/length_bayes_bma_%s.RDS",dataset))
loaded_times_cb_bma <- readRDS(sprintf("results/times_cb_bma_%s.RDS",dataset))
loaded_times_bayes_bma <- readRDS(sprintf("results/times_bayes_bma_%s.RDS",dataset))








library(ggplot2)

# Posterior model probabilities (PMPs) of models 1-3 for every replicate:
# one row per replicate, one column per model.
# The replicate count comes from rep_exp instead of a hard-coded 50 so the
# table always matches the number of replicates used by the BMA loop.
PMPS_rep <- matrix(0, nrow = rep_exp, ncol = 3)
for (j in seq_len(rep_exp)) {
  logml <- c(loaded_logml1_m1[j], loaded_logml1_m2[j], loaded_logml1_m3[j])

  # Normalise after subtracting the maximum: same weights as
  # exp(logml) / sum(exp(logml)), without underflow/overflow in exp().
  pmps <- exp(logml - max(logml))
  pmps <- pmps / sum(pmps)
  PMPS_rep[j, ] <- pmps
}

boxplot(PMPS_rep[, 1])

# NOTE(review): this writes CSV text to a file with an .RDS extension, so
# readRDS() cannot open it (read.csv() can). The path is kept unchanged for
# any existing readers -- confirm whether it should end in .csv.
write.csv(PMPS_rep, file = sprintf("results/PMPS_rep_%s.RDS", dataset), row.names = FALSE)

# as.data.frame() names the matrix columns V1, V2, V3.
df <- as.data.frame(PMPS_rep)
df$Index <- 1:nrow(df)

# Convert to long format
df_long <- tidyr::pivot_longer(df, cols = starts_with("V"), names_to = "Series", values_to = "Value")
df_long$Series <- dplyr::recode(df_long$Series,
                         V1 = "Model 1",
                         V2 = "Model 2",
                         V3 = "Model 3")
# Plot using ggplot2: one line per model across replicates.
plot_pmps <- ggplot(df_long, aes(x = Index, y = Value, color = Series, group = Series)) +
  geom_line(size = 1.1) +
  labs(title = " ", x = "Experiment No.", y = "Posterior model probabilities") +
  theme_classic()
plot_pmps
ggsave(filename = "PMPs_plot.png", plot = plot_pmps, width = 8, height = 6)



# Coverage
# BMA and conformal BMA coverage, grouped by statistic. Note that
# coverage_cb_bma is reassigned here from the per-test-point matrix to its
# vector of row means.

# Row means of each reloaded coverage matrix, ignoring NAs.
coverage_bma <- rowMeans(loaded_coverage_bayes_bma, na.rm = TRUE)
coverage_cb_bma <- rowMeans(loaded_coverage_cb_bma, na.rm = TRUE)

# Number of row means per method.
rep_coverage_bma <- length(coverage_bma)
rep_coverage_cb_bma <- length(coverage_cb_bma)

# Mean of the row means.
mean_coverage_bma <- mean(coverage_bma)
mean_coverage_cb_bma <- mean(coverage_cb_bma)

# Standard error of that mean.
se_coverage_bma <- sd(coverage_bma) / sqrt(rep_coverage_bma)
se_coverage_cb_bma <- sd(coverage_cb_bma) / sqrt(rep_coverage_cb_bma)

cat(
  sprintf(
    "%s bma coverage is %.3f (%.3f)\n",
    methods_tot[1], mean_coverage_bma, se_coverage_bma
  )
)
cat(
  sprintf(
    "%s cb bma coverage is %.3f (%.3f)\n",
    methods_tot[2], mean_coverage_cb_bma, se_coverage_cb_bma
  )
)

# Length
# BMA and conformal BMA lengths, grouped by statistic. Note that
# length_cb_bma is reassigned here from the per-test-point matrix to its
# vector of row means.

# Row means of each reloaded length matrix, ignoring NAs.
length_bma <- rowMeans(loaded_length_bayes_bma, na.rm = TRUE)
length_cb_bma <- rowMeans(loaded_length_cb_bma, na.rm = TRUE)

# Number of row means per method.
rep_length_bma <- length(length_bma)
rep_length_cb_bma <- length(length_cb_bma)

# Mean of the row means.
mean_length_bma <- mean(length_bma)
mean_length_cb_bma <- mean(length_cb_bma)

# Standard error of that mean.
se_length_bma <- sd(length_bma) / sqrt(rep_length_bma)
se_length_cb_bma <- sd(length_cb_bma) / sqrt(rep_length_cb_bma)

cat(
  sprintf(
    "%s bma length is %.3f (%.3f)\n",
    methods_tot[1], mean_length_bma, se_length_bma
  )
)
cat(
  sprintf(
    "%s cb bma length is %.3f (%.3f)\n",
    methods_tot[2], mean_length_cb_bma, se_length_cb_bma
  )
)



# Times
# Mean and standard error of the reloaded BMA and conformal BMA run times,
# grouped by statistic.

# Number of stored timings per method.
rep_times <- length(loaded_times_bayes_bma)
rep_times_cb_bma <- length(loaded_times_cb_bma)

# Mean run time.
mean_times <- mean(loaded_times_bayes_bma)
mean_times_cb_bma <- mean(loaded_times_cb_bma)

# Standard error of the mean run time.
se_times <- sd(loaded_times_bayes_bma) / sqrt(rep_times)
se_times_cb_bma <- sd(loaded_times_cb_bma) / sqrt(rep_times_cb_bma)

cat(
  sprintf(
    "%s times is %.5f (%.7f)\n\n",
    methods_tot[1], mean_times, se_times
  )
)
cat(
  sprintf(
    "%s cb bma times is %.5f (%.7f)\n\n",
    methods_tot[2], mean_times_cb_bma, se_times_cb_bma
  )
)




# Collect the per-replicate summaries of all eight methods into two tables
# (one column per method), write them to CSV and read them back.
method_labels <- c("CBMA", "BMA", "Bayes M1", "Bayes M2", "Bayes M3", "CB M1", "CB M2", "CB M3")

df_data_len <- data.frame(length_cb_bma, length_bma, length_m1, length_m2, length_m3,
                          length_cb_m1, length_cb_m2, length_cb_m3)
colnames(df_data_len) <- method_labels

df_data_cov <- data.frame(coverage_cb_bma, coverage_bma, coverage_m1, coverage_m2, coverage_m3,
                          coverage_cb_m1, coverage_cb_m2, coverage_cb_m3)
colnames(df_data_cov) <- method_labels

# NOTE(review): paste() separates its arguments with a space, so these file
# names contain spaces (e.g. "data_cov_n 50 .csv"). Kept unchanged so files
# already on disk and any other readers still match.
cov_csv <- paste("data_cov_n", nsize, ".csv")
len_csv <- paste("data_len_n", nsize, ".csv")

write.csv(df_data_cov, cov_csv, row.names = FALSE)
write.csv(df_data_len, len_csv, row.names = FALSE)

# check.names = FALSE keeps the labels exactly as written. With the default,
# read.csv() rewrites "Bayes M1" to "Bayes.M1", and the mangled names then
# appear in everything derived from these tables.
df_data_cov <- read.csv(cov_csv, check.names = FALSE)
df_data_len <- read.csv(len_csv, check.names = FALSE)

# Reshape the coverage table to long format: one row per (method, value)
# pair. pivot_longer() replaces the superseded gather() and matches the
# reshaping call used for the PMP plot earlier in this script.
data_cov_long <- tidyr::pivot_longer(df_data_cov, cols = tidyr::everything(),
                                     names_to = "Model", values_to = "Coverage")

# Box plot of coverage per method, with the mean marked by a black diamond
# and the nominal level 1 - alpha drawn as a dashed red line.
Coverage_plot <- ggplot(data_cov_long, aes(x = Model, y = Coverage, fill = Model)) +
  geom_boxplot() +
  geom_point(stat = "summary", fun = "mean", shape = 18, size = 3, color = "black") +  # Add mean points
  labs(title = paste("n = ",nsize), x = "Models", y = "Coverage probability")+
  #scale_fill_manual(values = c("#4285f4","#34a853", "#9BCE33", "#66CC99", "lightgreen","#4285f4","#34a853", "#9BCE33", "#66CC99", "lightgreen","#66CC99")) +
  geom_hline(yintercept = 1-alpha, linetype = "dashed", color = "red")  +
  # guides(fill = FALSE)+  # Adjust the y-axis limits as needed
  scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0.0, 1))+
  theme_classic()

Coverage_plot

ggsave(paste("Coverage_plot_n",nsize,".png"), plot = Coverage_plot, width = 8, height = 6, units = "in", dpi = 300)





# Reshape the length table to long format: one row per (method, value) pair.
# pivot_longer() replaces the superseded gather() and matches the reshaping
# call used for the PMP plot earlier in this script.
data_len_long <- tidyr::pivot_longer(df_data_len, cols = tidyr::everything(),
                                     names_to = "Model", values_to = "Length")

# Box plot of length per method, with the mean marked by a black diamond.
# NOTE(review): scale limits of c(0.4, 1) drop any value outside that range
# before the boxes and means are computed -- confirm this is intended.
Int_length_plot <- ggplot(data_len_long, aes(x = Model, y = Length, fill = Model)) +
  geom_boxplot() +
  geom_point(stat = "summary", fun = "mean", shape = 18, size = 3, color = "black") +  # Add mean points
  labs(title = paste("n = ",nsize), x = "Models", y = "Length")+
  #scale_fill_manual(values = c("#4285f4","#34a853", "#9BCE33", "#66CC99", "lightgreen","#4285f4","#34a853", "#9BCE33", "#66CC99", "lightgreen","#66CC99")) +
  #geom_hline(yintercept = 1-alpha, linetype = "dashed", color = "red")  +
  # guides(fill = FALSE)+  # Adjust the y-axis limits as needed
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0.4, 1))+
  theme_classic() +
  theme(legend.position = "none")  # Remove the legend

Int_length_plot
ggsave(paste("Int_length_plot_n",nsize,".png"), plot = Int_length_plot, width = 8, height = 6, units = "in")


# Column-wise means and standard deviations of the coverage and length
# tables, shown at top level and then saved as CSV.
colMeans(df_data_cov)
colSds(as.matrix(df_data_cov))

colMeans(df_data_len)
colSds(as.matrix(df_data_len))



# Calculate column means and standard deviations for df_data_cov
mean_cov <- colMeans(df_data_cov)
sd_cov <- colSds(as.matrix(df_data_cov))

# Calculate column means and standard deviations for df_data_len
mean_len <- colMeans(df_data_len)
sd_len <- colSds(as.matrix(df_data_len))

# Combine the means and standard deviations into data frames.
# The method label is stored as an explicit Model column: the files are
# written with row.names = FALSE, which previously discarded the only labels
# and left rows of Mean/SD that could not be matched to a method. The
# existing Mean and SD columns keep their names.
df_cov_all <- data.frame(Model = names(mean_cov), Mean = mean_cov, SD = sd_cov)
df_len_all <- data.frame(Model = names(mean_len), Mean = mean_len, SD = sd_len)

# Save the data frames as CSV files
write.csv(df_cov_all, "df_cov_stats.csv", row.names = FALSE)
write.csv(df_len_all, "df_len_stats.csv", row.names = FALSE)



