getwd()  # Check current working directory
setwd("C:/Users/bhap2601/OneDrive - University of Alberta/UAlberta2024/Projects/ICLR2024_CBMA_Revision/RebuttalExperiments/CBMA_finalcode/TrueModelNotInModelSpace/Polynomialmodel/n_100")
getwd()

# Load necessary libraries
library(rstan)
library(rstanarm)
library(ggplot2)
library(tidyr)
library(dplyr)  
library(bridgesampling)
library(foreach)
library(doParallel)
library(reshape2)

#Necessary functions
compute_rank_IS <- function(logp_samp_n, logwjk) {
  n <- ncol(logp_samp_n)  # logp_samp_n is nchains*B x n
  n_plot <- nrow(logwjk) # 100 X n_test
  rank_cp <- rep(0, n_plot)
  
  # Compute importance sampling weights and normalizing
  wjk <- exp(logwjk)
  Zjk <- rowSums(wjk)
  
  # Compute predictives for y_i,x_i and y_new,x_n+1
  p_cp <- (wjk / Zjk) %*% exp(logp_samp_n)
  p_new <- rowSums(wjk^2) / Zjk
  
  # Compute nonconformity score and sort
  pred_tot <- cbind(p_cp, p_new)
  rank_cp <- rowSums(pred_tot <= pred_tot[, n + 1])
  return(list(rank_cp = rank_cp, pred_tot = pred_tot))
}

compute_cb_region_IS <- function(alpha_target , logp_samp_n, logwjk) {
  n <- ncol(logp_samp_n)  # logp_samp_n is n_chains*B x n_0
  pred_rank <- compute_rank_IS(logp_samp_n, logwjk)
  rank_cp <- pred_rank$rank_cp
  region_true <- rank_cp > alpha_target * (n + 1)
  return(list(region_true = region_true,pred_tot = pred_rank$pred_tot))
}



compute_bayes_band_MCMC <- function(alpha_target , y_plot, cdf_pred) {
  #cdf_pred <- apply(cdf_pred, 1, mean)
  
  band_bayes <- numeric(2)
  band_bayes[1] <- y_plot[which.min(abs(cdf_pred - alpha_target / 2))]
  band_bayes[2] <- y_plot[which.min(abs(cdf_pred - (1 - alpha_target / 2)))]
  
  return(band_bayes)
}

# Function to be approximated
f <- function(x, theta) {
  return(theta * exp(x) / (1 + exp(x)))
}

Ck <- function(dv, k, s_x) (dv)^(k - 1) * exp(-dv^2 / (2 * s_x^2))


###########################################################
########  CP BMA


set.seed(123)
rep_exp <- 50  # Number of repetitions

theta = 1 
sample_size = 50
n_0 <- floor(sample_size*0.60) 
alpha_target = 0.2
B = 2000
n_chains = 4
n_plot <- 100
n_test <- sample_size - n_0

M <- 11 # floor(3 * n_0^(1/3))


times = matrix(0,nrow = rep_exp, ncol = M)
for(e in 1:rep_exp){
  seed = 1234 + e
  set.seed(seed)
  x <- rweibull(n_0,1,1) 
  x_new <- rweibull(n_test,1,1) 
  x_bar <- mean((c(x,x_new)))
  s_x = sd((c(x,x_new)))
  
  #Heteroskedasticity
  y_hetero <- f(x, theta) + (0.01+x)*rnorm(n_0)
  y_hetero_new <- f(x_new, theta) + (0.01+x_new)*rnorm(n_test)
  
  
  # Step 1: Generate Design matrix of Covariates
  dv <- x - x_bar
  X <- sapply(1:M, function(m) Ck(dv, m, s_x))
  colnames(X) <- paste0("X", 1:M)
  
  
  # Step 2: Create a design matrix for new x values
  dv_new <- x_new - x_bar
  X_new <- sapply(1:M, function(m) Ck(dv_new, m, s_x))
  colnames(X_new) <- paste0("X", 1:M)
  
  # y plot grid
  y_plot <- seq(min(y_hetero)-1,max(y_hetero)+1,length.out = n_plot)
  dy <- y_plot[2] - y_plot[1]
  
  
  # Step 3: Define and fit models
  logml_models <- numeric(M) 
  for (i in 1:M) {
    suffix <- sprintf("rep_exp_%d_model_%d.RDS", e, i) 
    start <- Sys.time()
    diagnostic_file = file.path(getwd(), paste0("df_repexp_",e,"model",i,".csv"))
    formula <- as.formula(paste("y ~", paste(colnames(X)[1:i], collapse = "+")))
    fit_m <- stan_glm(formula, data = data.frame(y = y_hetero, X), prior = normal(0, 1),
                      prior_intercept = normal(0, 1),  chains = n_chains, iter = 2*B, warmup = B,
                      diagnostic_file = diagnostic_file, seed = seed)
    end <- Sys.time()
    times[j,i] <- end - start
    samples_fit_m <- as.matrix(fit_m)
    loo_fit_m <- bridge_sampler(fit_m, silent = TRUE)
    logml1_m <- loo_fit_m$logml
    logml_models[i] <- logml1_m
    saveRDS(samples_fit_m, file = sprintf("samples/samples_fit_%s.RDS", suffix))
    
  }
  
  saveRDS(logml_models, file = sprintf("samples/logml_models_rep_exp_%d.RDS", e)) 
  
  
  
}


n_cores <- parallel::detectCores() - 1  # Use all but one core
cl <- makeCluster(n_cores)
registerDoParallel(cl)

foreach(e = 1:rep_exp, .combine = rbind, .packages = c("rstanarm", "bridgesampling")) %dopar% {
  seed = 1234 + e
  set.seed(seed)
  x <- rweibull(n_0,1,1) 
  x_new <- rweibull(n_test,1,1) 
  x_bar <- mean((c(x,x_new)))
  s_x = sd((c(x,x_new)))
  
  #Heteroskedasticity
  y_hetero <- f(x, theta) + (0.01+x)*rnorm(n_0)
  y_hetero_new <- f(x_new, theta) + (0.01+x_new)*rnorm(n_test)
  
  
  # Step 1: Generate Design matrix of Covariates
  dv <- x - x_bar
  X <- sapply(1:M, function(m) Ck(dv, m, s_x))
  colnames(X) <- paste0("X", 1:M)
  
  
  # Step 2: Create a design matrix for new x values
  dv_new <- x_new - x_bar
  X_new <- sapply(1:M, function(m) Ck(dv_new, m, s_x))
  colnames(X_new) <- paste0("X", 1:M)
  
  # y plot grid
  y_plot <- seq(min(y_hetero)-1,max(y_hetero)+1,length.out = n_plot)
  dy <- y_plot[2] - y_plot[1]
  
  
  # Step 3: 
  
  for (m in 1:M) {
    suffix <- sprintf("rep_exp_%d_model_%s.RDS", e, m) 
    print(suffix)
    samples_fit_m <- readRDS(sprintf("samples/samples_fit_%s.RDS", suffix))
    
    beta_post_m <- samples_fit_m[,2:(m+1),drop=FALSE]
    intercept_post_m <- samples_fit_m[,1, drop=FALSE]
    sigma_post_m <- samples_fit_m[,(m+2),drop=FALSE]
    
    mu_train_m <- beta_post_m %*% t(X[,1:m,drop=FALSE])+matrix(intercept_post_m, nrow = n_chains*B, ncol = n_0, byrow = FALSE)
    mu_test_m <- beta_post_m %*% t(X_new[,1:m,drop=FALSE])+matrix(intercept_post_m, nrow = n_chains*B, ncol = n_test, byrow = FALSE)
    logp_samp_n_m <- sapply(1:n_0, function(k) dnorm(y_hetero[k],mu_train_m[,k],sigma_post_m, log=TRUE))
    logwjk_m <- array(0,dim = c(n_plot,n_chains*B,n_test))
    for (i in 1:n_plot) {
      logwjk_m[i,,] <- sapply(1:n_test, function(k) dnorm(y_plot[i],mu_test_m[,k],sigma_post_m, log=TRUE))
    }
    
    
    conf_scoes_m <- array(0,dim = c(n_test,n_plot, n_0+1))
    region_cb_m <- matrix(0,nrow=n_test,ncol=n_plot)
    coverage_cb_m <- numeric(n_test)
    length_cb_m <- numeric(n_test)
    PostpredMat_m <- matrix(0, nrow=n_plot,ncol=n_test)
    for (i in 1:n_test) {
      PostpredMat_m[ , i] <- rowSums(exp(logwjk_m[,,i]))/(n_chains*B)
      comp_rank_pred <- compute_cb_region_IS(alpha_target, logp_samp_n_m, logwjk_m[,,i])
      conf_scoes_m[i,,] <- comp_rank_pred$pred_tot
      region_cb_m[i,] <- comp_rank_pred$region_true
      coverage_cb_m[i] <- region_cb_m[i,which.min(abs(y_plot - y_hetero_new[i]))] # Grid coverage
      length_cb_m[i] <- sum(region_cb_m[i,]) * dy
    }
    
    mu_test_m <- beta_post_m %*% t(X_new[,1:m,drop=FALSE])+matrix(intercept_post_m, nrow = n_chains*B, ncol = n_test, byrow = FALSE)
    cdf_test_m <- sapply(1:n_test, function(i) {
      cdf_matrix <- outer(y_plot, 1:(n_chains*B) , function(y, k) pnorm(y, mean = mu_test_m[k,i], sd = sigma_post_m[k]))
      return(rowMeans(cdf_matrix))
    })
    
    band_bayes_m <- matrix(0,nrow=n_test,ncol=2)
    coverage_bayes_m <- numeric(n_test)
    length_bayes_m <- numeric(n_test)
    for (i in 1:n_test) {
      band_bayes_m[i,] <- compute_bayes_band_MCMC(alpha_target, y_plot, cdf_test_m[,i])
      coverage_bayes_m[i] <- (y_hetero_new[i] >= band_bayes_m[i,1]) & (y_hetero_new[i] <= band_bayes_m[i,2])
      length_bayes_m[i] <- abs(band_bayes_m[i,2] - band_bayes_m[i,1])
    }
    
    
    saveRDS(conf_scoes_m, sprintf("results/conf_scoes_%s.RDS",suffix))
    saveRDS(cdf_test_m, sprintf("results/cdf_test_%s.RDS", suffix))
    saveRDS(PostpredMat_m, sprintf("results/PostpredMat_%s.RDS", suffix))
    
    # Save regions
    saveRDS(region_cb_m, sprintf("results/region_cb_%s.RDS", suffix))
    saveRDS(band_bayes_m, sprintf("results/band_bayes_%s.RDS", suffix))
    
    saveRDS(coverage_cb_m, sprintf("results/coverage_cb_%s.RDS", suffix))
    saveRDS(coverage_bayes_m, sprintf("results/coverage_bayes_%s.RDS", suffix))
    
    saveRDS(length_cb_m, sprintf("results/length_cb_%s.RDS", suffix))
    saveRDS(length_bayes_m, sprintf("results/length_bayes_%s.RDS", suffix))
    
    print(c(mean(length_cb_m),mean(length_bayes_m),mean(coverage_cb_m),mean(coverage_bayes_m)))
    
  }
  
  
}


results_bma_cb <- data.frame(experiment = integer(), 
                             mean_length_bayes_bma = numeric(), 
                             mean_coverage_bayes_bma = numeric(),
                             mean_length_cb_bma = numeric(), 
                             mean_coverage_cb_bma = numeric(),
                             stringsAsFactors = FALSE)

results <- foreach(e = 1:rep_exp, .combine = rbind, .packages = c("rstanarm", "bridgesampling")) %dopar% {
  seed = 1234 + e
  set.seed(seed)
  x <- rweibull(n_0,1,1) #generate x from Weibull(1,1)
  x_new <- rweibull(n_test,1,1) #generate new x test from Weibull(1,1)
  x_bar <- mean((c(x,x_new)))
  s_x = sd((c(x,x_new)))
  
  #Heteroskedasticity
  y_hetero <- f(x, theta) + (0.01+x)*rnorm(n_0)
  y_hetero_new <- f(x_new, theta) + (0.01+x_new)*rnorm(n_test)
  
  
  # Step 1: Generate Design matrix of Covariates
  dv <- x - x_bar
  X <- sapply(1:M, function(m) Ck(dv, m, s_x))
  colnames(X) <- paste0("X", 1:M)
  
  
  # Step 2: Create a design matrix for new x values
  dv_new <- x_new - x_bar
  X_new <- sapply(1:M, function(m) Ck(dv_new, m, s_x))
  colnames(X_new) <- paste0("X", 1:M)
  
  # y plot grid
  y_plot <- seq(min(y_hetero)-1,max(y_hetero)+1,length.out = n_plot)
  dy <- y_plot[2] - y_plot[1]
  
  logml1_models <- readRDS(sprintf("samples/logml_models_rep_exp_%d.RDS", e))
  pmps <- exp(logml1_models)/sum(exp(logml1_models))
  plot(1:M,pmps)
  
  coverage_cb_bma <- numeric(n_test)
  coverage_bayes_bma <- numeric(n_test)
  length_cb_bma <- numeric(n_test)
  length_bayes_bma <- numeric(n_test)
  band_bayes_bma <- matrix(0, nrow = n_test, ncol = 2)
  region_cb_bma <- matrix(0,nrow=n_test,ncol=n_plot)
  
  
  
  #BMA
  suffix <-sprintf("rep_exp_%d_model_%s.RDS", e, 1)
  cdf_test_m <- readRDS(sprintf("results/cdf_test_%s.RDS", suffix))
  cdf_test_bma <- pmps[1]*cdf_test_m  
  for (m in 2:M) {
    suffix <- sprintf("rep_exp_%d_model_%s.RDS", e, m)
    cdf_test_m <- readRDS(sprintf("results/cdf_test_%s.RDS", suffix))
    cdf_test_bma <- cdf_test_bma + pmps[m]*cdf_test_m
  }
  
  
  for (i in 1:n_test) {
    band_bayes_bma[i,] <- compute_bayes_band_MCMC(alpha_target, y_plot, cdf_test_bma[,i])
    coverage_bayes_bma[i] <- (y_hetero_new[i] >= band_bayes_bma[i,1]) & (y_hetero_new[i] <= band_bayes_bma[i,2])
    length_bayes_bma[i] <- abs(band_bayes_bma[i,2] - band_bayes_bma[i,1])
  }
  
  result_bma <- c(mean(length_bayes_bma), mean(coverage_bayes_bma))
  
  
  
  for (i in 1:n_test) {
    suffix <- sprintf("rep_exp_%d_model_%s.RDS", e, 1) 
    #suffix <- paste0("model_", 1)
    conf_scoes_m <- readRDS(sprintf("results/conf_scoes_%s.RDS", suffix))
    PostpredMat_m <- readRDS(sprintf("results/PostpredMat_%s.RDS", suffix))
    conf_scoes_bma <- pmps[1]*PostpredMat_m[,i]*conf_scoes_m  
    for (m in 2:M) {
      suffix <- sprintf("rep_exp_%d_model_%s.RDS", e, m) 
      #suffix <- paste0("model_", m)
      conf_scoes_m <- readRDS(sprintf("results/conf_scoes_%s.RDS", suffix))
      PostpredMat_m <- readRDS(sprintf("results/PostpredMat_%s.RDS", suffix))
      conf_scoes_bma <- conf_scoes_bma + pmps[m]*PostpredMat_m[,i]*conf_scoes_m 
    }
    rank_cp <- rowSums(conf_scoes_bma[i,,] <= conf_scoes_bma[i,, n_0 + 1])
    
    region_cb_bma[i,] <- rank_cp > alpha_target * (n_0 + 1)
    coverage_cb_bma[i] <- region_cb_bma[i,which.min(abs(y_plot - y_hetero_new[i]))] # Grid coverage
    length_cb_bma[i] <- sum(region_cb_bma[i,]) * dy
  }
  
  result_cb <- c(mean(length_cb_bma), mean(coverage_cb_bma))
  
  results_bma_cb <- rbind(results_bma_cb, data.frame(experiment = e, 
                                                     mean_length_bayes_bma = result_bma[1], 
                                                     mean_coverage_bayes_bma = result_bma[2], 
                                                     mean_length_cb_bma = result_cb[1], 
                                                     mean_coverage_cb_bma = result_cb[2]))
  
  
}


stopCluster(cl)

write.csv(results, file = "results_bma_cb_summary.csv", row.names = FALSE)


results <- read.csv("results_bma_cb_summary.csv")
results_unique <- unique(results)

# Print the first few rows of the data to understand its structure
head(results)

# Calculate the mean and standard deviation for each column
mean_values <- sapply(results, mean, na.rm = TRUE)
sd_values <- sapply(results, sd, na.rm = TRUE)

# Print the results
cat("Mean values:\n")
print(mean_values)
cat("\nStandard deviation values:\n")
print(sd_values)


# Exclude the first column (assuming it's an ID column)
results_numeric <- results[, c(2,4)]

# Print the first few rows of the numeric data to verify
head(results_numeric)

# Create boxplots for each column in the numeric data frame
boxplot(results_numeric, main = "Boxplots of Experiment Results", 
        xlab = "Columns", ylab = "Values", 
        col = rainbow(ncol(results_numeric)), 
        las = 2)  # las = 2 rotates axis labels for better readability

# Exclude the first column (assuming it's an ID column)
results_numeric <- results[, c(3,5)]

# Print the first few rows of the numeric data to verify
head(results_numeric)

# Create boxplots for each column in the numeric data frame
boxplot(results_numeric, main = "Boxplots of Experiment Results", 
        xlab = "Columns", ylab = "Values", 
        col = rainbow(ncol(results_numeric)), 
        las = 2)  # las = 2 rotates axis labels for better readability



# Initialize an empty DataFrame to store mean lengths and coverages for each experiment
results_df <- data.frame(
  Experiment = 1:rep_exp,
  matrix(NA, nrow = rep_exp, ncol = 4 * M)  # 2 columns per model: mean length and mean coverage
)

# Set column names for the DataFrame
colnames(results_df)[-1] <- c(paste0("Mean_Length_bayes_Model_", 1:M), paste0("Mean_Coverage_bayes_Model_", 1:M),paste0("Mean_Length_cb_Model_", 1:M), paste0("Mean_Coverage_cb_Model_", 1:M))

for (e in 1:rep_exp) {
  for (m in 1:M) {
    suffix <- sprintf("rep_exp_%d_model_%s.RDS", e, m) 
    
    # Store the mean values in the DataFrame
    results_df[e, paste0("Mean_Length_bayes_Model_", m)] <- mean(readRDS(sprintf("results/length_bayes_%s.RDS", suffix)))
    results_df[e, paste0("Mean_Coverage_bayes_Model_", m)] <-mean(readRDS(sprintf("results/coverage_bayes_%s.RDS", suffix)))
    results_df[e, paste0("Mean_Length_cb_Model_", m)] <- mean(readRDS( sprintf("results/length_cb_%s.RDS", suffix)))
    results_df[e, paste0("Mean_Coverage_cb_Model_", m)] <- mean(readRDS(sprintf("results/coverage_cb_%s.RDS", suffix)))
    
  }
}

head(results_df)

mean_values <- colMeans(results_df[,-1], na.rm = TRUE)  # Compute mean for each column except the first
sd_values <- apply(results_df[,-1], 2, sd, na.rm = TRUE)/sqrt(rep_exp)  # Compute SD for each column except the first

# Combine mean and SD into a new DataFrame
summary_df <- data.frame(
  Statistic = rep(c("Mean", "SE"), each = ncol(results_df) - 1),
  Variable = rep(colnames(results_df)[-1], 2),
  Value = c(mean_values, sd_values)
)

# Display the summary DataFrame
print(summary_df)


library(reshape2)
library(ggplot2)

# Reshape the data for plotting Mean Lengths
length_df <- melt(results_df[, c(1, grep("Mean_Length", colnames(results_df)))], id.vars = "Experiment", variable.name = "Model", value.name = "Mean_Length")

# Reshape the data for plotting Mean Coverages
coverage_df <- melt(results_df[, c(1, grep("Mean_Coverage", colnames(results_df)))], id.vars = "Experiment", variable.name = "Model", value.name = "Mean_Coverage")

# Plot for Mean Lengths
ggplot(length_df, aes(x = Model, y = Mean_Length)) +
  geom_boxplot() +
  labs(title = "Boxplot of Mean Lengths for Each Model", x = "Model", y = "Mean Length") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Plot for Mean Coverages
ggplot(coverage_df, aes(x = Model, y = Mean_Coverage)) +
  geom_boxplot() +
  labs(title = "Boxplot of Mean Coverages for Each Model", x = "Model", y = "Mean Coverage") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))






# Initialize an empty DataFrame to store mean lengths and coverages for each experiment
results_df <- data.frame(
  Experiment = 1:rep_exp,
  matrix(NA, nrow = rep_exp, ncol = 4 * M + 4)  # 4 extra columns for BMA and CB method models
)

# Set column names for the DataFrame
colnames(results_df)[-1] <- c(
  paste0("Mean_Length_bayes_Model_", 1:M), 
  paste0("Mean_Coverage_bayes_Model_", 1:M),
  paste0("Mean_Length_cb_Model_", 1:M), 
  paste0("Mean_Coverage_cb_Model_", 1:M),
  "Mean_Length_bma_Model", "Mean_Coverage_bma_Model",
  "Mean_Length_cb_Method", "Mean_Coverage_cb_Method"
)

for (e in 1:rep_exp) {
  for (m in 1:M) {
    suffix <- sprintf("rep_exp_%d_model_%s.RDS", e, m) 
    
    # Store the mean values for individual models
    results_df[e, paste0("Mean_Length_bayes_Model_", m)] <- mean(readRDS(sprintf("results/length_bayes_%s.RDS", suffix)))
    results_df[e, paste0("Mean_Coverage_bayes_Model_", m)] <- mean(readRDS(sprintf("results/coverage_bayes_%s.RDS", suffix)))
    results_df[e, paste0("Mean_Length_cb_Model_", m)] <- mean(readRDS(sprintf("results/length_cb_%s.RDS", suffix)))
    results_df[e, paste0("Mean_Coverage_cb_Model_", m)] <- mean(readRDS(sprintf("results/coverage_cb_%s.RDS", suffix)))
  }
  
  # Calculate and store the mean values for BMA and CB method models
  
  results_df[e, "Mean_Length_bma_Model"] <- results_unique$mean_length_bayes_bma[e]
  results_df[e, "Mean_Coverage_bma_Model"] <- results_unique$mean_coverage_bayes_bma[e]
  results_df[e, "Mean_Length_cb_Method"] <- results_unique$mean_length_cb_bma[e]
  results_df[e, "Mean_Coverage_cb_Method"] <- results_unique$mean_coverage_cb_bma[e]
}

# Check the first few rows of the updated DataFrame
head(results_df)


write.csv(results_df, file = "results_combined_summary.csv", row.names = FALSE)
mean_values <- colMeans(results_df[,-1], na.rm = TRUE)  # Compute mean for each column except the first
sd_values <- apply(results_df[,-1], 2, sd, na.rm = TRUE)/sqrt(rep_exp)  # Compute SD for each column except the first
print(mean_values)


results_df <- read.csv(file = "results_combined_summary.csv")
mean_values <- colMeans(results_df[,-1], na.rm = TRUE)  # Compute mean for each column except the first
sd_values <- apply(results_df[,-1], 2, sd, na.rm = TRUE)/sqrt(nrow(results_df))  # Compute SD for each column except the first
print(mean_values)



# Melt the data for easier plotting with ggplot2
results_df_melt <- melt(results_df, id.vars = "Experiment")

# Dynamically create labels for models
bayes_labels <- paste0("B", 1:M)
cb_labels <- paste0("CB", 1:M)
model_labels <- c(bayes_labels, cb_labels, "BMA", "CPBMA")

# Create a custom label mapping for lengths
label_mapping_lengths <- c(
  setNames(bayes_labels, paste0("Mean_Length_bayes_Model_", 1:M)),
  setNames(cb_labels, paste0("Mean_Length_cb_Model_", 1:M)),
  "Mean_Length_bma_Model" = "BMA",
  "Mean_Length_cb_Method" =  "CPBMA" 
)

# Create a custom label mapping for coverages
label_mapping_coverages <- c(
  setNames(bayes_labels, paste0("Mean_Coverage_bayes_Model_", 1:M)),
  setNames(cb_labels, paste0("Mean_Coverage_cb_Model_", 1:M)),
  "Mean_Coverage_bma_Model"="BMA" ,
  "Mean_Coverage_cb_Method" = "CPBMA" 
)

# Define colors for each category in lengths
color_mapping_lengths <- c(
  setNames(rep("lightslateblue", M), paste0("Mean_Length_bayes_Model_", 1:M)),
  setNames(rep("royalblue", M), paste0("Mean_Length_cb_Model_", 1:M)),
  "Mean_Length_bma_Model" = "steelblue2",
  "Mean_Length_cb_Method" = "gold1"
)

# Define colors for each category in coverages
color_mapping_coverages <- c(
  setNames(rep("lightslateblue", M), paste0("Mean_Coverage_bayes_Model_", 1:M)),
  setNames(rep("royalblue", M), paste0("Mean_Coverage_cb_Model_", 1:M)),
  "Mean_Coverage_bma_Model" = "steelblue2",
  "Mean_Coverage_cb_Method" = "gold1"
)

# Plot for Mean Lengths with colors
mean_lengths <- ggplot(subset(results_df_melt, grepl("Mean_Length", variable)), aes(x = variable, y = value, fill = variable)) +
  geom_boxplot() +
  scale_x_discrete(labels = label_mapping_lengths) +
  scale_fill_manual(values = color_mapping_lengths) + # Apply custom colors
  geom_hline(yintercept = median(results_df$Mean_Length_cb_Method), linetype = "dashed", color = "red", linewidth=1.1) + # Add horizontal line at 0.80
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none") + # Remove legend
  labs( x = "Method", y = "Mean Length")

# Plot for Mean Coverages with colors and horizontal line
mean_coverages <- ggplot(subset(results_df_melt, grepl("Mean_Coverage", variable)), aes(x = variable, y = value, fill = variable)) +
  geom_boxplot() +
  scale_x_discrete(labels = label_mapping_coverages) +
  scale_fill_manual(values = color_mapping_coverages) + # Apply custom colors
  geom_hline(yintercept = 0.80, linetype = "dashed", color = "red",linewidth=1.1) + # Add horizontal line at 0.80
  coord_cartesian(ylim = c(0.5, 1)) + # Set y-axis limits
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none") + # Remove legend
  labs(x = "Method", y = "Mean Coverage")

# Display the plots
print(mean_lengths)
print(mean_coverages)


# Save the Mean Lengths plot
ggsave("MeanLengths_n0_100_a08.png", plot = mean_lengths, width = 10, height = 6, dpi = 300)

# Save the Mean Coverages plot
ggsave("MeanCoverage_n0_100_a08.png", plot = mean_coverages, width = 10, height = 6, dpi = 300)



# Create a table for mean and SD of lengths
lengths_summary <- data.frame(
  #Model = colnames(results_df)[grep("Length", colnames(results_df))],
  Mean = round(colMeans(results_df[, grep("Length", colnames(results_df))], na.rm = TRUE),3),
  SD = round(apply(results_df[, grep("Length", colnames(results_df))], 2, sd, na.rm = TRUE)/sqrt(rep_exp),3)
)

# Create a table for mean and SD of coverages
coverages_summary <- data.frame(
  #Model = colnames(results_df)[grep("Coverage", colnames(results_df))],
  Mean = round(colMeans(results_df[, grep("Coverage", colnames(results_df))], na.rm = TRUE),3),
  SD = round(apply(results_df[, grep("Coverage", colnames(results_df))], 2, sd, na.rm = TRUE)/sqrt(rep_exp),3)
)

# Display the summary tables
lengths_summary
coverages_summary


# Save lengths_summary as a CSV file
write.csv(lengths_summary, file = "lengths_summary.csv", row.names = FALSE)

# Save coverages_summary as a CSV file
write.csv(coverages_summary, file = "coverages_summary.csv", row.names = FALSE)












