# experiment_setup.R
# This file contains the setup for the conformal prediction experiment, including
# package dependencies and parameters.

# Load required packages
library(predictionBands)
library(FlexCoDE)
library(FNN)
library(ggplot2)

# Experiment parameters ----
num_runs <- 2  # Number of experiment repetitions
sigma_value <- 1.5  # Smoothing parameter for cd_fourier
n <- 2000  # Number of training samples
n_new <- 5000  # Number of test samples
alpha <- 0.1  # Significance level for prediction bands

# Data-split fractions.
# NOTE(review): these names match the per_train / per_val / per_ths arguments
# of predictionBands::fit_predictionBands -- confirm that is where they are
# forwarded.
per_train <- 0.4  # Proportion of training data
per_val <- 0.1  # Proportion of validation data
per_ths <- 0.5  # Proportion of threshold data

# Fail fast when a hand edit leaves the parameters inconsistent. The checks run
# inside local() so that no helper variable is added to the calling
# environment; the constants above are the only names this section defines.
local({
  split_fractions <- c(per_train, per_val, per_ths)
  if (any(split_fractions < 0)) {
    stop("per_train, per_val and per_ths must be non-negative", call. = FALSE)
  }
  # Tolerance keeps fractions that sum to 1 only up to rounding from failing.
  if (sum(split_fractions) > 1 + sqrt(.Machine$double.eps)) {
    stop("per_train + per_val + per_ths must not exceed 1", call. = FALSE)
  }
  if (alpha <= 0 || alpha >= 1) {
    stop("alpha must lie strictly between 0 and 1", call. = FALSE)
  }
  if (any(c(num_runs, n, n_new) < 1)) {
    stop("num_runs, n and n_new must all be at least 1", call. = FALSE)
  }
})

# Data preparation ----
# prepare_data() is not defined in this file; presumably it comes from the
# script sourced here -- confirm.
source("R/data_preparation.R")

# Disabled alternative configuration (simulated data), kept for reference.
# To use it, uncomment this call and comment out the active call below.
# data <- prepare_data(
#   data_source = "simulated",
#   data_path = "data/housing.csv",
#   target_col = "median_house_value",
#   exclude_cols = "ocean_proximity",
#   n_samples = 10000,  # Total samples (n_total)
#   data_type = "multimodal_gaussian",
#   seed = 123,
#   d = 5,
#   n_components = 7,
#   mu_base = c(-15, -10, -5, 0, 5, 10, 15),
#   sigma = c(1, 1.2, 1.5, 1, 1.5, 1.2, 1),
#   plot_dist = TRUE  # Visualize the distribution
# )

# Active configuration: the external "bio" dataset.
# NOTE(review): data_type and d look specific to the simulated setup above;
# confirm whether prepare_data() uses them when data_source = "external".
data <- prepare_data(
  data_source = "external",
  dataset_name = "bio",
  n_samples = 10000,
  data_type = "multimodal_gaussian",
  seed = 123,
  d = 5,
  plot_dist = TRUE
)

# Expose the two components of the result as top-level variables
# (presumably the covariates and the response -- verify against prepare_data).
x_all <- data$x_all
y_all <- data$y_all