
#####################################################################################
## This script loads the outputs generated by the jupyter notebook
## "generate_synthetic_data_for_real_data_experiments_openml_cc18_categorical.ipynb"
## and evaluates the synthetic data w.r.t. fidelity and privacy metrics.
#####################################################################################

# source utility functions
source("utility_functions_for_miav_tabpfn_iclr.R")

library(arrow)

# path to the folder storing the saved outputs; "" (the default) means the
# feather files are looked up relative to the current working directory
data_path <- ""

# Read one of the saved feather files stored under `data_path`.
#   file_name: name of the feather file (including its extension)
#   returns:   whatever read_feather() returns for that file
# The path is assembled with file.path(), so a non-empty `data_path` no longer
# needs to end with a path separator. An empty `data_path` is special-cased
# because file.path("", x) would yield "/x" (the filesystem root) rather than
# the bare file name that the original paste0() produced.
read_saved_feather <- function(file_name) {
  full_path <- if (nzchar(data_path)) {
    file.path(data_path, file_name)
  } else {
    file_name
  }
  read_feather(full_path)
}

# large feather dataset containing the original and holdout data splits
# for the categorical datasets
df_split <- read_saved_feather("openml_cc18_orig_hold_data_splits_categorical.feather")

# The remaining feather datasets hold the synthetic versions of each data split
# of the original data, one file per generator (MIAV, JF, FC) and per
# backbone (TabPFN, TabICL).

# TabPFN-based synthetic data
df_tabpfn_miav <- read_saved_feather("openml_cc18_syn_miav_tabpfn_categorical.feather")
df_tabpfn_jf <- read_saved_feather("openml_cc18_syn_jf_tabpfn_categorical.feather")
df_tabpfn_fc <- read_saved_feather("openml_cc18_syn_fc_tabpfn_categorical.feather")

# TabICL-based synthetic data
df_tabicl_miav <- read_saved_feather("openml_cc18_syn_miav_tabicl_categorical.feather")
df_tabicl_jf <- read_saved_feather("openml_cc18_syn_jf_tabicl_categorical.feather")
df_tabicl_fc <- read_saved_feather("openml_cc18_syn_fc_tabicl_categorical.feather")


## The random number generator is reset to this seed immediately before
## every evaluation call below, so each call starts from the same RNG state.
eval_seed <- 12345

## evaluate the holdout datasets
set.seed(eval_seed)
out_hold <- EvaluateHoldoutDataCat(df_split = df_split)

## Evaluate one collection of synthetic datasets against the original/holdout
## splits in `df_split`, resetting the RNG seed first.
evaluate_synth <- function(df_synth) {
  set.seed(eval_seed)
  EvaluateSyntheticDataCat(df_split = df_split, df_synth = df_synth)
}

## MIAV-based synthetic data (TabPFN, then TabICL)
out_tabpfn_miav <- evaluate_synth(df_tabpfn_miav)
out_tabicl_miav <- evaluate_synth(df_tabicl_miav)

## JF-based synthetic data (TabPFN, then TabICL)
out_tabpfn_jf <- evaluate_synth(df_tabpfn_jf)
out_tabicl_jf <- evaluate_synth(df_tabicl_jf)

## FC-based synthetic data (TabPFN, then TabICL)
out_tabpfn_fc <- evaluate_synth(df_tabpfn_fc)
out_tabicl_fc <- evaluate_synth(df_tabicl_fc)

## Write all evaluation results (holdout plus the six synthetic-data variants)
## into a single compressed .RData file in the current working directory.
## NOTE(review): the file name spells "rexperiments"; it is kept unchanged
## because other scripts may load the file under exactly this name -- confirm
## before renaming.
evaluation_outputs <- c(
  "out_hold",
  "out_tabpfn_miav",
  "out_tabicl_miav",
  "out_tabpfn_jf",
  "out_tabicl_jf",
  "out_tabpfn_fc",
  "out_tabicl_fc"
)
save(
  list = evaluation_outputs,
  file = "outputs_real_data_rexperiments_categorical.RData",
  compress = TRUE
)
