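# Script to gather all data files (.rds) below the Gaussian data directory and write their paths to a text file.
# Output connections are kept in a list keyed by output tag, but only a single tag is used at present, so every
# path is written to one listing file rather than to one file per (scm_type, n_nodes) combination.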

# Make sure something exists on disk at `file_path`.
#
# If the path is already present it is left untouched. Otherwise the parent
# directory is created (recursively, with directory-creation warnings
# suppressed) and an empty file is created at `file_path`.
#
# Returns invisible NULL when the path already existed, otherwise the logical
# result of `file.create()`.
create_file_if_not_exists <- function(file_path) {
  if (file.exists(file_path)) {
    return(invisible(NULL))
  }
  parent_dir <- dirname(file_path)
  dir.create(parent_dir, recursive = TRUE, showWarnings = FALSE)
  file.create(file_path)
}

# Open a fresh, writable listing file named "data_paths_<type>.txt".
#
# Args:
#   type: tag inserted into the file name.
#   file_path_root: directory that receives the file; it is created
#     (recursively) when missing. Trailing slashes are ignored.
#
# Returns a list with two elements:
#   file_path: path of the listing file.
#   file_conn: connection to that file opened in "w" mode, so any previous
#     content is truncated. The caller is responsible for closing it.
create_and_open_file <- function(
    type,
    file_path_root = "R/experiments/jobs/gaussian/causal_discovery/") {
  # Strip trailing slashes so the join below cannot yield "root//data_paths_..."
  # (the default root already ends in "/").
  root <- sub("/+$", "", file_path_root)
  file_path <- file.path(root, sprintf("data_paths_%s.txt", type))

  # Opening in "w" mode creates or truncates the file itself; only the parent
  # directory has to exist beforehand.
  dir.create(dirname(file_path), recursive = TRUE, showWarnings = FALSE)
  file_conn <- file(file_path, open = "w")

  list(file_path = file_path, file_conn = file_conn)
}


# Root of the data tree. The walk below expects the layout
# <base_dir>/<n_nodes folder>/<dag type folder>/<file>.rds.
base_dir <- "experiments/3_data/gaussian"


# Open output connections, keyed by output tag. Only one tag is used, so every
# discovered path is written to the same listing file.
file_connections <- list()

file_key <- "gaussian_all_except_cd"
if (!file_key %in% names(file_connections)) {
  file_connections[[file_key]] <- create_and_open_file(file_key)
}

# A missing base_dir yields character(0) here, so the loops simply do not run
# and the listing file stays empty.
n_nodes_folders <- list.dirs(base_dir, recursive = FALSE, full.names = TRUE)

tryCatch(
  {
    for (n_nodes_folder in n_nodes_folders) {
      dag_type_folders <- list.dirs(n_nodes_folder, recursive = FALSE)
      for (dag_type_folder in dag_type_folders) {
        # The dot is escaped so that only a literal ".rds" suffix matches
        # (a bare "." would accept any character, e.g. "foo_rds").
        rds_paths <- list.files(
          dag_type_folder,
          pattern = "\\.rds$",
          full.names = TRUE
        )
        # One path per line; writing character(0) is a no-op.
        writeLines(rds_paths, file_connections[[file_key]]$file_conn)
      }
    }
  },
  finally = {
    # Close all file connections, even if the directory walk failed.
    for (conn in file_connections) {
      close(conn$file_conn)
    }
  }
)
