# Defaults list: the config files composed into this one (Hydra-style `defaults`
# syntax). Order matters: entries are merged top to bottom.
defaults:
  # `_self_` marks where this file's own keys sit in the merge order. It is
  # listed first, so the entries below are merged after (on top of) this file.
  - _self_
  # Dataset to use. The value must be the name of the dataset's yaml file
  # without the extension, e.g. "churn_modelling" for churn_modelling.yaml
  # (the Churn Modeling dataset).
  - dataset: vehicle_insurance_claim

  # Data generation method to use. The value must be the name of the method's
  # yaml file without the extension, e.g. "cttvae" for cttvae.yaml (the CTTVAE
  # method).
  - datagen_method: cttvae
  # No `launcher` option is selected by default.
  # NOTE(review): presumably set through an override when needed — confirm.
  - launcher: null
  
# Folder locations for the data and artifacts of each stage.
# All values are relative paths ending in a slash.
paths:
  # Folder where the raw data is stored
  raw_data_dir: data/raw/

  # Folder where the clean data is stored (no duplicates, no missing values, etc.)
  clean_data_dir: data/clean/

  # Folder where the train and test subsets are stored
  processed_data_dir: data/processed/

  # Folder where the synthetic data generated by the methods is stored
  synth_data_dir: data/synthetic/

  # Folder where the trained models and training metrics (loss plot and values) are stored
  datagen_methods_dir: data/datagen_methods/

  # Folder where the trained MLE models are stored
  # NOTE(review): "MLE" is not expanded anywhere in this file — presumably
  # machine-learning efficacy; confirm.
  MLE_models_dir: data/MLE_models/

  # Folder where the results of the evaluation are stored
  results_dir: data/results/

# Stage toggles, one boolean per stage.
# NOTE(review): presumably each flag enables the matching pipeline stage
# (preprocessing, training, evaluation) — confirm against the consuming code.
run_preprocessing: false

run_training: false

run_eval: true

# Log to MLflow when true
mlflow: true

# Number of synthetic samples to generate.
# If null, the number of generated samples equals the number of rows in the
# original dataset.
n_to_generate: null

# Seed for reproducibility; leave empty (null) for no seed
seed: 42

# Hyperparameter optimization: enabled when `optimize` is true
optimize: false
# NOTE(review): presumably the number of optimization trials to run — confirm
# against the consuming code.
n_trials: 25
# Data generation methods listed as supported for Optuna optimization
optuna_supported_datagen_methods:
  - ctgan
  - tvae
  - copulagan
  - ctabgan
  - ttvae
  - ttvae_tbs
  - cttvae
  - cttvae_tbs
