# @package _global_
dataset:
  # Settings for the semi-synthetic MIMIC-III dataset collection. The class named in
  # `_target_` is presumably constructed with the remaining keys of this mapping
  # (Hydra-style instantiation) - TODO confirm against the loading code.
  _target_: src.data.mimic_iii.semi_synthetic_dataset.MIMIC3SyntheticDatasetCollection      # Will be dynamically instantiated
  seed: ${exp.seed}                                        # Interpolated from exp.seed (not defined in this file)
  name: mimic3_synthetic
  path: data/processed/all_hourly_data.h5                  # Path with MIMIC-3 dataset (HDFStore)
  min_seq_length: 20                                       # Min sequence length in cohort
  max_seq_length: 100                                      # Max sequence length in cohort
  max_number: 2000                                         # Maximum number of patients in cohort (alternative value noted by the author: 1000)
  data_seed: ${exp.seed}                                   # Same interpolation as `seed` above
  projection_horizon: 1                                   # Range of tau-step-ahead prediction (tau = projection_horizon + 1)
  n_treatments_seq: 1                                     # Number of random treatment trajectories sampled for each patient
  # NOTE(review): val/test look like fractions of the cohort, with the remainder used
  # for training - confirm in the dataset class.
  split:
    val: 0.1
    test: 0.2
  val_batch_size: 512                                      # Batch size for evaluation
  treatment_mode: multilabel
  autoregressive: True
  instance_noise_std: 0
  type_static_feat: null                                   # One of 'true', 'rep' or null (alternative noted by the author: 'true')

  # Names of the vitals / lab measurements used from the dataset.
  # NOTE(review): the strings presumably have to match column names in the HDF5 file
  # exactly (including the spelling "glascow") - do not "correct" them without
  # checking the data. The numeric comments give the 0-based position in this list.
  vital_list:
    - heart rate
    - red blood cell count
    - sodium # index 2 (0-based) in this list
    - mean blood pressure
    - systemic vascular resistance
    - glucose # index 5 (0-based) in this list
    - chloride urine
    - glascow coma scale total
    - hematocrit
    - positive end-expiratory pressure set
    - respiratory rate
    - prothrombin time pt
    - cholesterol
    - hemoglobin
    - creatinine
    - blood urea nitrogen
    - bicarbonate
    - calcium ionized
    - partial pressure of carbon dioxide
    - magnesium
    - anion gap
    - phosphorous
    - venous pvo2
    - platelets
    - calcium urine
  # Static (per-patient, non time-varying) features.
  # NOTE(review): presumably column names in the source data - confirm.
  static_list:
    - gender
    - ethnicity
    - age
  drop_first: False                                     # Option for the one-hot encoding of categorical static features (presumably "drop the first category" - confirm)

  # Synthetic outcome generators; this file defines a single outcome named `y1`.
  synth_outcomes_list:
    - _target_: src.data.mimic_iii.SyntheticOutcomeGenerator
      # 12 variables, all of which also appear in `vital_list`.
      # The key spelling `exogeneous_vars` is what this config uses; it presumably
      # matches the generator's parameter name, so keep it as is - confirm before renaming.
      exogeneous_vars:
        - heart rate
        - blood urea nitrogen
        - glucose
        - sodium
        - chloride urine
        - glascow coma scale total
        - hematocrit
        - positive end-expiratory pressure set
        - respiratory rate
        - prothrombin time pt
        - cholesterol
        - hemoglobin
      exog_dependency:
        _target_: src.data.mimic_iii.utils.RandomFourierFeaturesFunction
        input_dim: 12             # Equals the number of entries in `exogeneous_vars`; presumably must be kept in sync - verify
        gamma: 0.005
        scale: 40.0
      exog_dependency_static_feat:
        _target_: src.data.mimic_iii.utils.RandomSVDFeaturesFunction
        input_dim: 20             # NOTE(review): origin of 20 is not visible in this file (possibly the encoded static-feature width) - confirm
        loc: 10
        scale: 1
      exog_weight: 0.9
      endo_dependency:
        _target_: src.data.mimic_iii.utils.DiscretizedRandomGPFunction
        # scikit-learn Gaussian-process kernels; how the two entries are combined is
        # decided by DiscretizedRandomGPFunction (not visible here).
        kernels:
          - _target_: sklearn.gaussian_process.kernels.Matern
            length_scale: 25.0
            nu: 2.5
          - _target_: sklearn.gaussian_process.kernels.WhiteKernel
            noise_level: 0.005
      # NOTE(review): the *_weight values presumably scale the exogenous / endogenous
      # components of the generated outcome - confirm in SyntheticOutcomeGenerator.
      endo_rand_weight: 0.4
      endo_spline_weight: 2.0
      outcome_name: y1            # Referenced by `confounder_outcomes` and `treatment_outcomes_influence` in this file

  # Synthetic treatment definitions; this file defines a single treatment named `t1`.
  synth_treatments_list:
    - _target_: src.data.mimic_iii.SyntheticTreatment
      # 12 variables: the same names, in the same order, as `exogeneous_vars` of
      # outcome `y1` in this file.
      confounding_vars:
        - heart rate
        - blood urea nitrogen
        - glucose
        - sodium
        - chloride urine
        - glascow coma scale total
        - hematocrit
        - positive end-expiratory pressure set
        - respiratory rate
        - prothrombin time pt
        - cholesterol
        - hemoglobin
      # Names here refer to `outcome_name` values from `synth_outcomes_list`.
      confounder_outcomes:
        - y1
      confounding_dependency:
        _target_: src.data.mimic_iii.utils.RandomFourierFeaturesFunction
        input_dim: 12             # Equals the number of entries in `confounding_vars`; presumably must be kept in sync - verify
        gamma: 0.5
        scale: 30.0
      # NOTE(review): `window` / `effect_window` are presumably measured in time steps,
      # and the conf_* weights presumably scale the confounding terms - confirm in
      # SyntheticTreatment.
      window: 3
      conf_outcome_weight: 0.1
      conf_vars_weight: 0.4
      bias: 0.0
      full_effect: 2.0
      effect_window: 20
      treatment_name: t1          # Referenced by `treatment_outcomes_influence` in this file

  # Dict with treatment-outcome influences: maps an outcome name (`outcome_name` in
  # `synth_outcomes_list`) to the list of treatment names (`treatment_name` in
  # `synth_treatments_list`) that influence it.
  treatment_outcomes_influence:
    y1:
      - t1
# Experiment-level overrides. `exp.seed`, which the dataset section interpolates,
# is not set here and must come from another config.
# NOTE(review): both flags presumably control how RMSE is reported (in original,
# unscaled units / as a percentage) - confirm in the evaluation code.
exp:
  unscale_rmse: False
  percentage_rmse: False
