# @package _global_
dataset:
  # NOTE(review): presumably an upper bound on how many items the dataset
  # provides — confirm against the dataset code that reads this key.
  max_number: 1000
model:
  name: Causal_CPC
  activation: selu

  # Encoder configuration. Missing hyperparameters are to be filled in on the
  # command line, with tune_hparams = true, or selected with
  # +backbone/crn_hparams=...
  # NOTE(review): the `crn_hparams` group name looks inherited from a CRN
  # config — confirm it is the intended group for Causal_CPC.
  encoder:
    _target_: src.models.causal_cpc.Causal_CPCEncoder
    br_size: 18
    # fc_hidden_units <= br_size; used in building the treatment and outcome heads
    fc_hidden_units: 12
    genc_hidden: 26         # hidden dim of local features
    context_latent_dim: 18  # dim of features summary
    downsampling_factor: 1
    subsample_win_ratio: 0.05
    dropout_rate: 0.1       # Dropout of LSTM hidden layers + output layers
    num_layer: 1

    use_causalconv: false
    input_channels: 1
    hidden_channels: 16
    kernel_size: 4
    dilation: 1

    batch_size: 32
    # Separate optimizer settings for the non-treatment and treatment heads.
    optimizer:
      non_treatment_head:
        optimizer_cls: adamw
        learning_rate: 0.001  # instead of 0.005
        lr_scheduler: false

      treatment_head:
        optimizer_cls: sgd
        learning_rate: 0.01  # instead of 0.005
        momentum: 0.9
        lr_scheduler: false

    use_attention: false

    # Hparam tuning
    tune_hparams: false
    tune_range: 50
    # Explicitly null (unset) in this file.
    # NOTE(review): presumably supplied elsewhere when tune_hparams is
    # enabled — confirm.
    hparams_grid: null
    resources_per_trial: null

  train_decoder: true

  # Decoder configuration. Missing hyperparameters are to be filled in on the
  # command line, with tune_hparams = true, or selected with
  # +backbone/crn_hparams=...
  # NOTE(review): the `crn_hparams` group name looks inherited from a CRN
  # config — confirm it is the intended group for Causal_CPC.
  decoder:
    _target_: src.models.causal_cpc.Causal_CPCDecoder
    # rnn_hidden_units in the original terminology; should be equal to
    # encoder.br_size (resolved by interpolation from model.encoder.br_size).
    seq_hidden_units: ${model.encoder.br_size}
    br_size: 18             # preferable to be smaller than that of encoder
    # fc_hidden_units <= br_size; used in building the treatment and outcome
    # heads; used only in teacher_forcing
    fc_hidden_units: 12
    dropout_rate: 0.1       # Dropout of LSTM hidden layers + output layers
    num_layer: 1
    batch_size: 32
    y_dist_type: "continuous"
    teacher_forcing: false
    treat_hidden_dim: 8

    # Separate optimizer settings for the non-treatment and treatment heads.
    optimizer:
      non_treatment_head:
        optimizer_cls: adamw
        # NOTE(review): the earlier comment here read "instead of 0.005",
        # which is the current value — confirm the intended learning rate.
        learning_rate: 0.005
        lr_scheduler: false

      treatment_head:
        optimizer_cls: sgd
        learning_rate: 0.01  # instead of 0.005
        momentum: 0.9
        lr_scheduler: false

    # Hparam tuning
    tune_hparams: false
    tune_range: 30
    # Explicitly null (unset) in this file.
    # NOTE(review): presumably supplied elsewhere when tune_hparams is
    # enabled — confirm.
    hparams_grid: null
    resources_per_trial: null

# Experiment-level settings: loss weights, regularisation switches, epoch
# budget, and per-component early stopping.
exp:
  weights_ema: false
  balancing: mutual_info
  alpha_recons: 0.1
  alpha_infonce: 0.5
  alpha_mse: 10
  label_smoothing: 0
  use_spectral_norm: true
  max_epochs: 1000

  # Early-stopping settings for the encoder.
  # NOTE(review): the keys match the argument names of PyTorch Lightning's
  # EarlyStopping callback — confirm that is the consumer.
  encoder:
    early_stopping:
      monitor: "val/loss"
      min_delta: 0.0001
      patience: 100
      verbose: false
      mode: "min"

  # Early-stopping settings for the decoder; same as the encoder's except for
  # the shorter patience (50 vs 100).
  decoder:
    early_stopping:
      monitor: "val/loss"
      min_delta: 0.0001
      patience: 50
      verbose: false
      mode: "min"





