# General settings
# `???` looks like the OmegaConf/Hydra marker for a mandatory value that must
# be supplied before use -- TODO confirm the loader.
obs_dim: ???  # to be specified later
action_dim: ???  # to be specified later
# Interpolation of a `device` key that is not defined in this view
# (presumably provided by the enclosing/root config -- verify).
device: ${device}
num_train_tasks: 20
use_layernorm: false  # applies to all networks

# Encoder
use_rnn_encoder: false
# Set to true for dynamically changing environments.
use_next_obs_in_context: true
# Enables the probabilistic context encoder.
use_information_bottleneck: false
latent_dim: 5
enc_hidden_dim: 256
enc_hidden_depth: 2
use_tanh: true
# Auxiliary losses for the encoder
# Contrastive objectives
use_focal: true
focal_loss_weight: 1
use_infonce: false
infonce_weight: 1
infonce_tau: 0.005
infonce_radius: 100.0
# CLUB upper bound on MI(z, behavior policy)
use_club: false
club_loss_weight: 0.5
# Decoder
use_decoder: false
decoder_hidden_dim: 256
decoder_hidden_depth: 2
decoder_weight: 1.0
# KL-divergence weight, relevant if the encoder is stochastic
kl_weight: 1.0
# L2 regularization
use_l2_reg: false
l2_reg_weight: 0.01
# Actor
actor_hidden_dim: 256
actor_hidden_depth: 2
# Lower/upper bounds for the log standard deviation
# (presumably of the policy's action distribution -- confirm in actor code).
log_std_min: -5
log_std_max: 2
# Critic
q_hidden_dim: 256
q_hidden_depth: 2
dropout: 0.0
# Implicit Q-learning (IQL)
expectile: 0.8
temperature: 3.0
discount: 0.99
# NOTE(review): distinct from `infonce_tau`; presumably the soft
# target-network update rate -- confirm against the agent code.
tau: 0.005
# Optimizer learning rates
# Written with an explicit decimal point: YAML 1.1 resolvers (e.g. stock
# PyYAML) read `1e-4` as a string, whereas `1.0e-4` is a float everywhere.
encoder_lr: 1.0e-4
actor_lr: 3.0e-4
critic_lr: 3.0e-4
club_lr: 1.0e-4
# Acceleration options
compile: false
cuda_graph: false

