# Hydra defaults list: this file's own values (_self_) are listed first, then
# both of Hydra's logging config groups are overridden to `disabled`.
defaults:
  - _self_
  - override hydra/hydra_logging: disabled
  - override hydra/job_logging: disabled
# Environment settings
env_name: ant-dir  # interpolated into group, run_name and log_dir
max_episode_steps: 200  # resample_every interpolates this value
num_tasks: 40
dataset_size: 200000
# Training settings
seed: 0  # interpolated into run_name and log_dir
epochs: 100
iter_per_epoch: 5000
batch_size: 64
seq_len: 5  # agent.horizon interpolates this value
context_batch_size: 64
device: 'cuda:0'  # agent.device interpolates this value
meta_batch_size: 16
save_every: 10  # NOTE(review): unit not stated here (presumably epochs) - confirm
# Evaluation settings
num_eval_episodes: 3
num_context: 600
random_init_z: true
random_np_steps: 100
# Follows max_episode_steps (200 unless overridden).
resample_every: ${max_episode_steps}
# Logging settings
use_wandb: true
project_name: C-DCWM
group: ${env_name}-${agent.name}
# `now` is Hydra's timestamp resolver; the argument is a strftime pattern.
run_name: ${env_name}-${agent.name}-${seed}-${now:%Y-%m-%d_%H-%M-%S}
log_dir: ./logs/${env_name}/${agent.name}/seed${seed}
log_all: true
save_video: false

# Agent configuration. Booleans are written as lowercase true/false and
# strings use single quotes throughout for consistency.
agent:
  # Resolves to MetaWM_discrete_ce with the default world_model set below.
  name: MetaWM_${agent.world_model}
  obs_dim: ???  # mandatory value (OmegaConf `???`), to be specified later
  action_dim: ???  # mandatory value (OmegaConf `???`), to be specified later
  device: ${device}  # follows the top-level device
  num_train_tasks: 20
  # context encoder
  use_next_obs_in_context: false  # true for dynamically changing envs
  z_dim: 5
  context_hidden_dims: 256
  context_hidden_depth: 2
  fsq_context_levels: [5, 3]
  use_context_fsq: false  # finite scalar quantization after encoder
  use_context_fsq_mean: false  # finite scalar quantization after the mean
  use_focal: false
  focal_weight: 1.0
  use_tanh: true
  use_infonce: true
  infonce_weight: 1.0
  infonce_tau: 0.005
  infonce_radius: 100.0
  use_l2_context_norm: false
  # world model
  world_model: discrete_ce
  horizon: ${seq_len}  # follows the top-level seq_len
  enc_hidden_dim: 512
  enc_hidden_depth: 1
  model_hidden_dim: 512
  model_hidden_depth: 2
  num_rewards: 1
  latent_dim: 512
  fsq_levels: [5, 3]
  use_tar_enc: true
  unc_prop_mode: 'sample'
  model_update_freq: 2
  consistency_coef: 1.0
  reward_coef: 1.0
  allow_z_model: true
  rho: 0.99
  simnorm_dim: 8
  # actor
  actor_hidden_dim: 256
  actor_hidden_depth: 2
  log_std_min: -5
  log_std_max: 2
  # critic
  q_hidden_dim: 256
  q_hidden_depth: 2
  num_critics: 2
  # implicit q-learning
  expectile: 0.8
  temperature: 3.0
  discount: 0.99
  tau: 0.005
  # optimizers
  # Learning rates carry an explicit decimal point in the mantissa: YAML 1.1
  # loaders (e.g. plain PyYAML) read a bare `1e-4` as a string, not a float.
  model_lr: 1.0e-4
  context_enc_lr: 1.0e-4
  actor_lr: 3.0e-4
  critic_lr: 3.0e-4
  # planning
  mpc: false
  plan_horizon: 3
  iterations: 6
  num_samples: 512
  num_elites: 64
  num_pi_trajs: 24
  min_std: 0.05
  max_std: 2
  plan_temperature: 0.5
  plan_unc_prop_mode: 'sample-no-grad'
  mppi_use_mean: false
  plan_with_value: false
  reward_mode: 'min'  # options: avg, min, std_penalty
  reward_penalty: 0
  # accelerate
  compile: false
  cuda_graph: false

# Hydra runtime configuration
hydra:
  # null: Hydra does not write its `.hydra` config-snapshot subdirectory.
  output_subdir: null
  run:
    # Use the current directory as the run directory.
    dir: .
  