# Hydra defaults list: override the `hydra/launcher` config group with
# `submitit_local`.
defaults:
  - override hydra/launcher: submitit_local

# environment
# Task identifier. NOTE(review): looks like a `<domain>-<task>` name; the set
# of accepted values is defined by the consumer, not by this file — confirm.
task: cheetah-run
# Presumably the observation type requested from the environment — confirm
# which other values the consumer accepts.
obs: state

# evaluation
# `???` is OmegaConf's mandatory-value marker: the key has no default here and
# must be overridden (e.g. on the command line) before it is read.
checkpoint: ???
# Presumably the number of episodes per evaluation round — confirm.
eval_episodes: 10
# Presumably the interval between evaluations; unit (steps vs. updates) is not
# visible in this file — confirm.
eval_freq: 50000

# training
# Scientific-notation floats below carry an explicit mantissa dot (`1.0e-4`
# rather than `1e-4`). YAML 1.1 resolvers such as stock PyYAML only recognise
# a float when the dot is present and otherwise return the string "1e-4";
# the dotted spelling is a float under every loader.
# Digit-group underscores (`40_000_000`) are YAML 1.1 integer syntax; a strict
# YAML 1.2 parser would read them as strings.
steps: 40_000_000
batch_size: 256  # 256
alpha: 1.0e-4
# NOTE(review): the `*_coef` keys look like per-term loss weights — confirm
# against the training code.
reward_coef: 0.1
value_coef: 0.1
consistency_coef: 20
rho: 0.5
lr: 3.0e-4
# Presumably a multiplier applied to `lr` for the encoder — confirm.
enc_lr_scale: 0.3
grad_clip_norm: 20
tau: 0.01
# NOTE(review): `discount_min` / `discount_max` look like bounds on a discount
# derived using `discount_denom`; the formula is not visible here — confirm.
discount_denom: 5
discount_min: 0.95
discount_max: 0.995
buffer_size: 200_000
exp_name: default
# Mandatory (`???`): no default here; must be supplied before it is read.
data_dir: ???
# NOTE(review): presumably a step-wise learning-rate schedule (multiply by
# `scheduler_gamma` every `scheduler_step` steps) — confirm.
scheduler_step: 5000000
scheduler_gamma: 0.1

# planning
# NOTE(review): these look like settings for a sampling-based planner
# (iteration count, population size, elite count, std bounds); the consuming
# code is not visible from this file — confirm.
mpc: true
iterations: 6
num_samples: 512
num_elites: 64
num_pi_trajs: 24
horizon: 3  # 3
min_std: 0.05
max_std: 2
temperature: 0.5

# actor
# NOTE(review): `log_std_min` / `log_std_max` look like bounds on the policy's
# log standard deviation — confirm against the actor implementation.
log_std_min: -10
log_std_max: 2
# Written with an explicit mantissa dot: YAML 1.1 resolvers such as stock
# PyYAML read the dot-less form `1e-4` as a string rather than a float.
entropy_coef: 1.0e-4  # 1e-4
pretrain_steps: 100000
reg_coef: 0

# critic
# NOTE(review): `num_bins`, `vmin` and `vmax` presumably describe a discretised
# value range — confirm against the critic implementation.
num_bins: 101  # 101
vmin: -10  # -10
vmax: +10  # +10
use_v0: true
use_grad_pen: false

# architecture
# Mandatory (`???`): no default here; must be supplied before it is read.
# NOTE(review): may select a preset that overrides the sizes below — confirm.
model_size: ???
num_enc_layers: 2
enc_dim: 256
num_channels: 32
mlp_dim: 512
latent_dim: 512  # 512
task_dim: 96
num_q: 5
dropout: 0.01
simnorm_dim: 8  # 8

# logging
# Both W&B identifiers are mandatory (`???`): they have no default here and
# must be supplied before they are read.
# NOTE(review): whether `disable_wandb: true` avoids reading them depends on
# the consumer — confirm.
wandb_project: ???
wandb_entity: ???
wandb_silent: false
disable_wandb: false
save_csv: true

# misc
# NOTE(review): presumably toggles for saving evaluation videos and agent
# checkpoints — confirm against the consumer.
save_video: true
save_agent: true
# Presumably the random seed for the run — confirm.
seed: 1

# rnd
# NOTE(review): "rnd" presumably stands for random network distillation; the
# code that reads these keys is not visible from this file — confirm.
num_target: 5
reward_ema_coef: 0.9
rnd_dim: 256
bias_scale: 1
expert_coef: 0.8
reward_mlp_dim: 1024
num_reward_layers: 3

# convenience
load_pretrained: false  # false
# Every key below is mandatory (`???`) and has no default in this file.
# NOTE(review): presumably populated programmatically at start-up (from the
# task / environment) rather than by the user — confirm.
work_dir: ???
task_title: ???
multitask: ???
tasks: ???
obs_shape: ???
action_dim: ???
episode_length: ???
obs_shapes: ???
action_dims: ???
episode_lengths: ???
seed_steps: ???
bin_size: ???
