# environment
# Task-interface settings. The two step budgets at the bottom are written as
# "<count>/${action_repeat}": to a YAML parser each is a plain string, so the
# ${action_repeat} placeholder and the division must both be resolved by
# whatever loads this file.
# NOTE(review): presumably the numerators are raw environment steps and the
# quotient is the number of agent decisions -- confirm against the loader.
# Quoted, so it is always read as a string.
# NOTE(review): looks like an observation-type selector -- confirm valid values.
modality: 'state'
# Referenced by max_steps and train_steps below via ${action_repeat}.
# presumably the number of environment steps each action is held -- TODO confirm
action_repeat: 4
# presumably the reward discount factor -- TODO confirm
discount: 0.99
# String expression; 1000 divided by action_repeat once resolved by the loader.
max_steps: 1000/${action_repeat}
# String expression; 1000000 divided by action_repeat once resolved by the loader.
train_steps: 1000000/${action_repeat}

# planning
# NOTE(review): the key names suggest a sampling-based planner that draws
# num_samples candidates, keeps num_elites of them and refines over
# `iterations` rounds -- this file does not show the planner, so confirm
# each meaning against the consuming code.
iterations: 6
num_samples: 512
num_elites: 64
# presumably a fraction in [0, 1]; what is being mixed is not visible here -- TODO confirm
mixture_coef: 0.05
# Also used as the second argument of std_schedule in the learning section
# (via ${min_std}), so changing it here changes that schedule string too.
min_std: 0.05
temperature: 0.5
momentum: 0.1
# Boolean switch; its effect is defined by the consumer -- TODO confirm.
use_q: true

# learning
# Optimisation and replay settings. Values containing ${...} or written as
# linear(...) are plain strings to a YAML parser; resolving the placeholder
# and interpreting the expression is the loader's job.
batch_size: 512
max_buffer_size: 1000000
# Scalar horizon; referenced by horizon_schedule below via ${horizon}.
horizon: 25
# NOTE(review): a separate, list-valued key next to the scalar `horizon`.
# Nothing in this file ties the two together -- verify which one the consumer
# reads and whether they are meant to stay in sync.
horizons: [25, 25, 25]
rho: 0.5
kappa: 0.1
# Written with an explicit decimal point: a bare `1e-3` is resolved as a
# string by YAML 1.1 loaders, whereas `1.0e-3` is a float under both
# YAML 1.1 and YAML 1.2.
lr: 1.0e-3
# Schedule strings of the form linear(a, b, n).
# NOTE(review): presumably a linear ramp from a to b over n steps -- confirm
# against the schedule parser.
std_schedule: linear(0.5, ${min_std}, 25000)
horizon_schedule: linear(1, ${horizon}, 25000)
# presumably prioritised-replay exponents (priority / importance weight) -- TODO confirm
per_alpha: 0.6
per_beta: 0.4
grad_clip_norm: 10
# presumably steps collected before learning starts -- TODO confirm
seed_steps: 5000
update_freq: 2
# presumably the target-network soft-update rate -- TODO confirm
tau: 0.01

# architecture
# Integer size settings for the model.
# NOTE(review): presumably hidden/latent layer widths; which module each one
# applies to is not visible in this file -- confirm against the model code.
enc_dim: 256
vae_enc_dim: 64
mlp_dim: 512
latent_dim: 50

# misc
# Run bookkeeping: seeding, experiment naming, evaluation cadence and which
# artefacts to save. Exact semantics are defined by the consuming code.
seed: 1
exp_name: default
# presumably measured in the same step unit as train_steps -- TODO confirm
eval_freq: 20000
eval_episodes: 20
# Boolean output switches.
save_video: true
save_model: true
save_heatmaps: false
# NOTE(review): meaning cannot be determined from this file -- document once
# confirmed against the code that reads it.
first_step_qr_only: true