
# architecture
# NOTE(review): `???` looks like the OmegaConf/Hydra "mandatory value" marker,
# i.e. the key has to be supplied elsewhere before use -- confirm with the
# code that loads this file.
model_name: tap_tdmpc
num_enc_layers: 2
enc_dim: 256
task_dim: 96
num_q: 2
latent_action_dim: 2
mlp_dim: 256  # alternative value noted by the original author: 512
latent_dim: ???
action_dim: ???
multitask: ???
tasks: ???
# Disabled keys, kept for reference:
# obs_shapes: ???
# action_dims: ???
num_bins: 0
vmin: -10
vmax: 10

# planning
# NOTE(review): the key names suggest settings for a sampling-based planner
# (sample/elite counts, std bounds, temperature) -- confirm the exact
# semantics against the code that consumes this file.
mpc: true
iterations: 6
num_samples: 512
num_elites: 64
num_pi_trajs: 24
horizon: 3  # alternative value noted by the original author: 5
min_std: 0.05
max_std: 2.0
temperature: 0.5

# training
# Loss weights (`*_coef`) and optimizer settings; how each key is used is
# defined by the consuming code, not by this file.
batch_size: 512  # 256 when imitating tdmpc2
reward_coef: 0.5
value_coef: 0.1
consistency_coef: 0.0
vae_coef: 0.05
# Written with an explicit mantissa dot on purpose: YAML 1.1 loaders such as
# PyYAML resolve a bare `1e-4` to the string "1e-4" instead of a float.
lr: 1.0e-4
grad_clip_norm: 10
rho: 0.5
num_noises: 10
std_noises: 0.5
temperature_noises: 0.5
loss_type: kl
