# environment
# NOTE(review): `???` looks like the OmegaConf/Hydra mandatory-value marker,
# which would mean action_repeat has to be supplied by an override - confirm
# against the config loader.
task: quadruped-run
modality: 'state'
action_repeat: ???
discount: 0.99
# The next two values are plain strings as far as YAML is concerned; the
# ${action_repeat} substitution and the division are left to the consumer.
episode_length: '1000/${action_repeat}'
train_steps: '500000/${action_repeat}'

# planning
# NOTE(review): the key names suggest a sampling-based planner that refines a
# distribution over several iterations - confirm against the consuming code.
iterations: 6
num_samples: 512
num_elites: 64
mixture_coef: 0.05
# Also interpolated into std_schedule as ${min_std}.
min_std: 0.05
temperature: 0.5
momentum: 0.1

# learning
batch_size: 512
max_buffer_size: 1000000
# Also interpolated into horizon_schedule as ${horizon}.
horizon: 5
reward_coef: 0.5
value_coef: 0.1
consistency_coef: 2
rho: 0.5
kappa: 0.1
# Written with an explicit mantissa dot: YAML 1.1 loaders such as stock
# PyYAML resolve a bare `1e-3` as a string rather than a float.
lr: 1.0e-3
# Schedule specs are strings (quoted to make that explicit). ${min_std} and
# ${horizon} refer to keys defined in this file; interpreting `linear(...)`
# is left to the consuming code.
std_schedule: 'linear(0.5, ${min_std}, 25000)'
horizon_schedule: 'linear(1, ${horizon}, 25000)'
# NOTE(review): per_alpha / per_beta look like prioritized-replay exponents -
# confirm against the buffer implementation.
per_alpha: 0.6
per_beta: 0.4
grad_clip_norm: 10
seed_steps: 5000
update_freq: 2
# NOTE(review): presumably the target-network update rate - confirm.
tau: 0.01

# architecture
# NOTE(review): presumably layer widths and latent size of the model networks;
# exact usage is not visible from this file - confirm in the model code.
enc_dim: 256
mlp_dim: 512
latent_dim: 50

# wandb (insert your own project / entity)
use_wandb: false
# NOTE(review): the project name mentions metaworld while `task` in this file
# is quadruped-run - confirm this is intended.
wandb_project: tdmpc_metaworld
# Explicit null; a bare `wandb_entity:` parses to the same value but reads as
# an accidental omission.
wandb_entity: null

# misc
# Integer seed (trailing whitespace after the value removed).
seed: 1
exp_name: default
# NOTE(review): eval_freq is presumably measured in environment steps, like
# train_steps - confirm against the training loop.
eval_freq: 100000
eval_episodes: 10
save_video: true
save_model: true

# text conditioning
# Empty string by default.
# NOTE(review): presumably the natural-language prompt consumed by the
# text-based reward (see reward_method) - confirm against the consumer.
text_prompt: ''

# guidance parameters
# Trailing comments list alternative values left by the author (presumably
# earlier tuning attempts); only the uncommented value is active.
noise_level: 500  # 600, 350, 450
alignment_scale: 2000  # 5, 2000, 4000, 2000, 100
recon_scale: 200  # 0, 200, 200, 500, 0.5

# reward method
# Options listed by the author: sd-tadpole, clip, sparse.
reward_method: 'sd-tadpole'
# NOTE(review): presumably only relevant when reward_method is sparse -
# confirm against the reward code.
sparse_scale: 1
