# Hydra defaults list. `_self_` is listed first, so this file is composed
# before the selected `task` group config; where keys overlap, the later
# (task) entry wins.
defaults:
  - _self_
  - task: antmaze-medium-play-v0

# Run identity and execution settings.
pipeline_name: TDP_antmaze
# NOTE(review): the "Inference" section further down suggests modes other
# than `train` exist — confirm the accepted values in the consumer.
mode: train
seed: 0
device: cuda:0

# Environment
# Booleans use the canonical lowercase form, matching `hydra.job.chdir`.
normalize_reward: true
# Presumably the RL discount factor (gamma) — confirm in the consumer.
discount: 0.995

# Actor
# NOTE(review): key names suggest a diffusion-based actor with an EMA copy of
# its weights; exact semantics are inferred from names only — confirm against
# the training code.
solver: ddpm
diffusion_steps: 5
# Booleans use the canonical lowercase form, matching `hydra.job.chdir`.
predict_noise: true
ema_rate: 0.995
actor_learning_rate: 0.0003

# Critic
hidden_dim: 256
critic_learning_rate: 0.0003
# Presumably the number of Q-networks in the critic ensemble — confirm.
num_q: 10

# Training
# NOTE(review): the *_interval values are presumably counted in gradient
# steps — confirm against the training loop.
gradient_steps: 2000000
batch_size: 256
ema_update_interval: 5
log_interval: 1000
save_interval: 100000
eval_interval: 10000

# Presumably "lower confidence bound" over the Q ensemble — confirm.
q_target: lcb

num_a_train: 10
# Booleans use the canonical lowercase form, matching `hydra.job.chdir`.
max_q_backup: true


# Inference
# NOTE(review): these keys are presumably read only when `mode` is not
# `train` — confirm in the consumer.
ckpt: latest
num_envs: 50
num_episodes: 3
num_candidates: 50
temperature: 0.5
# Booleans use the canonical lowercase form, matching `hydra.job.chdir`.
use_ema: true

# hydra
hydra:
  job:
    # Keep the process in its launch directory rather than switching into
    # Hydra's per-run output directory.
    chdir: false

