---
# Configuration for the pipeline named by `pipeline_name` below.
# Section comments describe what the key names suggest; the consuming code is
# not part of this file, so confirm exact semantics against the pipeline script.

# Hydra defaults list: this file (`_self_`) is listed first, followed by the
# `task` config group option.
defaults:
  - _self_
  - task: antmaze-medium-play-v2

# Run identity and execution settings.
pipeline_name: idql_d4rl_antmaze
mode: inference
seed: 0
device: cuda:0

# Value-learning settings (presumably RL discount factor and IQL expectile;
# TODO confirm against the critic implementation).
discount: 0.99
iql_tau: 0.9

# Diffusion model / sampler settings.
solver: ddpm
diffusion_steps: 5
sampling_steps: 5
predict_noise: true
ema_rate: 0.9999

# Actor network and optimizer settings.
actor_learning_rate: 0.0003
actor_hidden_dim: 256
actor_n_blocks: 3
actor_dropout: 0.1

# Critic network and optimizer settings.
critic_hidden_dim: 256
critic_learning_rate: 0.0003

# Training loop schedule.
# NOTE(review): save_interval (400) is smaller than log_interval (1000) and
# tiny relative to gradient_steps — verify the unit the consumer expects.
gradient_steps: 2000000
batch_size: 256
log_interval: 1000
save_interval: 400

# Evaluation / inference settings (checkpoint selection, rollout counts and
# candidate sampling — presumably; confirm against the inference code).
ckpt: latest
num_envs: 50
num_episodes: 3
num_candidates: 256
temperature: 0.5
use_ema: true

# Hydra runtime behaviour: do not change the working directory for the job.
hydra:
  job:
    chdir: false

# NOTE(review): name suggests a step count tied to classifier retraining;
# no other key in this file references it — confirm where it is consumed.
retrain_classifier_step: 100000
