# Hydra defaults list. `_self_` comes first, so this file's values are
# composed before the `task` group entry and the selected task config can
# override them.
defaults:
  - _self_
  - task: bullet-halfcheetah-expert-v0

# Run identification and execution settings.
pipeline_name: diql_d4rl_mujoco
# NOTE(review): presumably switches between the Training and Inference
# settings in this file — confirm accepted values against the entry script.
mode: train
seed: 0
# Plain scalar; parsed as the string "cuda:0" (no space follows the colon).
device: cuda:0

# Environment
# Booleans are written in canonical lowercase (`true` / `false`).
normalize_reward: true
discount: 0.99

# IQL
# NOTE(review): presumably the expectile used by IQL's value loss — confirm
# against the pipeline code.
iql_tau: 0.7

# Actor
# NOTE(review): the key names suggest a diffusion-model actor with a DDPM
# solver; exact semantics live in the pipeline code — confirm there.
solver: ddpm
# diffusion_steps and sampling_steps are set to the same value here.
diffusion_steps: 5
sampling_steps: 5
# Booleans are written in canonical lowercase (`true` / `false`).
predict_noise: true
ema_rate: 0.9999
actor_learning_rate: 0.0003
actor_hidden_dim: 256
actor_n_blocks: 3
actor_dropout: 0.1

# Critic
# Hidden width and learning rate carry the same values as the actor's
# (256 and 0.0003).
critic_hidden_dim: 256
critic_learning_rate: 0.0003

# Training
gradient_steps: 200000
batch_size: 256
# NOTE(review): if these intervals are counted in gradient steps, a full run
# logs and saves 500 times (200000 / 400) — confirm that save frequency is
# intended.
log_interval: 400
save_interval: 400
# NOTE(review): no other classifier settings appear in this file — confirm
# this key is still read by the pipeline.
retrain_classifier_step: 100000

# Inference
ckpt: latest
num_envs: 50
num_episodes: 3
num_candidates: 256
# NOTE(review): `temperature` and `weight_temperature` carry the same value;
# which code path reads each is not visible here — confirm both are needed.
temperature: 0.5
# Booleans are written in canonical lowercase (`true` / `false`).
use_ema: true
weight_temperature: 0.5

# Hydra
hydra:
  job:
    # Stay in the launch directory instead of changing into Hydra's
    # per-run output directory.
    chdir: false

# Top-level key (not part of the `hydra` mapping). Lets sbatch pass a
# different name per run so that runs save to different directories.
experiment_name: default_exp

# NOTE(review): separate from actor_hidden_dim / critic_hidden_dim; the
# consumer is not visible here — confirm this key is still used.
hidden_dim: 256
