# Hydra defaults list. `_self_` is listed first, so the selected `task`
# group option is composed after the values defined in this file.
defaults:
  - _self_
  - task: halfcheetah-medium-expert-v2

# General
# `pipeline_name` and `seed` are reused in `hydra.run.dir`;
# `method` is reused in the wandb `group`.
pipeline_name: idql_d4rl
device: cuda:0
method: idql
mode: train
seed: 0

# Environment
# `env_name` is the environment family. It must be one of the keys of
# `max_episode_steps_per_env` below and it also feeds the wandb `group`.
# Keep it in sync with the `task` selected in the defaults list:
# halfcheetah-* tasks belong to `mujoco`, so override `env_name` together
# with `task` when switching to a kitchen or antmaze task.
env_name: mujoco
normalize_reward: true
discount: 0.99

# Episode length cap, looked up from the table below via `env_name`.
max_episode_steps: ${max_episode_steps_per_env.${env_name}}
max_episode_steps_per_env:
  kitchen: 280
  mujoco: 1000
  antmaze: 1000

# IQL
# NOTE(review): presumably the expectile used by the IQL value loss —
# confirm against the training pipeline.
iql_tau: 0.7

# Actor: diffusion solver settings, EMA rate, optimizer step size and
# network size/regularisation.
solver: ddpm
diffusion_steps: 5
sampling_steps: 5
predict_noise: true
ema_rate: 0.9999
actor_learning_rate: 0.0003
actor_hidden_dim: 256
actor_n_blocks: 3
actor_dropout: 0.1

# Critic: network width and optimizer step size.
critic_hidden_dim: 256
critic_learning_rate: 0.0003

# Wandb
# `group` is interpolated from `method` and `env_name`, so runs are grouped
# per method and environment family.
wandb_mode: online
project: cdp_baselines
group: ${method}_${env_name}
exp_name: idql
log_dir: logs

# Training
# NOTE(review): the *_interval values are presumably counted in gradient
# steps (same unit as `gradient_steps`) — confirm against the training loop.
gradient_steps: 100000
batch_size: 256
ema_update_interval: 5
log_interval: 200
save_interval: 50000
eval_interval: 10000

# Inference: checkpoint selection, evaluation size and sampling settings.
# NOTE(review): `num_candidates` and `temperature` presumably control
# candidate-action sampling/selection at evaluation time — confirm.
ckpt: latest
num_envs: 5
num_episodes: 10
num_candidates: 100
temperature: 0.5
use_ema: true

# hydra
hydra:
  job:
    # Keep the process working directory unchanged instead of switching
    # into the run directory.
    chdir: false
  run:
    # Per-run output directory. Note that it uses the task's `env_name`
    # (`task.env_name`), not the top-level `env_name`.
    dir: ${log_dir}/${pipeline_name}/${task.env_name}/hydra/${seed}
  # `null` disables the `.hydra` config-dump subdirectory.
  output_subdir: null

