# Hydra defaults list. `_self_` comes first, so this file is composed before
# the selected `task` config and the task config is merged afterwards.
# NOTE(review): whether the task config can override the keys below depends on
# its package (e.g. `# @package _global_`) — confirm in the task config files.
defaults:
  - _self_
  - task: halfcheetah-medium-expert-v2

# Run identification and global settings.
pipeline_name: dd_d4rl_mujoco
# NOTE(review): presumably selects which pipeline stage runs; the value
# "stitch" matches the `# stitch` section further down — confirm the accepted
# values of `mode` and `rl_mode` in the consuming code.
mode: stitch
rl_mode: original
seed: 0
device: cuda

# Environment
terminal_penalty: -100
rl_terminal_penalty: null
discount: 0.997

# DD
solver: ddim

## Network Architecture
emb_dim: 128
d_model: 320
# NOTE(review): d_model / n_heads = 32; presumably d_model must stay divisible
# by n_heads — confirm against the network code before changing either value.
n_heads: 10
depth: 2
label_dropout: 0.25
sampling_steps: 20
# Booleans are written lowercase, matching `use_ema` and `iql_deterministic`
# elsewhere in this file.
predict_noise: true
next_obs_loss_weight: 10.0
ema_rate: 0.9999

# Training
# Gradient-step budgets, one key per prefix (diffusion, invdyn, rl).
diffusion_gradient_steps: 1000000
invdyn_gradient_steps: 1000000
rl_gradient_steps: 1000000
# NOTE(review): `batch_size` carries no prefix; presumably it is used for the
# non-RL training while `rl_batch_size` is used for the RL agent — confirm.
batch_size: 64
rl_batch_size: 256
# NOTE(review): intervals are presumably counted in gradient steps — confirm.
log_interval: 1000
save_interval: 100000
rl_log_interval: 20000
# Learning rates; all three are set to the same value.
vf_lr: 0.0003
qf_lr: 0.0003
actor_lr: 0.0003

# Inference
# Checkpoint selectors; both are set to `latest`.
diffusion_ckpt: latest
invdyn_ckpt: latest
num_envs: 50
num_episodes: 10
temperature: 0.5
# NOTE(review): presumably evaluates with the EMA weights tracked at
# `ema_rate` (set earlier in this file) — confirm in the inference code.
use_ema: true

# stitch
stitch_batch_size: 1024
top_k_R: 64
top_k_ood: 256
stitch_round: 1000
add_noise: 0.0
# Interpolated from the two top_k_* keys above; with the values in this file
# it resolves to "filter:256_64".
stitch_trajectory_name: "filter:${top_k_ood}_${top_k_R}"

# Commented-out alternatives, kept for reference.
# stitch_round: 3000
#
# stitch_batch_size: 1000
# top_k: 1000
# stitch_round: 1


# rl_agent
actor_dropout: null
iql_deterministic: false
# NOTE(review): `iql_tau` and `tau` are separate keys; presumably `iql_tau` is
# the IQL expectile and `tau` the target-network soft-update rate — confirm in
# the agent implementation.
iql_tau: 0.7
beta: 4.0
tau: 0.005
# NOTE(review): distinct from `num_episodes` in the Inference section —
# confirm which code path reads each key.
n_episodes: 3
rl_horizon: 5

# hydra
# `chdir: false` keeps the process in its launch directory instead of
# switching to the Hydra run output directory, so relative paths resolve from
# where the job was started.
hydra:
  job:
    chdir: false

