# Hydra defaults list: composes this file (`_self_`) with one config chosen
# from the `task` config group.
# NOTE(review): in Hydra's defaults list, later entries are merged on top of
# earlier ones, so with `_self_` listed first the task config would win on
# conflicting keys — confirm this ordering is intended.
defaults:
  - _self_
  - task: datacollect_kitchen_lowdim_abs #kitchen_lowdim_abs

# Experiment identity. `name` is reused through ${name} in the logging,
# multi_run and hydra output-path templates of this file.
name: train_diffusion_transformer_lowdim
# Dotted import path of the workspace class (Hydra `_target_` convention).
_target_: diffusion_policy.workspace.datacollect_diffusion_transformer_lowdim_workspace.DatacollectDiffusionTransformerLowdimWorkspace #_type1_, _type2_
# NOTE(review): despite the `_dir` suffix, this value is the path of a single
# .ckpt file — confirm how the workspace consumes it.
checkpoint_dir: 'data/experiments/low_dim/kitchen/diffusion_policy_transformer/base_0.5/seed=42.ckpt'

# Values resolved from the selected `task` config via interpolation.
obs_dim: ${task.obs_dim}
action_dim: ${task.action_dim}
task_name: ${task.name}
# Free-form experiment label; within this file it is only used as a logging tag.
exp_name: "default"

# Shared hyperparameters. gamma, horizon, n_obs_steps, n_action_steps,
# obs_as_cond and pred_action_steps_only are referenced by the `policy`
# section of this file through ${...} interpolation. The remaining keys are
# not referenced in this file; presumably the task config or the workspace
# reads them — verify.
gamma: 0.997
horizon: 16
n_obs_steps: 4
n_action_steps: 8
n_latency_steps: 0
past_action_visible: false
keypoint_visible_rate: 1.0
# Selects the branch taken by the ${eval:...} expressions that compute the
# model's input_dim and cond_dim.
obs_as_cond: true
pred_action_steps_only: false

# Policy definition: a transformer denoising model plus a DDPM noise
# scheduler. Each `_target_` is a dotted import path (Hydra convention).
# Most scalar values are interpolated from the top-level keys of this file.
policy:
  _target_: diffusion_policy.policy.dpo_diffusion_transformer_lowdim_policy.DiffusionTransformerLowdimPolicy

  model:
    _target_: diffusion_policy.model.diffusion.transformer_for_diffusion.TransformerForDiffusion
    # action_dim when obs_as_cond is true, otherwise obs_dim + action_dim.
    # `eval` is a custom resolver that is not defined in this file; it must be
    # registered before the config is resolved.
    input_dim: ${eval:'${action_dim} if ${obs_as_cond} else ${obs_dim} + ${action_dim}'}
    output_dim: ${policy.model.input_dim}
    horizon: ${horizon}
    n_obs_steps: ${n_obs_steps}
    # obs_dim when obs_as_cond is true, otherwise 0.
    cond_dim: ${eval:'${obs_dim} if ${obs_as_cond} else 0'}

    n_layer: 8
    n_head: 4
    n_emb: 768
    p_drop_emb: 0.0
    p_drop_attn: 0.1

    causal_attn: true
    time_as_cond: true  # if false, use a BERT-like encoder-only arch with time as input
    obs_as_cond: ${obs_as_cond}
    n_cond_layers: 0  # >0: use a transformer encoder for cond, otherwise use an MLP

  noise_scheduler:
    _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler
    num_train_timesteps: 100
    beta_start: 0.0001
    beta_end: 0.02
    beta_schedule: squaredcos_cap_v2
    variance_type: fixed_small  # Yilun's paper uses fixed_small_log instead, but that easily causes NaN
    clip_sample: true  # required when predict_epsilon=False
    prediction_type: epsilon  # or sample

  horizon: ${horizon}
  obs_dim: ${obs_dim}
  action_dim: ${action_dim}
  n_action_steps: ${n_action_steps}
  n_obs_steps: ${n_obs_steps}
  num_inference_steps: 100
  obs_as_cond: ${obs_as_cond}
  pred_action_steps_only: ${pred_action_steps_only}
  train_time_samples: 1
  gamma: ${gamma}

  # scheduler.step params
  # predict_epsilon: True

# Settings for the EMAModel class named in `_target_`.
# NOTE(review): the keys below presumably map one-to-one onto EMAModel's
# constructor arguments — verify against the class before renaming any.
ema:
  _target_: diffusion_policy.model.diffusion.ema_model.EMAModel
  update_after_step: 0
  inv_gamma: 1.0
  power: 0.75
  min_value: 0.0
  max_value: 0.9999

# Training-loop settings read by the workspace.
training:
  device_gpu: "cuda:0"
  device_cpu: "cpu"
  seed: 42  # seeds noted by the author: 42, 1337, 3407
  debug: false
  resume: true
  # optimization
  lr_scheduler: cosine
  # Transformer needs LR warmup
  # NOTE(review): warmup is disabled (0) here despite the note above —
  # confirm this is intentional for this run.
  lr_warmup_steps: 0  # change
  num_epochs: 10  # change
  gradient_accumulate_every: 1
  use_ema: true
  # training loop control
  # in epochs
  # NOTE(review): rollout_every and checkpoint_every are larger than
  # num_epochs, so these periodic events can presumably fire at most once
  # per run — verify against the workspace's epoch check.
  rollout_every: 1000  # change
  checkpoint_every: 1000  # change
  val_every: 1
  sample_every: 5
  # steps per epoch
  max_train_steps: null
  max_val_steps: null
  # misc
  tqdm_interval_sec: 1.0

# Optimizer hyperparameters. There is no `_target_` here, so the optimizer is
# presumably constructed by project code from these values — verify.
optimizer:
  learning_rate: 2.0e-5 # 1e-4
  weight_decay: 1.0e-4
  betas: [0.9, 0.99]

# Experiment-tracker settings.
# NOTE(review): presumably forwarded to wandb.init (the multi_run section of
# this file mentions wandb) — verify in the workspace.
logging:
  project: diffusion_policy_debug
  resume: true
  mode: online
  # Run name: timestamp at config resolution, then experiment and task names.
  name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
  tags: ["${name}", "${task_name}", "${exp_name}"]
  id: null
  group: null


# Naming templates for multi-run launches. `run_dir` uses the same template as
# hydra.run.dir in this file; `wandb_name_base` uses the same template as
# logging.name.
multi_run:
  run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
  wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}

# Hydra runtime settings: output directories for single runs and sweeps.
# Both are timestamped under data/outputs/<date>/<time>_<name>_<task_name>.
hydra:
  job:
    override_dirname: ${name}
  run:
    dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
  sweep:
    dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
    # Each sweep job writes into a subdirectory named after hydra.job.num.
    subdir: ${hydra.job.num}

