# Hydra defaults list.
# `_self_` is listed first, so (per Hydra's defaults-list ordering) values from
# the `task` config group are composed after this file's own values.
defaults:
  - _self_
  - task: lift_image

# Experiment identity; `name` is reused in the logging and output-dir templates below.
name: pbrl_robomimic_image
# Dotted path of the workspace class (presumably instantiated through Hydra's
# `_target_` convention — confirm against the launcher script).
_target_: diffusion_policy.workspace.pbrl_robomimic_image_workspace.PbrlRobomimicImageWorkspace
# NOTE(review): despite the `_dir` suffix this points at a single .ckpt file.
checkpoint_dir: 'data/experiments/image/robomimic/lift/lstm-gmm/epoch=0026-test_mean_score=0.520.ckpt'

# Values pulled from the task config selected in the defaults list.
task_name: ${task.name}
shape_meta: ${task.shape_meta}
# Free-form experiment label; used as a logging tag below.
exp_name: 'default'

# Shared hyperparameters. `gamma`, `horizon`, `n_obs_steps` and
# `n_action_steps` are referenced by the `policy` section through ${...}
# interpolation; the remaining keys are presumably consumed by the task config
# or the workspace (not visible in this file).
gamma: 0.999
horizon: 10
n_obs_steps: 1
n_action_steps: 1
n_latency_steps: 0
# Tracks `horizon` through interpolation rather than a YAML alias: an alias is
# expanded when the file is parsed, so a command-line override of `horizon`
# would otherwise leave this value at the file default.
dataset_obs_steps: ${horizon}
past_action_visible: false
keypoint_visible_rate: 1.0

# Policy construction arguments.
policy:
  # Author's note on the module prefix alternatives: cpl_, dpo_
  # (the dpo_ module is the one selected here).
  _target_: diffusion_policy.policy.dpo_robomimic_image_policy.RobomimicImagePolicy
  shape_meta: ${shape_meta}
  algo_name: bc_rnn
  obs_type: image
  # oc.select resolver arguments: key, default
  task_name: ${oc.select:task.task_name,lift}
  dataset_type: ${oc.select:task.dataset_type,ph}
  crop_shape: [76, 76]
  gamma: ${gamma}
  # Author's note on per-method values: cpl: 0.0025; dpo: 0.001
  beta: 0.001
  bias_reg: 0.25
  horizon: ${horizon}
  n_obs_steps: ${n_obs_steps}
  n_action_steps: ${n_action_steps}

# Training data loader settings.
dataloader:
  # Author's note: cpl used 20
  batch_size: 12
  num_workers: 16
  shuffle: true
  pin_memory: true
  persistent_workers: false
  drop_last: true

# Validation data loader settings; same as `dataloader` except shuffling is off.
val_dataloader:
  batch_size: 12
  num_workers: 16
  shuffle: false
  pin_memory: true
  persistent_workers: false
  drop_last: true

# Optimizer hyperparameters.
optimizer:
  # Value is 1e-5. NOTE(review): the previous inline comment read "1e-4",
  # which does not match the value — confirm which one is intended.
  learning_rate: 0.00001
  weight_decay: 0.1
  betas: [0.9, 0.95]

# Training-loop settings.
training:
  device_cpu: 'cpu'
  device_gpu: 'cuda:0'
  seed: 42
  debug: false
  resume: true
  lr_scheduler: cosine
  # No LR warmup is configured; the policy in this file is bc_rnn.
  lr_warmup_steps: 0
  # optimization
  num_epochs: 150
  gradient_accumulate_every: 1
  grad_norm_clip: 0.5
  # training loop control; the *_every intervals below are in epochs
  rollout_every: 10
  checkpoint_every: 10
  val_every: 1
  sample_every: 5
  # steps per epoch (null leaves the value unset)
  max_train_steps: null
  max_val_steps: null
  # misc
  tqdm_interval_sec: 1.0
  # NOTE(review): "noraml" in both paths looks like a misspelling of "normal".
  # It is left untouched because it has to match the directory name on disk.
  # Despite the `_dir` suffix, both keys point at .h5 files.
  dataset_1_dir: 'data/robomimic/datasets/lift/noraml/robomimic_data_0.4.h5'
  dataset_2_dir: 'data/robomimic/datasets/lift/noraml/robomimic_data_0.7.h5'
  # Settings for the online phase; their exact semantics are defined by the
  # workspace code, which is not visible from this file.
  online:
    num_groups: 4
    all_votes: 100
    reverse_ratio: 0.2
    reverse_rate: 0.5
    reverse_freq: 1
    update_history: false

# Experiment-tracker settings (presumably forwarded to wandb — confirm in the workspace).
logging:
  project: diffusion_policy_debug
  resume: true
  mode: online
  # Run name: timestamp followed by the experiment and task names.
  name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
  tags:
    - "${name}"
    - "${task_name}"
    - "${exp_name}"
  id: null
  group: null

# Checkpoint retention settings.
checkpoint:
  # Keep the k best checkpoints, ranked by `monitor_key` (higher is better, mode: max).
  topk:
    monitor_key: test_mean_score
    mode: max
    k: 5
    # File-name template filled with the epoch number and the monitored score.
    format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt'
  save_last_ckpt: true
  save_last_snapshot: false

# Multi-run settings. `run_dir` uses the same template as `hydra.run.dir`, and
# `wandb_name_base` uses the same template as `logging.name`.
multi_run:
  run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
  wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}

# Hydra runtime settings: where single runs and sweeps write their outputs.
hydra:
  job:
    override_dirname: ${name}
  run:
    # Output directory: date folder, then time + experiment name + task name.
    dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
  sweep:
    # Sweeps share the same directory template; each job writes to a
    # subdirectory named after its job number.
    dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
    subdir: ${hydra.job.num}
