# Dotted import path of the workspace class this config is meant for.
# NOTE(review): `_target_` is the Hydra instantiate convention — confirm the launcher resolves it that way.
_target_: consistency_policy.reward_guided_student_d4rl.guided_ctm_workspace_d4rl.GuidedCTMWorkspaceD4RL
# Checkpoint-saving options.
checkpoint:
  # Both "save last" options are off; only the top-k settings below are active.
  save_last_ckpt: false
  save_last_snapshot: false
  topk:
    # File-name template: zero-padded 4-digit epoch and test_mean_scores with 3 decimals.
    format_str: epoch={epoch:04d}-test_mean_scores={test_mean_scores:.3f}.ckpt
    # k=1 / mode=max on monitor_key: presumably keeps only the single checkpoint
    # with the highest test_mean_scores — confirm against the top-k manager.
    k: 1
    mode: max
    monitor_key: test_mean_scores
# Training dataloader settings (the validation loader is configured separately
# under `val_dataloader`).
dataloader:
  batch_size: 4800
  # Alternative batch size kept for reference:
  # batch_size: 3200
  num_workers: 8
  persistent_workers: false
  pin_memory: true
  shuffle: true
# NOTE(review): this is 4 while n_obs_steps (top level, policy, env_runner) is 1 —
# confirm the mismatch is intended.
dataset_obs_steps: 4
# Exponential-moving-average model settings; the class is given by `_target_`.
# `training.use_ema` is set to true in this file.
ema:
  _target_: diffusion_policy.model.diffusion.ema_model.EMAModel
  # presumably the decay warms up as 1 - (1 + step / inv_gamma) ** -power and is
  # clamped to [min_value, max_value] — confirm against EMAModel.
  inv_gamma: 1.0
  max_value: 0.9999
  min_value: 0.0
  power: 0.75
  update_after_step: 0
# Experiment label.
exp_name: default
# The same value (16) is repeated under `policy.horizon` and `task.dataset.horizon`;
# keep the three in sync when changing it.
horizon: 16
keypoint_visible_rate: 1.0
# Experiment-logging options.
# NOTE(review): keys look like Weights & Biases init arguments (see also
# `multi_run.wandb_name_base`) — confirm how the workspace consumes them.
logging:
  group: null
  id: null
  mode: online
  name: default
  project: cm_policy_debug
  resume: false
# Multi-run settings.
# NOTE(review): run_dir mentions "square", while the task in this file is
# d4rl_cheetah_medium_replay and `training.output_dir` points under
# ./Diffusion/outputs — possibly left over from another config; confirm.
multi_run:
  run_dir: data/outputs/cd/square/ctmpp
  wandb_name_base: dsm_test
# Step counts; the same values are repeated under `policy` and `task.env_runner`
# (n_action_steps: 1, n_obs_steps: 1) — keep them in sync.
n_action_steps: 1
n_latency_steps: 0
n_obs_steps: 1
# NOTE(review): the name refers to "diffusion_unet_hybrid" although `_target_` is
# GuidedCTMWorkspaceD4RL — possibly inherited from a template; confirm.
name: train_diffusion_unet_hybrid
# Also repeated as `policy.obs_as_global_cond` with the same value.
obs_as_global_cond: true
# Optimizer settings; the class is given by `_target_` (torch.optim.AdamW).
optimizer:
  _target_: torch.optim.AdamW
  # Two-element betas list, written inline as a short leaf sequence.
  betas: [0.95, 0.999]
  eps: 1.0e-08
  lr: 0.0001
  weight_decay: 1.0e-04
# `task.env_runner.past_action` is likewise false in this file.
past_action_visible: false
# Guided CTM policy configuration; the class is given by `_target_`.
# horizon, n_action_steps, n_obs_steps, obs_as_global_cond and shape_meta repeat
# the top-level values of this file — keep them in sync.
policy:
  _target_: consistency_policy.reward_guided_student_d4rl.guided_ctm_policy_d4rl.GuidedCTMPUnetD4RLPolicy
  # Populated automatically from training.inference_mode; do not set it here.
  inference_mode: none
  cond_predict_scale: true
  diffusion_step_embed_dim: 128
  down_dims:
  - 512
  - 1024
  - 2048
  reward_step_embed_dim: 32
  reward_down_dims:
  - 64
  - 128
  - 256
  dropout_rate: 0.2
  horizon: 16
  kernel_size: 5
  n_action_steps: 1
  n_groups: 8
  n_obs_steps: 1
  # extra args
  initial_ema_decay: 0.0
  # Alternative value kept for reference: 0.0068305197 (= sqrt(160) * 0.00054)
  delta: -1
  special_skip: true
  chaining_times: ['D', 27, 54]
  # teacher
  teacher_path: ./Diffusion/outputs/edm/d4rl_cheetah_medium_replay/checkpoints/epoch=0850-test_mean_scores=0.541.ckpt
  # reward
  reward_path: ./Diffusion/outputs/reward/d4rl_cheetah_medium_replay/checkpoints/epoch=0035-val_loss=0.010.ckpt
  # reward_path: ./Diffusion/outputs/reward/d4rl_cheetah_medium_replay/checkpoints/epoch=0070-val_loss=0.098.ckpt
  # reward_path: ./Diffusion/outputs/reward/d4rl_cheetah_medium_replay/checkpoints/epoch=0030-val_loss=0.018.ckpt
  # reward_path: ./Diffusion/outputs/reward/d4rl_cheetah_medium_replay/checkpoints/epoch=0090-val_loss=0.026.ckpt
  # KDE
  use_kde: false
  kde_samples: 0
  # warm start (same checkpoint file as teacher_path above)
  edm: ./Diffusion/outputs/edm/d4rl_cheetah_medium_replay/checkpoints/epoch=0850-test_mean_scores=0.541.ckpt
  # Two parallel lists: loss names and one weight per name.
  losses: [["ctm", "dsm", "rwd"], [8.0, 1.0, 1.0]]
  # losses: [["ctm", "dsm"], [1, 1]]
  # losses: [["rwd"], [0.01]]
  ctm_sampler: ctm
  dsm_weights: "karras"
  noise_scheduler:
    _target_: consistency_policy.diffusion.CTM_Scheduler
    time_min: 0.02
    time_max: 80.0
    rho: 7.0
    bins: 80
    solver: heun
    scaling: boundary
    use_c_in: true
    data_std: 0.5
    time_sampler: ctm
    clamp: true
    ode_steps_max: 1
  obs_as_global_cond: true
  # obs_encoder_group_norm: true
  shape_meta:
    action:
      shape:
      - 6
    observation:
      shape:
      - 17
# Action / observation tensor shapes (6 and 17). The same mapping is repeated
# under `policy.shape_meta` and `task.shape_meta`; keep all three in sync.
shape_meta:
  action:
    shape:
    - 6
  observation:
    shape:
    - 17
# Task definition: dataset, evaluation env runner and tensor shapes.
task:
  abs_action: true
  dataset:
    _target_: diffusion_policy.dataset.d4rl_dataset.D4RLDataset
    env_name: 'halfcheetah-medium-replay-v2'
    # Same value as the top-level `horizon` and `policy.horizon`.
    horizon: 16
    pad_after: 0
    pad_before: 0
    seed: 42
    # NOTE(review): 0.0 presumably means no samples are held out for validation,
    # yet `training.val_every` is 1 and `val_dataloader` is configured — confirm.
    val_ratio: 0.0
  env_runner:
    _target_: diffusion_policy.env_runner.d4rl_lowdim_runner.D4RLLowdimRunner
    # Same environment name as `task.dataset.env_name`.
    env_name: 'halfcheetah-medium-replay-v2'
    crf: 22
    fps: 80
    max_steps: 1000
    n_action_steps: 1
    n_envs: 1
    n_obs_steps: 1
    n_test: 20
    n_test_vis: 16
    n_train: 2
    n_train_vis: 2
    past_action: false
    test_start_seed: 100000
    tqdm_interval_sec: 1.0
  name: d4rl_cheetah_medium_replay
  # Repeats the top-level `shape_meta` and `policy.shape_meta`.
  shape_meta:
    action:
      shape:
      - 6
    observation:
      shape:
      - 17
  task_name: d4rl_cheetah_medium_replay
# Same value as `task.name` and `task.task_name`.
task_name: d4rl_cheetah_medium_replay
# Training-loop settings.
training:
  inference_mode: false # Setting this to true disables the teacher, warm start, and training loop
  online_rollouts: true # Setting this to false disables the env_runner
  debug: false
  # Epoch intervals: checkpointing, rollouts and sampling every 5; validation every 1.
  checkpoint_every: 5
  device: cuda:0
  gradient_accumulate_every: 1
  lr_scheduler: cosine
  lr_warmup_steps: 500
  max_train_steps: null
  max_val_steps: null
  num_epochs: 400
  resume: false
  # NOTE(review): `???` is OmegaConf's mandatory-missing marker (assuming the
  # config is loaded through OmegaConf/Hydra) — a real path presumably must be
  # supplied whenever `resume` is true; confirm.
  resume_path: ???
  rollout_every: 5
  sample_every: 5
  seed: 42
  tqdm_interval_sec: 1.0
  use_ema: true
  val_every: 1
  val_sample_every: 5
  output_dir: ./Diffusion/outputs/guided_ctmp/d4rl_cheetah_medium_replay
  val_chaining_steps: 1
  # Same value as num_epochs (400); the meaning of "p" is not evident from this
  # file — TODO document.
  p_epochs: 400
# Validation dataloader settings; unlike `dataloader`, shuffling is off and the
# batch size is 1024.
# NOTE(review): `task.dataset.val_ratio` is 0.0, so the validation set may be
# empty — confirm this loader is actually exercised.
val_dataloader:
  batch_size: 1024
  num_workers: 8
  persistent_workers: false
  pin_memory: true
  shuffle: false