# skill_diffuser.yaml

# Settings for the SkillDiffuser planner: training loop, inference options,
# the policy class to instantiate, and the keyword arguments handed to it.
planner:
  training:
    total_timesteps: 2000
    log_interval: 10
  inference:
    # YAML null (becomes Python None when loaded). The bare word `None` is not
    # a null in YAML; it loads as the string "None".
    history: null
  params:
    # PyYAML-specific tag that resolves to the named Python object. Safe
    # loaders (yaml.safe_load) reject it, so this file must be read with a
    # loader that supports python/name tags.
    policy: !!python/name:diffgro.experiments.skill_diffuser.SkillDiffuserPolicy ''
    batch_size: 8  # per task
  policy_kwargs:
    horizon: 8
    lang_dim: 512
    skill_dim: 16
    activation_fn: "mish"
    # One entry per sub-module; the key names mirror the component names used
    # under component_kwargs (skill_prd, cond_diff, inv). Presumably learning
    # rates -- confirm against the policy implementation.
    # The explicit !!float tag is deliberate: YAML 1.1 loaders such as PyYAML
    # do not recognise exponent notation without a decimal point as a float.
    lr_schedule:
      skill_prd: !!float 1e-5
      cond_diff: !!float 5e-3
      inv: !!float 5e-4
    component_kwargs:
      # Arguments for the planning component and its nested sub-modules.
      plan:
        plan_horizon: 100
        n_denoise: 200
        beta_scheduler: "cosine"
        predict_epsilon: false
        lmbda: 0.01
        skill_prd_kwargs:
          emb_dim: 128
          n_heads: 4
          n_layers: 1
        vec_quant_kwargs:
          n_codes: 20
          decay: 0.99
        cond_diff_kwargs:
          guidance_weight: 1.0
          denoise_type: "ddpm"
        temp_unet_kwargs:
          emb_dim: 128
          dim_mults: [1, 4, 8]
          attention: false
      # Arguments for the `inv` component.
      inv:
        hid_dim: 256
        net_arch: [128, 128]