# Working directory. The value is anchored as `work_dir`, so any alias
# (*work_dir) elsewhere in this document resolves to the same path.
# Presumably where logs/checkpoints for this run are written — confirm.
work_dir: &work_dir logs/full_seen
# Boolean flag, written in canonical lowercase form.
# Presumably toggles deterministic (reproducible) execution — confirm
# against the training script.
deterministic: false

# Placeholder paths: replace each `/path/to/...` with a real location.
# Dataset root; the placeholder points at an AGD20K directory.
data_dir: '/path/to/AGD20K'

# .pth files under a `seen` directory. Presumably per-view (exo / ego)
# object data for the Seen split — confirm against the data loader.
exo_obj_file: '/path/to/seen/obj_exo.pth'
ego_obj_file: '/path/to/seen/obj_ego.pth'

# Checkpoint sources.
load:
  # Encoder weights; the placeholder names a ViT-B-16 checkpoint file.
  encoder_ckpt: /path/to/ViT-B-16.pt
  # Explicitly null (no value set). Presumably accepts a path to a
  # full-model checkpoint when resuming — confirm against the loader.
  all_ckpt: null

# Data / batching settings. Meanings are inferred from the key names only;
# verify each against the code that consumes this config.
batch_size: 10
img_size: 224
# String selector; this config uses the 'Seen' value.
split_type: 'Seen'
num_exo: 1
# String selector; this config uses the 'refined' value.
PL_mode: 'refined'
# presumably a ratio/probability in [0, 1] for image augmentation — TODO confirm
aug4imgRatio: 0.5

# Optimizer and learning-rate schedule settings.
optimizer:
  lr: 0.0001
  # presumably a multiplier applied to `lr` for encoder parameters — confirm
  lr_encoder_coeff: 0.1
  betas: [0.9, 0.95]
  wd: 0.01

  # NOTE(review): `lr_step` equals `max_iter`. If training is bounded by
  # `num_epochs` and ends before that many iterations, a step schedule with
  # this period presumably never decays the learning rate — confirm intent.
  max_iter: 5000000
  lr_step: 5000000
  lr_gamma: 0.1
  num_epochs: 40
  accum_iter: 2
  sche_type: step

# Model architecture settings.
model:
  encoder_type: CLIP
  # NOTE(review): width 768 / 12 layers / 12 heads / output_dim 512 look
  # like the ViT-B/16 variant named by `load.encoder_ckpt` — confirm the
  # two stay in sync if the checkpoint is changed.
  encoder_params:
    width: 768
    layers: 12
    heads: 12
    output_dim: 512
  # Same value as `encoder_params.output_dim` in this config.
  decoder_embed_dim: 512

  pred_model_type: SAM
  pred_decoder_args:
    mlp_dim: 2048
    depth: 2
    # Integer here (not a boolean), despite the `use_` prefix.
    use_up: 2
    # Booleans are written in canonical lowercase form.
    use_additional_token: true
    conv_first: true

  margin: 0.1

# Loss-term coefficients. Presumably each scales one term of the total
# training loss — confirm against the loss computation.
# Every value carries an explicit fractional part so that any YAML schema
# resolves it as a float.
loss:
  kl_loss_coeff: 1.0
  sim_loss_coeff: 10.0
  exo_cls_coeff: 10.0
  noun_sim_coeff: 1.0
  part_sim_coeff: 0.1
