# Optimizer selection (`type`) and the keyword arguments configured for it.
optimizer:
  type: AdamW
  kwargs:
    lr: 0.001
    weight_decay: 0.05


# Learning-rate scheduler selection (`type`) and its keyword arguments.
# `epochs` carries the same value as the top-level `max_epoch` in this file;
# keep the two in sync when changing the training length.
scheduler:
  type: CosLR
  kwargs:
    epochs: 300
    # NOTE(review): presumably the warm-up length in epochs; confirm against
    # the scheduler builder.
    initial_epochs: 10


# Dataset splits. Each split names a base dataset config via `_base_` and
# lists its per-split settings under `others`.
dataset:
  # Training split: ShapeNet-55, `train` subset, 1024 points.
  train:
    _base_: cfgs/dataset_configs/ShapeNet-55.yaml
    others:
      subset: train
      npoints: 1024
      # NOTE(review): meaning of `whole` is defined by the dataset class;
      # confirm there before changing it.
      whole: true
  # Validation split: ModelNet40, `test` subset.
  val:
    _base_: cfgs/dataset_configs/ModelNet40.yaml
    others:
      subset: test
  # Additional split: ModelNet40, `train` subset.
  extra_train:
    _base_: cfgs/dataset_configs/ModelNet40.yaml
    others:
      subset: train


# Model definition: the model name, the loss name, and two nested
# configuration groups (`transformer_config` and `dvae_config`).
model:
  NAME: ACT_PointDistillation
  loss: cosine

  # Settings for the transformer (encoder depth/heads, decoder depth/heads,
  # masking and drop-path values).
  transformer_config:
    mask_ratio: 0.8
    mask_type: 'rand'
    proj: linear
    embed_dim: 384
    encoder_dims: 384
    depth: 12
    drop_path_rate: 0.1
    cls_dim: 512
    # Key spelling (`replace_pob`) is kept exactly as written because the
    # consuming code looks it up by this name.
    replace_pob: 0.0
    num_heads: 6
    decoder_depth: 2
    decoder_num_heads: 6
    return_all_tokens: false
    cls_loss: false
    # NOTE(review): presumably the index of the layer a hook is attached to;
    # confirm against the model code.
    register_shallow_hook: 9

  # Settings for the dVAE component and its visual embedding.
  dvae_config:
    # Exactly one `visual_embed_type` may be active; the alternatives are kept
    # commented out below, grouped by model family.
    # ViT
    visual_embed_type: vit_base_patch16_384
    # visual_embed_type: vit_small_patch16_384
    # visual_embed_type: vit_base_r50_s16_224_in21k
    # visual_embed_type: vit_base_r50_s16_384
    # DEiT
    # visual_embed_type: deit_small_distilled_patch16_224
    # visual_embed_type: deit_base_distilled_patch16_384
    # BEiT
    # visual_embed_type: beit_base_patch16_384
    # visual_embed_type: beit_large_patch16_512
    # CLIP
    # visual_embed_type: CLIP:ViT-B/16
    # MLP-Mixer
    # visual_embed_type: mixer_b16_224_in21k
    # visual_embed_type: mixer_b16_224_miil
    # visual_embed_type: resmlp_24_distilled_224
    # BERT:
    # visual_embed_type:
    # NOTE(review): `visual_embed_dim` presumably has to match the width of
    # the selected `visual_embed_type`; 768 is active, 384 is the commented
    # alternative. Confirm when switching backbones.
    # visual_embed_dim: 384
    visual_embed_dim: 768
    visual_embed_pos: after_dgcnn1
    freeze_visual_embed: true
    num_prompt_token: 64
    # num_prompt_token: 132 # MLP-Mixer
    use_deep_prompt: true
    num_group: 64
    group_size: 32
    encoder_dims: 384
    num_tokens: 8192
    tokens_dims: 384
    decoder_dims: 384
    ckpt: 'model_zoo/ckpt_act_dvae.pth' # set the dVAE weight here

# Top-level run settings.
# NOTE(review): `total_bs` is presumably the total batch size across all
# devices; confirm against the training entry point.
total_bs: 128
step_per_update: 1
# Same value as `scheduler.kwargs.epochs` above; keep the two in sync.
max_epoch: 300

# NOTE(review): presumably the metric name used to select the best
# checkpoint; confirm against the runner.
consider_metric: CDL1
