# Model section: `NAME` selects the top-level model, `encoder_args` configures
# the encoder and `cls_args` configures the classification head.
# NOTE(review): the exact meaning of each key is defined by the consuming
# code, which is not visible from this file.
model:
  NAME: 'BaseCls'
  encoder_args:
    NAME: 'PointViT'
    in_channels: 3
    embed_dim: 384
    depth: 12
    num_heads: 6
    mlp_ratio: 4.0
    drop_rate: 0.0
    attn_drop_rate: 0.1
    drop_path_rate: 0.0
    qkv_bias: true
    act_args:
      act: 'gelu'
    norm_args:
      norm: 'ln'
      eps: 1.0e-6
    # Settings for the embedding sub-module named by `NAME` below.
    embed_args:
      NAME: 'P3Embed'
      feature_type: 'dp_df'
      reduction: 'max'
      sample_ratio: 0.25
      normalize_dp: false
      group_size: 32
      subsample: 'fps'
      group: 'knn'
      # NOTE(review): this is a plain string, whereas the other `norm_args`
      # entries in this file are mappings with a `norm` key. Confirm the
      # consumer accepts both forms.
      norm_args: 'bn'
      conv_args:
        order: 'conv-norm-act'
      layers: 4
    # A single comma-separated string, not a YAML sequence.
    global_feat: 'max,cls'
  cls_args:
    NAME: 'ClsHead'
    num_classes: 15
    mlps: [256, 256]
    norm_args:
      norm: 'bn1d'

# Pretrained checkpoint location and the mode string that accompanies it.
# NOTE(review): the path is relative; presumably it is resolved against the
# working directory of the training run. Confirm with the loader.
pretrained_path: 'pretrained/imagenet/mae_s.pth'
mode: 'finetune_encoder'

# ---------------------------------------------------------------------------- #
# Dataset cfgs
# ---------------------------------------------------------------------------- #
# 1. We center the point cloud so that the model does not have to learn
#    translation invariance.
# 2. We add rotation augmentation.
datatransforms:
  # Transform names for each split, kept in the original order.
  train:
    - 'PointsToTensor'
    - 'PointCloudScaling'
    - 'PointCloudCenterAndNormalize'
    - 'PointCloudRotation'
  vote:
    - 'PointCloudRotation'
  val:
    - 'PointsToTensor'
    - 'PointCloudCenterAndNormalize'
  # Keyword arguments for the transforms.
  # NOTE(review): presumably shared by all transforms listed above; confirm
  # which transform reads which key.
  kwargs:
    scale: [0.9, 1.1]
    angle: [0.0, 1.0, 0.0]
    gravity_dim: 1
    normalize: false
# ---------------------------------------------------------------------------- #
# Training cfgs, same as PointMAE for fair comparison
# ---------------------------------------------------------------------------- #
# Loss criterion
criterion_args:
  NAME: 'CrossEntropy'
  label_smoothing: 0.0

# Optimizer: base learning rate plus the optimizer name and its weight decay.
lr: 5.0e-4
optimizer:
  NAME: 'adamw'
  weight_decay: 0.05

# Learning-rate schedule and training length.
sched: 'cosine'
epochs: 300
t_max: 300  # set to the same value as `epochs`
warmup_epochs: 10
min_lr: 1.0e-6
# NOTE(review): presumably the maximum gradient norm used for clipping; confirm.
grad_norm_clip: 10

# Logging destinations: local log directory and the wandb project name.
log_dir: 'log/scanobject/pix4point'
wandb:
  project: 'pix4point-ScanObjectNN'
