
# Relative path to another YAML file, one directory above this one.
# NOTE(review): presumably the base config that the sections below override;
# the merge order and what the path is resolved against are not visible
# here - confirm against the loader.
default_yaml: ../finetune_3B.yaml

# Task section: task name, train/validation data files, and a few
# data/head settings.
task:
  # Task identifier.
  # NOTE(review): presumably resolved through a task registry - confirm.
  _name: vggsound
  # Train and validation TSV files, given as relative paths.
  # NOTE(review): the base directory these resolve against is not visible
  # here - confirm (working directory vs. this file's location).
  data: ../../dataset/vggsound/vggsound_train_new.tsv
  # NOTE(review): validation reads the file named `..._test_...`; with
  # checkpoint.best_checkpoint_metric set, best-checkpoint selection would
  # then use that split - confirm this is intended.
  valid_data: ../../dataset/vggsound/vggsound_test_new.tsv
  # Comma-separated names with no spaces.
  # NOTE(review): presumably the TSV columns to read, in this order - confirm.
  selected_cols: uniq_id,audio,text,duration

  # NOTE(review): unit is not stated; presumably seconds of audio, and
  # whether longer clips are cropped or dropped is not visible - confirm.
  max_duration: 15
  # NOTE(review): presumably selects which modality branch feeds the
  # classification head - confirm.
  head_type: audio
  # NOTE(review): presumably the number of label classes in the dataset and
  # the width of the classifier output - confirm it matches the label set.
  num_classes: 309

# Criterion section: selects the criterion by name.
# NOTE(review): presumably the training loss implementation - confirm.
criterion:
  _name: classify_criterion

# Optimization section: epoch budget, learning rate, and update frequency.
# `lr` and `update_freq` are each a one-element list.
optimization:
  max_epoch: 10
  # NOTE(review): `8e-5` has no decimal point, so YAML 1.1 loaders (PyYAML
  # among them) resolve it as the string "8e-5" rather than a float -
  # confirm the consumer converts it to a number.
  lr:
    - 8e-5
  # NOTE(review): presumably the number of batches accumulated per
  # optimizer step - confirm.
  update_freq:
    - 8

# Dataset section: batch size.
# NOTE(review): whether this is per device or global is not visible
# here - confirm.
dataset:
  batch_size: 8

# Common section.
# NOTE(review): `layer_decay` is presumably a layer-wise learning-rate decay
# factor; which end of the network keeps the full rate is not visible
# here - confirm.
common:
  layer_decay: 0.95

# Checkpoint section: names the metric used for best-checkpoint tracking.
# NOTE(review): this file does not say whether higher is better; for
# accuracy the consumer must maximize - confirm that is set elsewhere.
checkpoint:
  best_checkpoint_metric: accuracy

# Model section: model name, classification-head options, and one nested
# encoder setting.
model:
  # NOTE(review): presumably resolved through a model registry - confirm.
  _name: one_peace_classify
  # NOTE(review): meaning is not visible here; presumably a width multiplier
  # for the classification head - confirm.
  head_scale_ratio: 2
  # The pooler is switched off while a dropout value is still supplied.
  # NOTE(review): `pooler_dropout` presumably has no effect when
  # `use_pooler` is false - confirm.
  use_pooler: false
  pooler_dropout: 0.0
  # NOTE(review): presumably enables attention-based pooling in place of the
  # pooler disabled above - confirm.
  attn_pooling: true
  encoder:
    # NOTE(review): presumably the stochastic-depth (drop-path) rate for the
    # encoder; 0.6 is a strong setting - confirm it is intended.
    drop_path_rate: 0.6