
# Base configuration file named by this config (relative path).
# NOTE(review): presumably the sections below override values loaded from
# that file -- confirm against the config loader.
default_yaml: ../finetune_3B.yaml

# Task section: selects the `audio_text_retrieval` task and names the Clotho
# files it is pointed at. All paths are relative.
task:
  _name: audio_text_retrieval
  # Training TSV (Clotho development split) and validation TSV.
  data: '../../dataset/clotho/clotho_development.tsv'
  valid_data: '../../dataset/clotho/clotho_validation_new.tsv'
  # Comma-separated column names; quoted so the value is unambiguously one
  # string.
  selected_cols: 'uniq_id,audio,text,duration'
  # JSON file with the validation texts.
  valid_file: '../../dataset/clotho/validation_texts.json'

  # NOTE(review): unit is not stated here -- presumably seconds; confirm.
  max_duration: 20
  # NOTE(review): `al` presumably means audio-language -- confirm against the
  # model code.
  head_type: al

# Criterion section: selects `audio_text_retrieval_criterion`.
criterion:
  _name: audio_text_retrieval_criterion
  # Label smoothing is set to zero.
  label_smoothing: 0.0

# Optimization section: epoch budget, learning rate and gradient-accumulation
# settings.
optimization:
  max_epoch: 10
  # Single-element list holding the learning rate.
  lr:
    - 1.5e-4
  # Single-element list holding the update frequency.
  update_freq:
    - 1
  skip_remainder_batch: false

# Dataset section: batching settings.
dataset:
  # NOTE(review): presumably a per-device batch size -- confirm.
  batch_size: 2
  ensure_equal_batch: true

# Common section: layer-wise decay factor and logging destinations.
common:
  layer_decay: 0.95
  # Text log and TensorBoard output, both under logs/base_clotho/.
  log_file: 'logs/base_clotho/train.log'
  tensorboard_logdir: 'logs/base_clotho/tb'

# Checkpoint section: metric used to pick the best checkpoint and the output
# directory.
checkpoint:
  # NOTE(review): `txt_r1` is presumably text recall@1 -- confirm against the
  # criterion/task metrics.
  best_checkpoint_metric: txt_r1
  save_dir: 'logs/base_clotho/checkpoints/base_clotho'

# Model section: selects `one_peace_retrieval` and sets encoder options.
model:
  _name: one_peace_retrieval
  copy_rel_pos_table: true
  encoder:
    # NOTE(review): 0.9 is unusually high if this is a stochastic-depth
    # (drop-path) probability -- confirm it is intended for this run.
    drop_path_rate: 0.9