# Task selector for this run.
# "qa" presumably stands for question answering — confirm against the code
# that reads this key.
task: qa

# Dataset and data-loading settings.
dataset_conf:
  # NOTE(review): the unit is not stated here. Sitting next to sample_rate,
  # this is presumably a maximum clip length in seconds — confirm against
  # the dataset loader.
  max_len: 5
  # Presumably the audio sampling rate in Hz — confirm.
  sample_rate: 44100
  # Presumably the per-step batch size — confirm whether it is per device.
  batch_size: 1
  # Presumably the number of data-loading worker processes — TODO confirm.
  num_workers: 20
  # Presumably upper bounds on how many training / evaluation samples are
  # used — confirm against the loader.
  max_train_samples: 100000
  max_eval_samples: 5000

# Encoder-side settings.
encoder_conf:
  # How the encoder is adapted during training. "lora" presumably selects
  # LoRA (low-rank adapter) fine-tuning — confirm the accepted values in the
  # consuming code.
  encoder_strategy: "lora"

# Decoder-side settings.
decoder_conf:
  # How the decoder is adapted during training. "lora" presumably selects
  # LoRA (low-rank adapter) fine-tuning — confirm the accepted values in the
  # consuming code.
  decoder_strategy: "lora"

# Optimizer settings.
optim_args:
  # The explicit !!float tag forces a float. Some YAML 1.1 loaders (e.g.
  # PyYAML) would otherwise read an exponent literal without a decimal
  # point, such as 5e-5, as a string. Keep the tag (or write 5.0e-5) when
  # editing this value.
  lr: !!float 5e-5
  # Optimizer identifier. Presumably resolved by the training framework
  # (the name matches the Hugging Face Trainer "optim" choice) — confirm.
  name: "adamw_torch"
  # Tagged !!float for the same reason as lr above.
  weight_decay: !!float 1e-6

# Training-schedule settings.
# Presumably the number of training epochs — confirm.
epochs: 100
# NOTE(review): "warmup_radio" looks like a typo for "warmup_ratio". The key
# is left unchanged because the consuming code is not visible here and may
# read this exact spelling; renaming only one side would silently drop the
# setting. Fix the config and the reader together.
warmup_radio: 0.1
# Presumably the number of gradient-accumulation steps — confirm.
acc_grad: 2
# Presumably the gradient-clipping threshold (max norm?) — confirm. Parsed
# as an integer; verify the consumer accepts an int here.
clip_grad: 1

# Teacher-forcing ratio. 0.0 presumably disables teacher forcing entirely —
# confirm how the decoder interprets this value.
teacher_forcing_ratio: 0.0
