#!/usr/bin/env bash
#
# Launch fairseq-train on the en-${TGT_LANG} MuST-C data directory using the
# speech_to_text_wav2vec task and the convtransformer_espnet_wav2vec
# architecture, initialised from a wav2vec checkpoint (wav2vec_small.pt).
#
# Edit the /path/to/... placeholders below before running.
# Checkpoints and TensorBoard logs are written under ${SAVE_DIR}.

# Abort on a misspelled/unset variable instead of silently passing an empty
# path to the trainer.
set -u

# Target language code of the translation pair (en -> TGT_LANG).
# NOTE: deliberately NOT named LANG. LANG is the POSIX locale variable and is
# normally already exported, so assigning "de" to it would hand an invalid
# locale to fairseq-train and every other child process.
TGT_LANG=de

# Data directory for the language pair; passed as the positional argument.
MUSTC_ROOT="/path/to/datasets/MuST-C/fairest/en-${TGT_LANG}"
# Root directory under which all runs are saved.
SAVE_ROOT="/path/to/fairest/save_models"
# Output directory of this particular run.
SAVE_DIR="${SAVE_ROOT}/${TGT_LANG}/pretrain_new4"
# Directory that holds the pretrained wav2vec checkpoint.
pretrain_wav2vec="/path/to/fairest/pretrain_models"

# Every expansion is quoted so that paths containing spaces or glob characters
# reach fairseq-train as a single argument. Best checkpoints are selected by
# the "accuracy" metric (maximised), which is why --report-accuracy is set.
# This is the last command, so its exit status is the script's exit status.
fairseq-train "${MUSTC_ROOT}" \
    --config-yaml config_wave.yaml \
    --train-subset train_wave_joint \
    --valid-subset dev_wave_joint \
    --save-dir "${SAVE_DIR}" \
    --max-tokens 1200000 \
    --update-freq 8 \
    --max-update 3200000 \
    --task speech_to_text_wav2vec \
    --criterion label_smoothed_cross_entropy \
    --report-accuracy \
    --arch convtransformer_espnet_wav2vec \
    --w2v2-model-path "${pretrain_wav2vec}/wav2vec_small.pt" \
    --optimizer adam \
    --lr 0.0001 \
    --lr-scheduler inverse_sqrt \
    --warmup-updates 25000 \
    --clip-norm 10.0 \
    --seed 1 \
    --ddp-backend=no_c10d \
    --keep-best-checkpoints 10 \
    --best-checkpoint-metric accuracy \
    --maximize-best-checkpoint-metric \
    --patience 15 \
    --max-source-positions 1000000 \
    --skip-invalid-size-inputs-valid-test \
    --dropout 0.0 --activation-dropout 0.1 --attention-dropout 0.1 \
    --encoder-layers 8 \
    --empty-cache-freq 100 \
    --ignore-prefix-size 1 \
    --fp16 \
    --tensorboard-logdir "${SAVE_DIR}/tensorboard_logs"