# Trace every command as it is executed.
set -x

# config for 2 A100 80G
export CUDA_VISIBLE_DEVICES=0,1

# Language pair; used to build the directory and checkpoint names below.
src_lang=en
tgt_lang=de

# Root directories. Both are empty placeholders and must be filled in before
# running: every path below is derived from them.
path_to_data=
path_to_ckpt=

# First positional argument given to train.py.
# The source-language part must be written ${src_lang}; a bare {src_lang}
# is not expanded and would end up verbatim in the path.
DATA_DIR="${path_to_data}/${src_lang}-${tgt_lang}"
# Value given to train.py's --parallel-text-data option.
parallel_text_data="${path_to_data}/data-bin/wmt_spm_${src_lang}_spm_${tgt_lang}"

# Used in the checkpoint directory name and as the value of --adapter.
adapter=e2e

# Directory of the earlier run and the checkpoint file inside it that is
# given to --finetune-from-model.
PRETRAIN_DIR="${path_to_ckpt}/checkpoints/dcm_zs_${src_lang}_asr_for_${tgt_lang}_${adapter}_shrink_ot10"
pretrained_model="${PRETRAIN_DIR}/avg_last_10_epoch_checkpoint.pt"

# Output directory of this run (checkpoints via --save-dir, plus dcm.log).
save_dir="${PRETRAIN_DIR}_jst_ot10"

# Abort early if the root paths were left empty: with an empty prefix every
# derived path (data, checkpoints, log) would resolve at the filesystem root.
: "${path_to_data:?path_to_data is empty - set it at the top of this script}"
: "${path_to_ckpt:?path_to_ckpt is empty - set it at the top of this script}"

# The log redirection below needs the output directory to exist already.
mkdir -p -- "${save_dir}" || exit 1

# Run train.py on ${DATA_DIR}, passing ${pretrained_model} via
# --finetune-from-model and writing checkpoints to ${save_dir}.
# stdout and stderr are both redirected to ${save_dir}/dcm.log, so nothing is
# printed to the terminal apart from the `set -x` trace.
# All path expansions are quoted so directories containing spaces or glob
# characters are passed as single arguments.
# NOTE(review): the --noise-token quoting below evaluates to "'▁NOISE'" with
# the quote characters included in the value; it looks like an escape sequence
# meant for use inside an outer single-quoted command string. Left unchanged -
# confirm whether a plain '▁NOISE' is intended.
python train.py "${DATA_DIR}" \
    --save-dir "${save_dir}" \
    --config-yaml config_text.yaml \
    --train-subset train_st_w_src --valid-subset dev_st_w_src \
    --num-workers 8 \
    --task dcm \
    --arch s2t_dcm --share-decoder-input-output-embed \
    --user-dir examples/dcm \
    --max-epoch 100 --update-mix-data \
    --optimizer adam --lr-scheduler inverse_sqrt \
    --lr 0.001 --adam-betas '(0.9,0.98)' --update-freq 4 --clip-norm 10.0 \
    --criterion guided_label_smoothed_cross_entropy_with_ctc \
    --ctc-weight 0.3 --zero-infinity \
    --guide-alpha 0.8 --disable-text-guide-update-num 5000 \
    --label-smoothing 0.1 --max-tokens 40000 --max-sentences 450 --max-tokens-text 20000 \
    --max-positions-text 400 --seed 2 \
    --encoder-layers 12 --text-encoder-layers 6 --decoder-layers 6 \
    --dropout 0.15 --warmup-updates 20000 \
    --text-sample-ratio 0.25 --parallel-text-data "${parallel_text_data}" \
    --text-input-cost-ratio 0.5 --enc-grad-mult 1.0 \
    --langpairs "${src_lang}-${tgt_lang}" --noise-token '"'"'▁NOISE'"'"' \
    --mask-text-ratio 0.0 --max-tokens-valid 20000 --ddp-backend no_c10d \
    --log-format json --log-interval 100 --data-buffer-size 50 \
    --eval-bleu --eval-bleu-args '{"beam": 5, "lenpen": 0.6, "max_len_a": 1, "max_len_b": 50}' \
    --eval-bleu-detok moses --eval-bleu-remove-bpe sentencepiece \
    --best-checkpoint-metric bleu --maximize-best-checkpoint-metric \
    --save-interval-updates 1000 --keep-interval-updates 10 --keep-last-epochs 10 \
    --finetune-from-model "${pretrained_model}" \
    --shrink-ctc --adapter "${adapter}" --ot-weight 10. > "${save_dir}/dcm.log" 2>&1

