# evaluate phase 1

# Checkpoint selector, read from the first positional argument.
# The averaging modes are "update" and "epoch".
ckpt=$1

# Translation direction: source language -> target language.
src_lang=en
tgt_lang=de

# Root locations of the dataset and of the checkpoints.
# Both are empty placeholders here.
path_to_data=
path_to_ckpt=

# Tag naming the language pair; it appears in both directory names below.
lang_pair="spm_${src_lang}_spm_${tgt_lang}"

# Dataset directory (under data-bin/) for this language pair.
DATA_DIR="${path_to_data}/data-bin/wmt_${lang_pair}"

# Directory holding this experiment's checkpoints.
SAVE_DIR="${path_to_ckpt}/checkpoints/${lang_pair}_wmt_only_nmt_enc6"

# Work out which checkpoint file to evaluate, based on the selector in $ckpt:
#   best      -> checkpoint_best.pt, which must already exist in SAVE_DIR
#   otherwise -> avg_last_10_<ckpt>_checkpoint.pt, created on demand by
#                averaging 10 checkpoints (selector is "update" or "epoch")
case "${ckpt}" in
  "")
    # Without a selector the file name and the --num-*-checkpoints flag
    # below would both be malformed, so stop early.
    printf 'usage: %s <best|update|epoch>\n' "$0" >&2
    exit 1
    ;;
  best)
    CHECKPOINT_FILENAME="checkpoint_${ckpt}.pt"
    ;;
  *)
    CHECKPOINT_FILENAME="avg_last_10_${ckpt}_checkpoint.pt"
    ;;
esac
model="${SAVE_DIR}/${CHECKPOINT_FILENAME}"

if [ ! -f "${model}" ]; then
  if [ "${ckpt}" = "best" ]; then
    # NOTE(review): checkpoint_best.pt is presumably written by training and
    # is not an average, so it is not synthesized here -- confirm.
    printf 'error: checkpoint not found: %s\n' "${model}" >&2
    exit 1
  fi
  # Build the averaged checkpoint; abort if averaging fails so that
  # generation is never started against a missing model file.
  python scripts/average_checkpoints.py \
    --inputs "${SAVE_DIR}" \
    "--num-${ckpt}-checkpoints" 10 \
    --output "${model}" || exit 1
fi

# Directory that receives the generation output for this checkpoint selector.
infer_results="${SAVE_DIR}/infer_results/${ckpt}"

# Expose only CUDA device 0 to the processes started below.
export CUDA_VISIBLE_DEVICES=0

# Run generation with the selected checkpoint on each test subset.
# Path arguments are quoted so directories containing spaces or glob
# characters are passed to generate.py as single arguments.
for testset in tst-COMMON tst-HE; do
  python fairseq_cli/generate.py \
    "${DATA_DIR}" \
    --path "${SAVE_DIR}/${CHECKPOINT_FILENAME}" \
    --gen-subset "${testset}" \
    --remove-bpe sentencepiece --max-tokens 8192 \
    --beam 5 --lenpen 0.6 --max-len-a 1 --max-len-b 50 \
    --scoring sacrebleu \
    --results-path "${infer_results}"
done
