#!/usr/bin/env bash
# Evaluation driver: calls avg_checkpoints.sh, runs fairseq's generate.py on
# the resulting checkpoint, and writes .gen/.ref/.hyp files under results/.
#
# Positional arguments (all read as-is; none are validated here):
#   $1  data        dataset family; selects the tokenization flag and dataset dir
#   $2  src         source language code (part of dataset and model names)
#   $3  tgt         target language code (part of dataset and model names)
#   $4  year        IWSLT year (part of dataset and model names)
#   $5  dropout     only used as a component of the model name
#   $6  gen_subset  forwarded to generate.py as --gen-subset
#   $7  lr          only used as a component of the model name
#   $8  freq        read but not referenced again in this script
#   $9  seed        path component under checkpoints/ and results/
#   $10 aggre       path component under checkpoints/ and results/
#   $11 interdim    path component under checkpoints/ and results/
#   $12 bytenum     path component under checkpoints/ and results/

# Dataset / model identification.
data="$1"
src="$2"
tgt="$3"
year="$4"
dropout="$5"
gen_subset="$6"

# Training hyper-parameters and experiment layout.
lr="$7"
freq="$8"
seed="$9"
aggre="${10}"
interdim="${11}"
bytenum="${12}"


# The model name is built from the raw arguments, *before* ${data} is
# normalised below, so it keeps the original data identifier.
model="${data}_${src}_${tgt}_${year}_${dropout}_${lr}"

# Any data identifier containing "byte_embeddingless" is treated as plain
# "byte" for the dataset directory and the tokenization flag.
case "${data}" in
    *byte_embeddingless*)
        data="byte"
        ;;
esac

# Defaults for non-BPE data: pass --<data>-tokens, no BPE removal and no
# post-processing action. `action` is initialised explicitly so the later
# detokenization check never picks up a value inherited from the environment.
tokenization="--${data}-tokens"
remove_bpe=""
action=""
case "${data}" in
    *bpe*)
        # BPE data: use the moses-bpe dataset, strip BPE at generation time,
        # request Moses detokenization afterwards, and pass no *-tokens flag.
        data="moses-bpe"
        action="detok_moses"
        remove_bpe="--remove-bpe"
        tokenization=""
        ;;
esac

dataset="${data}-iwslt${year}.${src}-${tgt}"
bsz=32

# Directory handed to generate.py via --user-dir.
custom_model=byte_aggregate

echo "tokenization is for generate.py is ${tokenization}"
echo "remove-bpe is for generate.py is ${remove_bpe}"
echo "dataset is ${dataset}"
echo "model is ${model}"
echo "gen_subset ${gen_subset}"

# Relative experiment directory passed to avg_checkpoints.sh and reused below
# for the results/ layout.
run_dir="$aggre/$bytenum/$interdim/$seed/$model"

# Run the checkpoint-averaging helper. A non-zero exit is reported but does not
# stop the script, because the helper's exit-status convention is not visible
# from here.
# NOTE(review): consider aborting here once avg_checkpoints.sh is confirmed to
# return 0 on success.
if ! bash avg_checkpoints.sh "${run_dir}" 50k; then
    echo "warning: avg_checkpoints.sh exited non-zero for ${run_dir}" >&2
fi

# Checkpoint tag used in the output file names.
checkpoint="avg"

# All generation artefacts for this run/subset live in one directory.
out_dir="results/${run_dir}/${gen_subset}"
gen_file="${out_dir}/${checkpoint}.gen"
ref_file="${out_dir}/${checkpoint}.ref"
hyp_file="${out_dir}/${checkpoint}.hyp"
echo "gen_file ${gen_file}"
echo "ref_file ${ref_file}"
echo "hyp_file ${hyp_file}"

# Quoted so that unusual characters in any path component cannot split the
# argument into several directories.
mkdir -p "${out_dir}"

# path=/home/mengjiao/Documents/workspace/2023S
databin=data/data-bin

# Build the generate.py command line as an array so every value stays a single
# argument regardless of its content. Argument order matches the previous
# string-based version.
PARAM=(
    "${databin}/${dataset}"
    --path "checkpoints/$aggre/$bytenum/$interdim/$seed/${model}/checkpoint_${checkpoint}.pt"
    --batch-size "${bsz}"
    --beam 5
)
# The tokenization flag is empty for BPE data; skip it rather than pass "".
if [[ -n "${tokenization}" ]]; then
    PARAM+=("${tokenization}")
fi
PARAM+=(
    --gen-subset "${gen_subset}"
    --user-dir "${custom_model}"
    --sacrebleu
)
# --remove-bpe is only set for BPE data.
if [[ -n "${remove_bpe}" ]]; then
    PARAM+=("${remove_bpe}")
fi
PARAM+=(
    --max-len-a 0 --max-len-b 10000
    --max-source-positions 10000
    --max-target-positions 10000
)
echo "${PARAM[*]}"

# Alternative decoding configuration (disabled):
# PARAM="$databin/${dataset} --path checkpoints/${model}/checkpoint_${checkpoint}.pt \
# --beam 4 --lenpen 0.6 --remove-bpe"
# echo $PARAM

# Stop on failure: a failed run leaves ${gen_file} empty or truncated, and the
# later extraction steps would otherwise turn that into misleading ref/hyp files.
python fairseq/fairseq_cli/generate.py "${PARAM[@]}" > "${gen_file}" || {
    status=$?
    echo "error: generate.py failed (exit ${status}); ${gen_file} may be incomplete" >&2
    exit "${status}"
}

#######################################
# Print one tab-separated column of the lines in a generation log that start
# with a given prefix, ordered by `sort -k 1,1` on the first field.
# Arguments:
#   $1 - line prefix to select (e.g. T or H)
#   $2 - 1-based tab-separated column to print
#   $3 - path of the generation log to read
# Outputs:
#   selected column, one line per matching input line, on stdout
#######################################
extract_sorted_field() {
    local prefix=$1 field=$2 infile=$3
    # A plain anchored pattern is enough here; no PCRE (-P) needed.
    grep "^${prefix}" < "${infile}" | sort -k 1,1 | cut -f "${field}"
}

# T lines carry the reference in column 2; H lines carry the hypothesis in
# column 3. Both are sorted with the same key so the two files stay aligned
# line by line.
extract_sorted_field T 2 "${gen_file}" > "${ref_file}"
extract_sorted_field H 3 "${gen_file}" > "${hyp_file}"

#######################################
# Run the Moses detokenizer over a file and replace the file with the result.
# The file is only replaced when the detokenizer exits successfully; on failure
# the original content is kept and the temporary output is removed.
# Arguments:
#   $1 - path of the file to detokenize in place
# Returns:
#   0 on success, 1 if the detokenizer failed
#######################################
detokenize_in_place() {
    local target=$1
    # NOTE(review): this path is relative to the *parent* of the working
    # directory, while data/data-bin, checkpoints/ and results/ are relative to
    # the working directory itself — confirm this is intended.
    local -a moses_detokenizer=(perl ../data/mosesdecoder/scripts/tokenizer/detokenizer.perl)

    if "${moses_detokenizer[@]}" < "${target}" > "${target}.detok"; then
        mv -- "${target}.detok" "${target}"
    else
        echo "error: Moses detokenizer failed for ${target}; keeping original file" >&2
        rm -f -- "${target}.detok"
        return 1
    fi
}

# Only runs when the bpe branch set action to "detok_moses"; an unset or empty
# action skips this step.
if [[ ${action} == *"detok_moses"* ]] ; then
    echo "detokenizing moses.."
    detokenize_in_place "${ref_file}" || exit 1
    detokenize_in_place "${hyp_file}" || exit 1
fi

# python generate.py pre-processed-data-dir --path model-save-dir/averaged.pt  
# --beam 4 --lenpen 0.6 --remove-bpe | grep '^H' | sed 's/^H\-//g' | sort -t ' ' -k1,1 -n | cut -f 3-