#!/bin/bash

# NOTE(review): the two values below look like template placeholders that must
# be replaced (by hand or by tooling) before the script is run -- as written,
# bash would treat the angle brackets as redirections. Judging by how the
# variables are used further down:
#   project_root - repository root; the script changes into
#                  ${project_root}/exps/NLG and adds both to PYTHONPATH.
#   python_path  - the Python interpreter used to launch every step.
project_root=<project_root>
python_path=<python_path>

# Usage: <this script> GPU_ID PORT RUN_NAME [RUN_NAME ...]
#   GPU_ID   - value exported as CUDA_VISIBLE_DEVICES for every step
#   PORT     - master port handed to torch.distributed.launch
#   RUN_NAME - one or more run names; each is evaluated in turn
if (( $# < 2 )); then
  printf 'Usage: %s GPU_ID PORT RUN_NAME [RUN_NAME ...]\n' "${0##*/}" >&2
  exit 2
fi

# Every relative path used by the rest of the script (./logs, ./checkpoints,
# ./data, src/, eval/) is resolved from exps/NLG, so bail out if we cannot
# get there. A bare `exit` propagates cd's own failure status.
cd "${project_root}/exps/NLG" || exit

# Prepend exps/NLG and the project root (in that order of precedence) to
# PYTHONPATH. The ${VAR:+...} form avoids leaving a trailing ':' -- i.e. an
# empty search-path entry -- when PYTHONPATH was previously unset or empty.
export PYTHONPATH="${project_root}/exps/NLG:${project_root}${PYTHONPATH:+:${PYTHONPATH}}"

GPU_ID=$1
port=$2
# Everything after the first two positional parameters is a run name.
args=("${@:3}")
if (( ${#args[@]} == 0 )); then
  # An empty run list stays a no-op (as before), but say so instead of
  # exiting silently.
  printf '%s: no run names given; nothing to do\n' "${0##*/}" >&2
fi
# Training step whose checkpoint (model.<step>.pt) is evaluated and whose
# predictions are written to predict.<step>.jsonl. Defaults to 26290; can be
# overridden through the CHECKPOINT_STEP environment variable.
checkpoint_step="${CHECKPOINT_STEP:-26290}"

#######################################
# Append a timestamped message to a log file.
# Arguments: $1 - log file path
#            $2 - message text
#######################################
log_event() {
  printf '%s: %s\n' "$(date "+%Y%m%d-%H%M%S")" "$2" >>"$1"
}

#######################################
# Run a command with CUDA_VISIBLE_DEVICES set to GPU_ID, showing its stdout
# on the terminal and appending it to a log file.
# Globals:   GPU_ID (read)
# Arguments: $1  - log file path
#            $2+ - command and its arguments
# Returns:   the exit status of the command itself (not of tee)
#######################################
run_step() {
  local log_file=$1
  shift
  CUDA_VISIBLE_DEVICES="${GPU_ID}" "$@" | tee -a "${log_file}"
  # tee almost always succeeds, so $? would hide a failing command.
  return "${PIPESTATUS[0]}"
}

#######################################
# Report a failed step on stderr and in the run's log file.
# Arguments: $1 - log file path
#            $2 - human-readable step name
#            $3 - exit status of the failed step
#######################################
report_failure() {
  local msg="ERROR: $2 failed with exit status $3; skipping the remaining steps for this run"
  printf '%s\n' "${msg}" >&2
  log_event "$1" "${msg}"
}

#######################################
# Generate, decode and score the E2E test-set predictions of one run.
# Stops at the first failing step so that later steps never consume missing
# or stale files.
# Globals:   python_path, port, GPU_ID, checkpoint_step (read)
# Arguments: $1 - run name (sub-directory of ./logs and ./checkpoints)
# Returns:   0 if all three steps succeeded, non-zero otherwise
#######################################
evaluate_run() {
  local run_name=$1
  local logging_dir="./logs/${run_name}"
  local log_pth="${logging_dir}/log.txt"
  local output_dir="./checkpoints/${run_name}"
  local output_idx_file="${logging_dir}/predict.${checkpoint_step}.jsonl"
  local output_ref_file="${logging_dir}/e2e_ref.txt"
  local output_pred_file="${logging_dir}/e2e_pred.txt"

  # The log file is appended to before anything else, so its directory has to
  # exist even when no earlier stage created it.
  mkdir -p -- "${logging_dir}" || return

  # 2. Generate outputs from the trained model using beam search.
  # NOTE(review): --output_file is prefixed with ./../../ presumably because
  # gpt2_beam.py resolves it relative to --work_dir (./checkpoints/<run>);
  # step 3 then reads the same file as ${output_idx_file}. Confirm against
  # src/gpt2_beam.py; this only lines up when the run name has no '/'.
  log_event "${log_pth}" "Start generating outputs from the trained model using beam search..."
  run_step "${log_pth}" \
    "${python_path}" -m torch.distributed.launch --nproc_per_node=1 --master_port "${port}" src/gpt2_beam.py \
    --data ./data/e2e/test.jsonl \
    --batch_size 1 \
    --seq_len 512 \
    --eval_len 64 \
    --model_card gpt2.md \
    --init_checkpoint "${output_dir}/model.${checkpoint_step}.pt" \
    --platform local \
    --lora_dim 4 \
    --lora_alpha 32 \
    --beam 10 \
    --length_penalty 0.9 \
    --no_repeat_ngram_size 4 \
    --repetition_penalty 1.0 \
    --eos_token_id 628 \
    --work_dir "${output_dir}" \
    --output_file "./../../${output_idx_file}" ||
    {
      report_failure "${log_pth}" "beam-search generation" "$?"
      return 1
    }
  log_event "${log_pth}" "Finish generating outputs from the trained model using beam search..."

  # 3. Decode outputs from step (2)
  log_event "${log_pth}" "Start decoding outputs from step (2)..."
  run_step "${log_pth}" \
    "${python_path}" src/gpt2_decode.py \
    --vocab ./vocab \
    --sample_file "${output_idx_file}" \
    --input_file ./data/e2e/test_formatted.jsonl \
    --output_ref_file "${output_ref_file}" \
    --output_pred_file "${output_pred_file}" ||
    {
      report_failure "${log_pth}" "decoding" "$?"
      return 1
    }
  log_event "${log_pth}" "Finish decoding outputs from step (2)..."

  # 4. Run evaluation on E2E test set
  log_event "${log_pth}" "Start running evaluation on E2E test set..."
  run_step "${log_pth}" \
    "${python_path}" eval/e2e/measure_scores.py \
    "${output_ref_file}" \
    "${output_pred_file}" \
    -p ||
    {
      report_failure "${log_pth}" "E2E evaluation" "$?"
      return 1
    }
  log_event "${log_pth}" "Finish running evaluation on E2E test set..."
}

failed_runs=0
for run_name in "${args[@]}"; do
  echo "Run name: ${run_name}"
  # A failing run must not prevent the remaining runs from being evaluated.
  evaluate_run "${run_name}" || failed_runs=$((failed_runs + 1))
done

# Leave a non-zero status behind when any run failed, so that a caller can
# detect it if this is the last command of the script.
(( failed_runs == 0 ))
