#!/bin/bash

###################  Modified params  ##########################
# Positional arguments supplied by the caller. Each one is forwarded to
# exps/run_glue.py and is also embedded in the per-run directory name (infix)
# built inside the seed loop.
# $1: forwarded as --modified_dropout_pattern
modified_dropout_pattern=$1
# $2: forwarded as --modified_dropout_rate
modified_dropout_rate=$2
# $3: forwarded as --modified_aug_loss
modified_aug_loss=$3
# $4: forwarded as --modified_aug_loss_weight
modified_aug_loss_weight=$4
# $5: exported to the training process as CUDA_VISIBLE_DEVICES
GPU_ID=$5
# project_root: directory the script cd's into and prepends to PYTHONPATH.
# python_path: command used to launch exps/run_glue.py.
# NOTE(review): "<project_root>" and "<python_path>" look like unfilled
# template placeholders. As written, bash parses "<" and ">" as redirections,
# so both must be replaced with real paths before this script can run.
project_root=<project_root>
python_path=<python_path>

###################  LoRA params  ##########################
# Each value below is forwarded to exps/run_glue.py as the flag of the same
# name (--task_type, --inference_mode, --r, --lora_alpha, --lora_dropout).

# Task type
task_type="SEQ_CLS"
# Whether to use inference mode
inference_mode="False"
# LoRA attention dimension
r="8"
# LoRA alpha
lora_alpha="16"
# LoRA dropout
lora_dropout="0.0"

###################  Data  ##########################
# Every setting in the Data / Model / Training sections is forwarded to
# exps/run_glue.py as the command-line flag of the same name.
task_name=rte      # GLUE task; also used in the output/log directory names
max_seq_length=512

###################  Model  ##########################
model_name_or_path=roberta-large

###################  Training params  ##########################
num_train_epochs=30
per_device_train_batch_size=16
per_device_eval_batch_size=16
gradient_accumulation_steps=4
learning_rate=4e-4
warmup_ratio=0.06
weight_decay=0.1
metric_for_best_model=accuracy
greater_is_better=True # higher values of metric_for_best_model are better
disable_tqdm=True
# Built from the lowercase task_name defined above. The previous
# ${TASK_NAME} was never set in this script, so the run name was "glue.".
run_name=glue.${task_name}

#######################  Run  ############################
# Launches one exps/run_glue.py training run per seed in [seed_min, seed_max].
# Each run gets its own timestamped checkpoint and log directory under the
# project root, and its combined stdout/stderr is appended to log.txt there.

# Prepend the project root to PYTHONPATH. The ${var:+...} form avoids leaving
# a trailing ":" (an empty search-path entry) when PYTHONPATH was unset.
export PYTHONPATH="${project_root}${PYTHONPATH:+:${PYTHONPATH}}"

# output_dir and logging_dir are relative paths, so enter the project root
# once up front. Repeating the cd inside the loop would fail on the second
# seed whenever project_root is itself a relative path.
cd -- "${project_root}" || {
  printf 'Cannot cd to project root: %s\n' "${project_root}" >&2
  exit 1
}

seed_min=0
seed_max=4
for ((seed = seed_min; seed <= seed_max; seed++)); do
  # Unique per-run tag: launch time, GPU, seed, and the modified dropout/loss
  # settings.
  timestamp=$(date "+%Y%m%d-%H%M%S")
  infix="${timestamp}_GPU_${GPU_ID}_sd_${seed}"
  infix+="_dp_${modified_dropout_pattern}_${modified_dropout_rate}"
  infix+="_ls_${modified_aug_loss}_${modified_aug_loss_weight}"
  output_dir="checkpoints/glue_${task_name}/${infix}"
  logging_dir="logs/glue_${task_name}_${infix}"
  log_pth="${logging_dir}/log.txt"

  printf 'Model Dir: %s \nLog Dir: %s\n' "${output_dir}" "${logging_dir}"
  mkdir -p -- "${output_dir}" "${logging_dir}"

  # All expansions are quoted so that values containing spaces or glob
  # characters reach the trainer as single, unmodified arguments.
  CUDA_VISIBLE_DEVICES="${GPU_ID}" \
    "${python_path}" -u exps/run_glue.py \
    --model_name_or_path="${model_name_or_path}" \
    --task_name "${task_name}" \
    --max_seq_length "${max_seq_length}" \
    --seed "${seed}" \
    --do_train \
    --do_eval \
    --disable_tqdm "${disable_tqdm}" \
    --per_device_train_batch_size "${per_device_train_batch_size}" \
    --per_device_eval_batch_size "${per_device_eval_batch_size}" \
    --gradient_accumulation_steps "${gradient_accumulation_steps}" \
    --learning_rate "${learning_rate}" \
    --num_train_epochs "${num_train_epochs}" \
    --weight_decay "${weight_decay}" \
    --warmup_ratio "${warmup_ratio}" \
    --logging_steps 10 \
    --save_total_limit 1 \
    --evaluation_strategy epoch \
    --save_strategy epoch \
    --load_best_model_at_end \
    --report_to "tensorboard" \
    --overwrite_output_dir \
    --metric_for_best_model "${metric_for_best_model}" \
    --greater_is_better "${greater_is_better}" \
    --run_name="${run_name}" \
    --output_dir "${output_dir}" \
    --logging_dir "${logging_dir}" \
    --task_type "${task_type}" \
    --inference_mode "${inference_mode}" \
    --r "${r}" \
    --lora_alpha "${lora_alpha}" \
    --lora_dropout "${lora_dropout}" \
    --modified_dropout_pattern "${modified_dropout_pattern}" \
    --modified_dropout_rate "${modified_dropout_rate}" \
    --modified_aug_loss="${modified_aug_loss}" \
    --modified_aug_loss_weight="${modified_aug_loss_weight}" \
    2>&1 | tee -a "${log_pth}"
  # The pipeline's own status is tee's, so read the trainer's status from
  # PIPESTATUS (must be done immediately after the pipeline).
  train_status=${PIPESTATUS[0]}

  # A failed seed is reported but does not abort the sweep; the remaining
  # seeds still run, as before.
  if ((train_status != 0)); then
    printf 'WARNING: seed %s exited with status %s (log: %s)\n' \
      "${seed}" "${train_status}" "${log_pth}" |
      tee -a "${log_pth}" >&2
  fi

done
