###################  modified params  ##########################
# Template placeholders: replace every <...> token below with a real value
# before running. Left as-is, bash parses "<" and ">" as redirection operators
# and the script will not run.

# Dropout method(s); forwarded as --modified_dropout_pattern and embedded in
# the checkpoint/log directory names.
modified_dropout_pattern=<tuple_of_dropout_method>
# Dropout rate(s); forwarded as --modified_dropout_rate and embedded in the
# checkpoint/log directory names.
modified_dropout_rate=<tuple_of_dropout_rate>
# Augmentation loss, kl or js; forwarded as --modified_aug_loss.
modified_aug_loss=<kl/js>
# Weight of the kl/js loss; forwarded as --modified_aug_loss_weight.
modified_aug_loss_weight=<weight_of_the_kl/js_loss>

# Value exported as CUDA_VISIBLE_DEVICES for each run.
GPU_ID=<GPU_ID>
# Directory the script changes into and prepends to PYTHONPATH.
project_root=<project_root>
# Python interpreter used to launch exps/run_glue.py.
python_path=<python_path>

###################  LoRA params  ##########################
# Each value below is forwarded unchanged to exps/run_glue.py through the
# command-line flag of the same name.

# Task type
task_type='SEQ_CLS'
# Whether to use inference mode
inference_mode='False'
# Lora attention dimension
r='8'
# Lora alpha
lora_alpha='16'
# Lora dropout
lora_dropout='0.0'

###################  Data  ##########################
# GLUE task to run; also used in the checkpoint/log directory names and in
# run_name below.
task_name=mrpc
max_seq_length=512

###################  Model  ##########################
# Value passed as --model_name_or_path.
model_name_or_path=roberta-large

###################  Training params  ##########################
# Each value is forwarded to exps/run_glue.py through the flag of the same name.
num_train_epochs=30
per_device_train_batch_size=8
per_device_eval_batch_size=8
gradient_accumulation_steps=4
learning_rate=3e-4
warmup_ratio=0.06
weight_decay=0.1
metric_for_best_model=accuracy
greater_is_better=True
disable_tqdm=True
# Shell variable names are case-sensitive: the task is stored in task_name
# (there is no TASK_NAME), so reference it in lower case to get "glue.mrpc"
# rather than a bare "glue.".
run_name=glue.${task_name}

#######################  Run  ############################
# Launches exps/run_glue.py once per seed in [seed_min, seed_max]. Every run
# gets its own timestamped checkpoint and log directory (relative to the
# project root), and its combined stdout/stderr is appended to
# <logging_dir>/log.txt while still being shown on the terminal.

# Change into the project root once, up front. Repeating the cd on every
# iteration would break a relative project_root from the second seed on, and
# an empty value would silently send a bare `cd` to $HOME, so fail loudly.
cd "${project_root:?project_root must be set}" || exit 1

# Prepend the project root to PYTHONPATH; the ${VAR:+...} form avoids leaving
# a trailing ":" when PYTHONPATH was previously unset or empty.
export PYTHONPATH="${project_root}${PYTHONPATH:+:${PYTHONPATH}}"

seed_min=0
seed_max=4
for ((seed = seed_min; seed <= seed_max; seed++)); do
	TIME=$(date "+%Y%m%d-%H%M%S")
	infix="${TIME}_GPU_${GPU_ID}_sd_${seed}"
	infix+="_dp_${modified_dropout_pattern}_${modified_dropout_rate}"
	infix+="_ls_${modified_aug_loss}_${modified_aug_loss_weight}"
	output_dir="checkpoints/glue_${task_name}/${infix}"
	logging_dir="logs/glue_${task_name}_${infix}"
	log_pth="${logging_dir}/log.txt"

	# printf keeps the values literal; `echo -e` would also interpret any
	# backslash sequences that happen to be inside the directory names.
	printf 'Model Dir: %s \nLog Dir: %s\n' "${output_dir}" "${logging_dir}"
	mkdir -p -- "${output_dir}" "${logging_dir}"

	# Build the argument list as an array so that every value reaches the
	# trainer as exactly one word, whatever characters it contains.
	run_args=(
		--model_name_or_path="${model_name_or_path}"
		--task_name "${task_name}"
		--max_seq_length "${max_seq_length}"
		--seed "${seed}"
		--do_train
		--do_eval
		--disable_tqdm "${disable_tqdm}"
		--per_device_train_batch_size "${per_device_train_batch_size}"
		--per_device_eval_batch_size "${per_device_eval_batch_size}"
		--gradient_accumulation_steps "${gradient_accumulation_steps}"
		--learning_rate "${learning_rate}"
		--num_train_epochs "${num_train_epochs}"
		--weight_decay "${weight_decay}"
		--warmup_ratio "${warmup_ratio}"
		--logging_steps 10
		--save_total_limit 1
		--evaluation_strategy epoch
		--save_strategy epoch
		--load_best_model_at_end
		--report_to "tensorboard"
		--overwrite_output_dir
		--metric_for_best_model "${metric_for_best_model}"
		--greater_is_better "${greater_is_better}"
		--run_name="${run_name}"
		--output_dir "${output_dir}"
		--logging_dir "${logging_dir}"
		--task_type "${task_type}"
		--inference_mode "${inference_mode}"
		--r "${r}"
		--lora_alpha "${lora_alpha}"
		--lora_dropout "${lora_dropout}"
		# NOTE(review): the two "tuple" settings are deliberately left
		# unquoted so that a space-separated value still expands to several
		# arguments, as in the original command line - confirm against the
		# argument parser in exps/run_glue.py.
		# shellcheck disable=SC2086
		--modified_dropout_pattern ${modified_dropout_pattern}
		# shellcheck disable=SC2086
		--modified_dropout_rate ${modified_dropout_rate}
		--modified_aug_loss="${modified_aug_loss}"
		--modified_aug_loss_weight="${modified_aug_loss_weight}"
	)

	# The pipeline's status is tee's, so a failed run does not stop the loop
	# and the remaining seeds are still attempted.
	CUDA_VISIBLE_DEVICES="${GPU_ID}" \
		"${python_path}" -u exps/run_glue.py "${run_args[@]}" \
		2>&1 | tee -a "${log_pth}"

done