#!/bin/bash
# set -x
# Environment for the training processes launched by this script.
# PYTHONPATH=. puts the current directory on Python's module search path;
# every path used by this script is relative to the current directory as well.
export PYTHONPATH=.
# Enumerate CUDA devices by PCI bus ID.
export CUDA_DEVICE_ORDER="PCI_BUS_ID"
export TRANSFORMERS_CACHE=/root/.cache/huggingface

# Random port in [25000, 30000], handed to the deepspeed launcher through
# --master_port. Stop right away if no port could be drawn, rather than
# launching with an empty value.
port=$(shuf -i25000-30000 -n1) || {
  echo "error: could not pick a master port with shuf" >&2
  exit 1
}

# All runs write their combined stdout/stderr to a single log file.
log_dir="logs_and_outputs/order_1/logs"
mkdir -p -- "$log_dir" || {
  echo "error: could not create log directory ${log_dir}" >&2
  exit 1
}
log_file="${log_dir}/train.log"

# Training schedule for task order 1. The tasks are trained one after another.
# Every round after the first receives the output directory of the round
# before it through --done_task_lora_path, so the schedule stops at the first
# failing round instead of letting later rounds run against a missing or
# stale output directory.
tasks=(dbpedia amazon yahoo agnews)
output_root="logs_and_outputs/order_1/outputs"

#######################################
# Run one round (train + predict) of src/run_adapter.py under deepspeed.
# Globals:
#   port         (read) value passed to deepspeed --master_port
#   log_file     (read) stdout and stderr of the run are appended to it
#   output_root  (read) parent directory of the per-round output directories
#   CURRENT_TASK_NAME, DONE_TASKS_NAME (exported for the training process)
# Arguments:
#   $1 - 1-based round number; used in the output directory and --run_name
#   $2 - name of the task to train in this round
#   $3 - comma-separated names of the tasks already trained ("" for none)
#   $4 - value for --done_task_lora_path ("None" in the first round)
# Returns:
#   the exit status of the deepspeed command
#######################################
run_stage() {
  local round="$1" task="$2" done_tasks="$3" done_task_lora_path="$4"

  export CURRENT_TASK_NAME="$task"
  export DONE_TASKS_NAME="$done_tasks"

  # Arguments are kept in an array so that each value stays a single word.
  local -a train_args=(
    --do_train
    --do_predict
    --task_name "$task"
    --predict_with_generate
    --model_name_or_path initial_model/t5-large
    --done_task_lora_path "$done_task_lora_path"
    --data_dir CL_Benchmark
    --task_config_dir "configs/order1_configs/${task}"
    --instruction_file configs/instruction_config.json
    --instruction_strategy single
    --output_dir "${output_root}/${round}-${task}"
    --per_device_train_batch_size 16
    --per_device_eval_batch_size 128
    --gradient_accumulation_steps 1
    --learning_rate 5e-3
    --num_train_epochs 1
    --deepspeed configs/ds_configs/stage2.config
    --run_name "order1_round${round}"
    --max_source_length 512
    --max_target_length 50
    --generation_max_length 50
    --add_task_name True
    --add_dataset_name True
    --overwrite_output_dir
    --overwrite_cache
    --lr_scheduler_type constant
    --warmup_steps 0
    --logging_strategy steps
    --logging_steps 10
    --evaluation_strategy no
    --save_strategy no
    --save_steps 1500
    --lamda_1 2
    --lamda_2 0
    --threshold 1e-3
    --bottleneck_dim 16
  )

  CUDA_VISIBLE_DEVICES=0,1 deepspeed --master_port "$port" src/run_adapter.py \
    "${train_args[@]}" >> "$log_file" 2>&1
}

# Start from an empty log file; every round appends to it.
: > "$log_file" || exit 1

done_tasks=""
done_task_lora_path=None
for i in "${!tasks[@]}"; do
  task=${tasks[i]}
  round=$((i + 1))

  # Short pause between two consecutive rounds.
  if ((i > 0)); then
    sleep 5
  fi

  run_stage "$round" "$task" "$done_tasks" "$done_task_lora_path" || {
    status=$?
    printf 'error: round %s (%s) failed with exit status %s; see %s\n' \
      "$round" "$task" "$status" "$log_file" >&2
    exit "$status"
  }

  # The next round sees this task as done and reads this round's output.
  done_tasks=${done_tasks:+${done_tasks},}${task}
  done_task_lora_path="${output_root}/${round}-${task}"
done

