#!/bin/sh
#
# Sweep over --lora_r for run_glue.py on the GLUE sst2 task with
# bert-base-uncased. The runs execute sequentially, one per rank value.
# Every run uses the same flags except:
#   * --lora_r       : 1, 2, 4, 8, 16, 32, 64
#   * --output_dir   : ./sst2_rank_<rank>_s1_8
#   * stdout log     : 1002_sst2_low_rank_<rank>_sparse_8.out
#   * GPUs / port    : rank 32 differs from the others (see the loop below)
#
# Usage: run from the directory that contains the non-GPT-2/ checkout.
# The code is kept POSIX-sh compatible (no arrays, no [[ ]], no local).
#
# A failing run does not stop the sweep; it is reported on stderr and the
# script finishes with a non-zero status if any run failed.

# Number of worker processes per node; each CUDA_VISIBLE_DEVICES list used
# below names exactly this many devices.
num_gpus=4

#######################################
# Launch one run_glue.py job for a single --lora_r value.
# Globals:   num_gpus (read), OUTPUT_DIR (written)
# Arguments: $1 - value passed to --lora_r (also used in dir/log names)
#            $2 - comma-separated device list for CUDA_VISIBLE_DEVICES
#            $3 - value passed to --master_port
# Outputs:   stdout of the launcher goes to
#            1002_sst2_low_rank_<$1>_sparse_8.out (truncated on each run);
#            stderr is not redirected.
# Returns:   exit status of the python launcher.
#######################################
run_sst2_rank() {
  OUTPUT_DIR="./sst2_rank_${1}_s1_8"

  # The meaning of --apply_lora / --apply_sparse / --num_sparse is defined by
  # this repository's run_glue.py, not by this script.
  NCCL_P2P_DISABLE=1 CUDA_VISIBLE_DEVICES="$2" python -m torch.distributed.launch \
    --nproc_per_node="$num_gpus" \
    --master_port="$3" \
    non-GPT-2/examples/pytorch/text-classification/run_glue.py \
    --save_total_limit 10 \
    --model_name_or_path bert-base-uncased \
    --task_name sst2 \
    --output_dir "${OUTPUT_DIR}" \
    --do_train \
    --do_eval \
    --num_train_epochs 3 \
    --save_steps 100 \
    --seed 1 \
    --per_device_train_batch_size 8 \
    --max_seq_length 128 \
    --per_device_eval_batch_size 8 \
    --overwrite_output_dir \
    --logging_steps 100 \
    --load_best_model_at_end True \
    --metric_for_best_model eval_accuracy \
    --apply_lora \
    --apply_sparse \
    --num_sparse 8 \
    --learning_rate 2e-4 \
    --lora_r "$1" \
    --evaluation_strategy steps > "1002_sst2_low_rank_${1}_sparse_8.out"
}

failed_ranks=''
for rank in 1 2 4 8 16 32 64; do
  case "$rank" in
    32)
      # NOTE(review): only this rank runs on GPUs 4-7 with port 12352; every
      # other rank uses GPUs 0-3 and port 12350. Kept as-is because it may be
      # intentional (e.g. left over from a parallel launch) -- confirm, and
      # fold into the default branch if it is not needed.
      gpus=4,5,6,7
      port=12352
      ;;
    *)
      gpus=0,1,2,3
      port=12350
      ;;
  esac

  if ! run_sst2_rank "$rank" "$gpus" "$port"; then
    printf 'warning: run with --lora_r %s failed\n' "$rank" >&2
    failed_ranks="${failed_ranks} ${rank}"
  fi
done

# Last command decides the script's status: non-zero if any run failed.
# A plain test (rather than `exit`) keeps this safe if the file is sourced.
[ -z "$failed_ranks" ]
