#!/usr/bin/env bash
# Launches eight run_glue.py configurations per data seed, one per GPU index
# (0-7, selected through CUDA_VISIBLE_DEVICES). Every configuration loads the
# same checkpoint directory (runs/bert-base/$TASK_NAME/seed_1_split) and shares
# the same optimizer/schedule/split flags; the runs differ only in their
# --use_temp / --use_calib flag group, the optional ECE-loss flags, and the
# output directory name.
#
# All eight jobs of one seed run concurrently. The script waits for every one
# of them before starting the next seed, so a GPU is never handed a second job
# while the previous one is still running.
#
# NOTE(review): the exact meaning of the individual run_glue.py flags is
# defined by run_glue.py, which is not visible from this script.

export GLUE_DIR=/export/home/glue_data
export TASK_NAME=MNLI

#######################################
# Runs one run_glue.py configuration in the foreground on a single GPU.
# Callers background the call with '&' to run configurations in parallel.
# Globals:   DATASEED (read), TASK_NAME (read), GLUE_DIR (read)
# Arguments: $1   - GPU index, exported to the job as CUDA_VISIBLE_DEVICES
#            $2   - run directory name placed under
#                   runs/bert-base/$TASK_NAME/final/
#            $3.. - configuration-specific flags, inserted between the shared
#                   data flags and the shared optimizer flags
# Returns:   exit status of run_glue.py
#######################################
train_variant() {
  local gpu=$1
  local run_name=$2
  shift 2

  CUDA_VISIBLE_DEVICES="$gpu" python run_glue.py \
    --seed "$DATASEED" \
    --model_name_or_path runs/bert-base/"$TASK_NAME"/seed_1_split \
    --task_name "$TASK_NAME" \
    --do_train --do_eval \
    --data_dir "$GLUE_DIR/$TASK_NAME" \
    --max_seq_length 128 \
    --per_device_train_batch_size 32 \
    "$@" \
    --learning_rate 0.01 --weight_decay 0.0 --optimizer sgd --scheduler onecycle \
    --num_train_epochs 6.0 \
    --split_size 350000 --use_split train_val \
    --save_steps 10000000 \
    --output_dir runs/bert-base/"$TASK_NAME"/final/"$run_name"/dataseed_"$DATASEED" \
    --overwrite_output_dir
}

for DATASEED in 100 200 300 400; do
  # PIDs of this seed's background jobs, space-separated.
  pids=""

  # --use_temp with a constant temperature model.
  train_variant 0 temp_const_sgd_lr_0.01_onecycle_6epochs \
    --use_temp --temp_model const --min_temp 0.2 &
  pids="$pids $!"

  # Same as above, plus the ECE-loss flags.
  train_variant 1 temp_const_eceloss_w_10.0_B_10_sgd_lr_0.01_onecycle_6epochs \
    --use_temp --temp_model const --min_temp 0.2 \
    --nll_weight 0.0 --ece_weight 10.0 --ece_num_partitions_train 10 &
  pids="$pids $!"

  # --use_temp with the nn_on_reps temperature model.
  train_variant 2 temp_nn_on_reps_sgd_lr_0.01_onecycle_6epochs \
    --use_temp --temp_model nn_on_reps --temp_model_depth 2 --temp_model_width 512 --min_temp 0.2 &
  pids="$pids $!"

  # Same as above, plus the ECE-loss flags.
  train_variant 3 temp_nn_on_reps_eceloss_w_10.0_B_10_sgd_lr_0.01_onecycle_6epochs \
    --use_temp --temp_model nn_on_reps --temp_model_depth 2 --temp_model_width 512 --min_temp 0.2 \
    --nll_weight 0.0 --ece_weight 10.0 --ece_num_partitions_train 10 &
  pids="$pids $!"

  # --use_calib with the nn_on_reps temperature model.
  train_variant 4 calib_nn_on_reps_neg_slope_1.5_sgd_lr_0.01_onecycle_6epochs \
    --use_calib --temp_model nn_on_reps --temp_model_depth 2 --temp_model_width 512 --min_temp 0.2 \
    --num_temps 5 --temp_init_increment 0.5 --neg_slope 1.5 &
  pids="$pids $!"

  # Same as above, plus the ECE-loss flags.
  train_variant 5 calib_nn_on_reps_neg_slope_1.5_eceloss_w_10.0_B_10_sgd_lr_0.01_onecycle_6epochs \
    --use_calib --temp_model nn_on_reps --temp_model_depth 2 --temp_model_width 512 --min_temp 0.2 \
    --num_temps 5 --temp_init_increment 0.5 --neg_slope 1.5 \
    --nll_weight 0.0 --ece_weight 10.0 --ece_num_partitions_train 10 &
  pids="$pids $!"

  # --use_calib with --temp_model_on_bert_layer 10.
  train_variant 6 calib_nn_on_reps_layer10_neg_slope_1.5_sgd_lr_0.01_onecycle_6epochs \
    --use_calib --temp_model nn_on_reps --temp_model_depth 2 --temp_model_width 512 --min_temp 0.2 \
    --temp_model_on_bert_layer 10 \
    --num_temps 5 --temp_init_increment 0.5 --neg_slope 1.5 &
  pids="$pids $!"

  # Same as above, plus the ECE-loss flags.
  train_variant 7 calib_nn_on_reps_layer10_neg_slope_1.5_eceloss_w_10.0_B_10_sgd_lr_0.01_onecycle_6epochs \
    --use_calib --temp_model nn_on_reps --temp_model_depth 2 --temp_model_width 512 --min_temp 0.2 \
    --temp_model_on_bert_layer 10 \
    --num_temps 5 --temp_init_increment 0.5 --neg_slope 1.5 \
    --nll_weight 0.0 --ece_weight 10.0 --ece_num_partitions_train 10 &
  pids="$pids $!"

  # Barrier: wait for ALL eight jobs, not just the last one launched, so the
  # next seed never starts on a GPU that is still busy. A failed job is
  # reported on stderr but does not stop the remaining seeds.
  # $pids is intentionally unquoted so it splits into one PID per word.
  for pid in $pids; do
    wait "$pid" \
      || printf 'warning: job %s (dataseed %s) exited with status %s\n' \
        "$pid" "$DATASEED" "$?" >&2
  done
done