#!/usr/bin/env bash
#
# Launches four run_glue.py runs in parallel, one per GPU index (0-3).
# Every run loads runs/bert-base/$TASK_NAME/seed_1_split and uses the same
# flags; the runs differ only in the value of --neg_slope (2.0 or 1.5) and in
# whether --nll_weight/--ece_weight/--ece_num_partitions_train are passed.
#
# The script blocks until all four runs have finished and exits non-zero if
# any of them failed.

export GLUE_DIR=/export/home/glue_data
export TASK_NAME=MNLI

#######################################
# Run run_glue.py once, pinned to a single GPU.
# Globals:   GLUE_DIR, TASK_NAME (read)
# Arguments: $1   - GPU index exported as CUDA_VISIBLE_DEVICES
#            $2   - value passed to --neg_slope (also part of the output dir)
#            $3   - tag inserted into the output dir name after the slope
#                   (may be empty)
#            $4.. - extra flags forwarded verbatim to run_glue.py
# Returns:   exit status of run_glue.py
#######################################
run_calib() {
  local gpu="$1" neg_slope="$2" loss_tag="$3"
  shift 3

  # "$@" sits between the --neg_slope line and the optimizer line so the flag
  # order seen by run_glue.py is the same as in the hand-written commands.
  CUDA_VISIBLE_DEVICES="$gpu" python run_glue.py \
    --seed 100 \
    --model_name_or_path "runs/bert-base/$TASK_NAME/seed_1_split" \
    --task_name "$TASK_NAME" \
    --do_train --do_eval \
    --data_dir "$GLUE_DIR/$TASK_NAME" \
    --max_seq_length 128 \
    --per_device_train_batch_size 32 \
    --use_calib --temp_model nn_on_reps --temp_model_depth 2 --temp_model_width 512 --min_temp 0.2 \
    --num_temps 5 --temp_init_increment 0.5 --neg_slope "$neg_slope" \
    "$@" \
    --learning_rate 0.01 --weight_decay 0.0 --optimizer sgd --scheduler onecycle \
    --num_train_epochs 3.0 \
    --split_size 350000 --use_split train_val \
    --output_dir "runs/bert-base/$TASK_NAME/calib_nn_on_reps_neg_slope_${neg_slope}${loss_tag}_sgd_lr_0.01_onecycle_3epochs" \
    --overwrite_output_dir
}

status=0

# GPUs 0-2 run in the background; their PIDs are kept so each one can be
# waited on (and its exit status checked) individually.
run_calib 0 2.0 '' &
pid_gpu0=$!

run_calib 1 2.0 _eceloss_w_10.0_B_10 \
  --nll_weight 0.0 --ece_weight 10.0 --ece_num_partitions_train 10 &
pid_gpu1=$!

run_calib 2 1.5 '' &
pid_gpu2=$!

# GPU 3 runs in the foreground; record a failure instead of losing it.
run_calib 3 1.5 _eceloss_w_10.0_B_10 \
  --nll_weight 0.0 --ece_weight 10.0 --ece_num_partitions_train 10 \
  || status=$?

# Without this barrier the script would return as soon as the GPU 3 run ends,
# even if the background runs are still going or have failed.
for pid in "$pid_gpu0" "$pid_gpu1" "$pid_gpu2"; do
  wait "$pid" || status=$?
done

exit "$status"

