# Shared settings: the GLUE data root and the task name. Both are exported so
# that child processes started from this shell inherit them.
GLUE_DIR=/export/home/glue_data
TASK_NAME=MNLI
export GLUE_DIR TASK_NAME

# GPU-0 run: invoke run_glue.py with the temperature-model flags (--use_temp,
# --temp_model nn_on_reps, depth 2, width 512, --min_temp 0.2), passing
# runs/bert-base/$TASK_NAME/seed_1_split as --model_name_or_path and writing
# to its own --output_dir. No --nll_weight/--ece_weight flags are given here,
# so whatever defaults run_glue.py defines apply.
# NOTE(review): presumably that default is a plain NLL objective -- confirm
# in run_glue.py.
#
# CUDA_VISIBLE_DEVICES=0 is set only for this one command. The trailing `&`
# starts the run in the background, so the shell continues immediately instead
# of blocking until the run ends.
#
# Every variable expansion is double-quoted so that a task name or data
# directory containing whitespace or glob characters is still passed to
# run_glue.py as a single argument.
CUDA_VISIBLE_DEVICES=0 python run_glue.py \
  --seed 100 \
  --model_name_or_path "runs/bert-base/$TASK_NAME/seed_1_split" \
  --task_name "$TASK_NAME" \
  --do_train --do_eval \
  --data_dir "$GLUE_DIR/$TASK_NAME" \
  --max_seq_length 128 \
  --per_device_train_batch_size 32 \
  --use_temp --temp_model nn_on_reps --temp_model_depth 2 --temp_model_width 512 --min_temp 0.2 \
  --learning_rate 0.01 --weight_decay 0.0 --optimizer sgd --scheduler onecycle \
  --num_train_epochs 3.0 \
  --split_size 350000 --use_split train_val \
  --output_dir "runs/bert-base/$TASK_NAME/temp_nn_on_reps_sgd_lr_0.01_onecycle_3epochs" \
  --overwrite_output_dir &


# PID of the most recently started background job (empty when there is none).
# It is captured before the foreground run below so that job can be waited on
# once the foreground run has finished.
prev_bg_pid=$!

# GPU-1 run: invoke run_glue.py with the temperature-model flags (--use_temp,
# --temp_model nn_on_reps, depth 2, width 512, --min_temp 0.2) and, in
# addition, --nll_weight 0.0 --ece_weight 10.0 --ece_num_partitions_train 10.
# NOTE(review): presumably this trains on an ECE-based loss with 10 bins and
# the NLL term switched off -- confirm in run_glue.py.
#
# CUDA_VISIBLE_DEVICES=1 is set only for this one command, which runs in the
# foreground. Every variable expansion is double-quoted so that a task name or
# data directory containing whitespace or glob characters is still passed as a
# single argument.
CUDA_VISIBLE_DEVICES=1 python run_glue.py \
  --seed 100 \
  --model_name_or_path "runs/bert-base/$TASK_NAME/seed_1_split" \
  --task_name "$TASK_NAME" \
  --do_train --do_eval \
  --data_dir "$GLUE_DIR/$TASK_NAME" \
  --max_seq_length 128 \
  --per_device_train_batch_size 32 \
  --use_temp --temp_model nn_on_reps --temp_model_depth 2 --temp_model_width 512 --min_temp 0.2 \
  --nll_weight 0.0 --ece_weight 10.0 --ece_num_partitions_train 10 \
  --learning_rate 0.01 --weight_decay 0.0 --optimizer sgd --scheduler onecycle \
  --num_train_epochs 3.0 \
  --split_size 350000 --use_split train_val \
  --output_dir "runs/bert-base/$TASK_NAME/temp_nn_on_reps_eceloss_w_10.0_B_10_sgd_lr_0.01_onecycle_3epochs" \
  --overwrite_output_dir
fg_status=$?

# Barrier: do not fall off the end of the script while the earlier background
# job is still running, and do not let its failure go unnoticed.
bg_status=0
if [ -n "$prev_bg_pid" ]; then
  wait "$prev_bg_pid" || bg_status=$?
  if [ "$bg_status" -ne 0 ]; then
    printf 'background run (pid %s) exited with status %s\n' \
      "$prev_bg_pid" "$bg_status" >&2
  fi
fi

# Resulting status: the foreground run's status takes precedence (as it did
# before the barrier was added); if it succeeded, a background failure is
# reported instead.
final_status=$fg_status
if [ "$final_status" -eq 0 ]; then
  final_status=$bg_status
fi

# Set $? from a subshell rather than calling `exit`, so the current shell is
# not terminated if this file is sourced or further commands follow.
(exit "$final_status")