# Run run_glue.py on the MNLI task with --do_train and --do_predict.
# --model_name_or_path points at runs/bert-base/$TASK_NAME/seed_1_split and
# results go to the --output_dir below (existing contents may be replaced,
# since --overwrite_output_dir is passed).
#
# GLUE_DIR and TASK_NAME are exported, so child processes inherit them.
export GLUE_DIR=/export/home/glue_data
export TASK_NAME=MNLI
# DATASEED is passed as --seed and is also embedded in the output directory name.
DATASEED=100
# CUDA_VISIBLE_DEVICES=0 is set for this single command only.
# All expansions are double-quoted so a value containing spaces or glob
# characters cannot be split into several arguments.
# NOTE(review): the output directory name says "3epochs" while
# --num_train_epochs is 1.0 — confirm which one is intended.
CUDA_VISIBLE_DEVICES=0 python run_glue.py \
  --seed "$DATASEED" \
  --model_name_or_path "runs/bert-base/$TASK_NAME/seed_1_split" \
  --task_name "$TASK_NAME" \
  --do_train --do_predict \
  --data_dir "$GLUE_DIR/$TASK_NAME" \
  --max_seq_length 128 \
  --per_device_train_batch_size 32 \
  --use_temp --temp_model const --min_temp 0.2 \
  --learning_rate 0.01 --weight_decay 0.0 --optimizer sgd --scheduler onecycle \
  --num_train_epochs 1.0 \
  --split_size 390000 --use_split train_val \
  --save_steps 10000000 \
  --output_dir "runs/bert-base/$TASK_NAME/final/temp_const_sgd_lr_0.01_onecycle_3epochs/dataseed_$DATASEED" \
  --overwrite_output_dir

# Run run_glue.py on the MNLI task with --do_eval only.
# The variables are (re)assigned here so this fragment can be run on its own.
export GLUE_DIR=/export/home/glue_data
export TASK_NAME=MNLI
# DATASEED is passed as --seed and is also embedded in the output directory name.
DATASEED=100
# CUDA_VISIBLE_DEVICES=1 is set for this single command only.
# All expansions are double-quoted so a value containing spaces or glob
# characters cannot be split into several arguments.
# NOTE(review): --model_name_or_path is the seed_1_split directory, not the
# --output_dir used below — confirm which model is meant to be evaluated.
# NOTE(review): no --use_temp/--temp_model/--min_temp flags are passed here
# even though the output directory name contains "temp_const" — confirm.
# NOTE(review): the output directory name says "3epochs" while
# --num_train_epochs is 1.0 — confirm which one is intended.
CUDA_VISIBLE_DEVICES=1 python run_glue.py \
  --seed "$DATASEED" \
  --model_name_or_path "runs/bert-base/$TASK_NAME/seed_1_split" \
  --task_name "$TASK_NAME" \
  --do_eval \
  --data_dir "$GLUE_DIR/$TASK_NAME" \
  --max_seq_length 128 \
  --per_device_train_batch_size 32 \
  --learning_rate 0.01 --weight_decay 0.0 --optimizer sgd --scheduler onecycle \
  --num_train_epochs 1.0 \
  --split_size 390000 --use_split train_val \
  --save_steps 10000000 \
  --output_dir "runs/bert-base/$TASK_NAME/final/temp_const_sgd_lr_0.01_onecycle_3epochs/dataseed_$DATASEED" \
  --overwrite_output_dir