# Launches examples/text-classification/run_glue_bilevel.py on the GLUE "rte"
# task with roberta-base, starting from the step-9800 MNLI model/arch
# checkpoints and with LoRA enabled.
#
# The script and checkpoint paths are relative, so run this from the directory
# that contains both `examples/` and `checkpoint/`.
#
# NOTE(review): this file previously passed --weight_decay twice (0.15, then
# 0.12). If the Python script parses options with argparse (not visible from
# here), the last occurrence wins, so 0.12 was the value actually in effect.
# The shadowed 0.15 has been removed; confirm 0.12 is the intended setting.

# Exported for child processes; nothing in this script reads it.
export num_gpus=1
# cuBLAS workspace setting for reproducible results; see
# https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
export CUBLAS_WORKSPACE_CONFIG=":16:8"
# Expose only GPU 0 to the process.
export CUDA_VISIBLE_DEVICES=0

# The final argument deliberately has no trailing backslash, so a line appended
# after the command can never be swallowed into its argument list.
python examples/text-classification/run_glue_bilevel.py \
  --model_name_or_path roberta-base \
  --task_name rte \
  --model_init_checkpoint checkpoint/roberta-base/mnli/model.9800.pt \
  --arch_init_checkpoint checkpoint/roberta-base/mnli/arch.9800.pt \
  --max_seq_length 512 \
  --train_batch_size 32 \
  --eval_batch_size 128 \
  --lr 4e-6 \
  --arch_lr 2e-6 \
  --weight_decay 0.12 \
  --arch_weight_decay 0.1 \
  --apply_lora \
  --lora_r 16 \
  --lora_alpha 8 \
  --seed 0 \
  --valid_step 10 \
  --unroll_step 1 \
  --warmup_step 90 \
  --max_step 1500 \
  --train_iters 1500
