# Launches run_glue.py on the `qnli` task with the Bayes/LoRA flags below,
# as 2 processes started through `python -m torch.distributed.launch`.
#
# Notes:
#   * The run_glue.py path is relative, so invoke this script from the
#     directory that contains `examples/`.
#   * `pretrained_model_path` and `outputdir` look like placeholders --
#     TODO confirm and replace with real paths before running.
#   * Arguments given to this script are not forwarded to run_glue.py.
#   * The script's exit status is the exit status of the launcher process.
launch_qnli_bayes_lora() {
  # Start from an empty argument list. Every `set -- "$@" ...` below appends
  # one group of run_glue.py options, so the final order matches the order
  # in which the groups are written here.
  set --

  # Model checkpoint and task.
  set -- "$@" --model_name_or_path pretrained_model_path
  set -- "$@" --task_name qnli

  # Bayes / LoRA switches, size, scaling and target modules.
  set -- "$@" --apply_bayes --apply_lora --lora_type bayes
  set -- "$@" --lora_r 8
  set -- "$@" --lora_module query,key,value,intermediate,layer.output,attention.output
  set -- "$@" --lora_alpha 16

  # Noise and sampling options.
  set -- "$@" --noise_rate 0.5 --noise_type Bernoulli --sample_num 4

  # Run both training and evaluation.
  set -- "$@" --do_train --do_eval

  # Input length, batch size and optimisation schedule.
  set -- "$@" --max_seq_length 512
  set -- "$@" --per_device_train_batch_size 16 --learning_rate 5e-4
  set -- "$@" --num_train_epochs 5 --warmup_steps 500
  set -- "$@" --cls_dropout 0.1 --weight_decay 0.01

  # Step-based evaluation, checkpointing and logging intervals.
  set -- "$@" --evaluation_strategy steps --eval_steps 1000
  set -- "$@" --save_strategy steps --save_steps 16000
  set -- "$@" --logging_steps 300

  # Seed and output location; an existing output directory is overwritten.
  set -- "$@" --seed 6
  set -- "$@" --root_output_dir outputdir
  set -- "$@" --overwrite_output_dir

  # CUDA_VISIBLE_DEVICES is set only for this one process: devices 0 and 1
  # are exposed, matching the 2 processes requested from the launcher.
  CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch \
    --master_port=8821 \
    --nproc_per_node=2 \
    examples/text-classification/run_glue.py \
    "$@"
}

launch_qnli_bayes_lora
