
# Launch run_glue_no_trainer.py for roberta-large through `accelerate launch`,
# on a single process with fp16 mixed precision.
#
# Usage:
#   <script> TASK_NAME EXP_NAME BS LR WARMUP MAX_STEPS MAX_LENGTH ENABLE PB GB \
#            MB SQMB TRUNCATED_MODE TRUNCATED_FACTOR TRUNCATED_GLOBAL_FACTOR \
#            SCALE_TYPE ROUND_TYPE Q_ORACLE MODE
#
# Environment:
#   MAIN_PORT  forwarded verbatim as --main_process_port=<value>. It is not set
#              anywhere in this script, so the caller is expected to provide it
#              (NOTE(review): confirm whether an empty port is acceptable).
#
# Exit status:
#   2 when fewer than 19 positional arguments are given, otherwise the status
#   of `accelerate launch`.
main() {
  if [ "$#" -lt 19 ]; then
    printf 'usage: %s TASK_NAME EXP_NAME BS LR WARMUP MAX_STEPS MAX_LENGTH ENABLE PB GB MB SQMB TRUNCATED_MODE TRUNCATED_FACTOR TRUNCATED_GLOBAL_FACTOR SCALE_TYPE ROUND_TYPE Q_ORACLE MODE\n' \
      "${0##*/}" >&2
    printf 'error: expected 19 arguments, got %s\n' "$#" >&2
    return 2
  fi

  TASK_NAME=$1
  EXP_NAME=$2
  BS=$3
  LR=$4
  WARMUP=$5
  MAX_STEPS=$6
  MAX_LENGTH=$7
  ENABLE=$8
  PB=$9
  # Braces are mandatory from the tenth parameter on: a bare "$10" is parsed
  # as "${1}0", i.e. the first argument followed by a literal "0".
  # GB and the TRUNCATED_* values are captured to keep the positional
  # interface stable but are not forwarded to the training script below.
  GB=${10}
  MB=${11}
  SQMB=${12}
  TRUNCATED_MODE=${13}
  TRUNCATED_FACTOR=${14}
  TRUNCATED_GLOBAL_FACTOR=${15}
  SCALE_TYPE=${16}
  ROUND_TYPE=${17}
  Q_ORACLE=${18}
  MODE=${19}

  # Every expansion is quoted so that a value is always passed as exactly one
  # argument (no word splitting, no globbing, no silently vanishing empties).
  accelerate launch \
    --mixed_precision=fp16 \
    --num_processes=1 \
    "--main_process_port=${MAIN_PORT:-}" \
    run_glue_no_trainer.py \
    --model_name_or_path roberta-large \
    --task_name "$TASK_NAME" \
    --max_length "$MAX_LENGTH" \
    --per_device_train_batch_size "$BS" \
    --weight_decay 0.1 \
    --adam_beta1 0.9 \
    --adam_beta2 0.98 \
    --adam_epsilon 1e-6 \
    --learning_rate "$LR" \
    --lr_scheduler_type polynomial \
    --num_warmup_steps "$WARMUP" \
    --num_train_epochs 10 \
    --max_train_steps "$MAX_STEPS" \
    --output_dir "$EXP_NAME" \
    --with_tracking \
    --checkpointing_steps epoch \
    --lpmm-enable "$ENABLE" \
    --pb "$PB" \
    --mb "$MB" \
    --sqmb "$SQMB" \
    --q_oracle "$Q_ORACLE" \
    --scale_type "$SCALE_TYPE" \
    --round_type "$ROUND_TYPE" \
    --optim_mode "$MODE"
}

# Must stay the last command so the script exits with main's status.
main "$@"