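# Train Llama-2-7B on GSM8K using one A100 (pinned to CUDA device 7).
# NOTE(review): this header originally read "4-bit 64-rank ... with LoftQ", but the
# experiment/run names below say 2-bit, rank-64, QLoRA — confirm which setup is intended.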
# Launch train_adanf_gsm8k.py on GPU 7 and write its stdout to a log file under
# llama2_test_results/. stderr is not redirected, so it stays on the terminal.
#
# Notes on the arguments:
#   - Effective batch per optimizer step is 1 x 8 (per-device batch size x
#     gradient accumulation steps).
#   - --evaluation_strategy is "no"; --eval_steps 50 is presumably unused in
#     that mode — TODO confirm against the training script.
#   - Checkpoints go under exp_results/ (--output_dir), at most one is kept
#     (--save_total_limit 1).
#
# The shell opens the redirect target before starting python; if the log
# directory is missing the redirect fails and the training run never starts,
# so make sure the directory exists first.
mkdir -p llama2_test_results

CUDA_VISIBLE_DEVICES=7 python train_adanf_gsm8k.py \
  --model_name_or_path meta-llama/Llama-2-7b-hf \
  --learning_rate 4e-4 \
  --seed 11 \
  --expt_name gsm8k_llama2_7b_2bit_64rank_qlora_7 \
  --output_dir exp_results/ \
  --num_train_epochs 4 \
  --per_device_train_batch_size 1 \
  --gradient_accumulation_steps 8 \
  --save_strategy "steps" \
  --save_steps 12000 \
  --save_total_limit 1 \
  --load_best_model_at_end False \
  --warmup_steps 120 \
  --logging_steps 10 \
  --evaluation_strategy "no" \
  --eval_steps 50 \
  --do_train \
  --report_to wandb \
  --run_name "qlora_gsm8k_2bit_lr_4e-4" \
  --full_precision True > llama2_test_results/output_qlora_gsm8k_2bit_lr_4e-4_gs_64_adaptive_nf_round_nearest_gridnum_10_gridstart_0.9_gridend_1.0_l3norm_epoch_4_hp_wiki.txt