# Run configuration for the GSM8K fine-tuning launch further down this script.

# NCCL reads NCCL_DEBUG from the environment; WARN is the level requested here.
NCCL_DEBUG=WARN
export NCCL_DEBUG

# Training entry point and the dataset name forwarded to it via --dataset.
python_file="1123_training_gsm8k.py"
dataset="gsm8k"

# Values forwarded as --learning_rate, --lr_scheduler_type and --lora_r.
lr="2e-4"
lr_schedule="cosine"
lora_r="64"

# Model directory name; it is appended to the --model_path prefix and also
# embedded in the output directory name.
model="Meta-Llama-3-8B-w4a16g32"

# Results land in <base>/<model>-<dataset>-lr-<lr>-<schedule>-qbas.
output_dir_base="./output/loqa"
output_dir="${output_dir_base}/${model}-${dataset}-lr-${lr}-${lr_schedule}-qbas"


# Launch the training script through Accelerate using the DeepSpeed config
# file named below, with 8 processes.
#
# Arguments before "$python_file" are consumed by the Accelerate launcher;
# everything after it is passed to the training script itself.
#
# With 8 processes, a per-device batch of 2 and 4 accumulation steps, one
# optimizer step covers 8 * 2 * 4 = 64 samples (assuming one device per
# process -- confirm against the Accelerate config).
#
# NOTE(review): the "/.../" prefix of --model_path looks like a redacted
# placeholder; replace it with the real checkpoint directory before running.
#
# All expansions are quoted so that values containing spaces or glob
# characters are forwarded as single arguments.
python -m accelerate.commands.launch --main_process_port 39979 \
    --config_file=1123_deepspeed_zero2_gsm8k.yaml \
    --num_processes 8 \
    "$python_file" \
    --seed 11 \
    --model_path "/.../${model}" \
    --output_dir "$output_dir" \
    --per_device_train_batch_size 2 \
    --gradient_accumulation_steps 4 \
    --learning_rate "$lr" \
    --dataset "$dataset" \
    --lr_scheduler_type "$lr_schedule" \
    --num_train_epochs 3 \
    --do_eval True \
    --max_eval_samples 1000 \
    --eval_steps 250 \
    --save_total_limit 8 \
    --lora_r "$lora_r" \
    --save_strategy "epoch" \
    --weight_decay 0.1 \
    --warmup_ratio 0.03