#!/usr/bin/env bash
#
# Launch finetune.py through the DeepSpeed launcher on one local device.
#
# Usage: run from the directory that contains finetune.py, configs/ and the
#        dataset file, e.g. `bash <this-script>`.
#
# Every value below is forwarded verbatim to finetune.py; the meaning of each
# flag (including --group_size / --group_r / --group_alpha) is defined by that
# script, which is not part of this file.
#
# The body is kept POSIX-compatible (no arrays, no pipefail) so it still works
# if invoked as `sh <this-script>`.

# Abort on a failed command and on use of an unset variable (catches typos in
# the variable names below instead of silently passing an empty argument).
set -eu

# Directory handed to finetune.py via --output_dir.
OUTPUT_PATH="output/llama3-mog"
# Device index placed after "localhost:" in the launcher's --include selector.
DEVICE=0

# Expansions are quoted so that a value containing whitespace or glob
# characters is still passed as exactly one argument.
# The JSON list for --target_modules is single-quoted so the shell passes it
# through untouched (brackets, double quotes and spaces included).
# NOTE: the last argument line deliberately has no trailing backslash, so
# anything appended after the command is not swallowed into it.
deepspeed --master_port=16960 --include="localhost:${DEVICE}" finetune.py \
    --deepspeed configs/ds_config_zero2_no_offload.json \
    --base_model 'meta-llama/Meta-Llama-3.1-8B-Instruct' \
    --data_path 'commonsense_170k.json' \
    --output_dir "${OUTPUT_PATH}" \
    --per_device_train_batch_size 4 \
    --gradient_accumulation_steps 4 \
    --num_epochs 3 \
    --learning_rate 1e-4 \
    --cutoff_len 256 \
    --val_set_size 120 \
    --logging_steps 50 \
    --eval_step 500 \
    --save_step 500 \
    --target_modules '["q_proj", "k_proj", "v_proj", "up_proj", "down_proj"]' \
    --group_size 8 \
    --group_r 220 \
    --group_alpha 80 \
    --load_best_model_at_end True
