#!/usr/bin/env bash
#
# Run train.py through `accelerate launch` with the model
# deepseek-ai/deepseek-math-7b-rl, writing results to
# models/Deepseek-Math-7B-QFT.
#
# Usage:
#   Run from the directory that contains train.py and zero3.yaml; both are
#   referenced by relative path, as is the output directory.
#
# Notes:
#   - ACCELERATE_LOG_LEVEL=info is set for this one command only.
#   - Options before train.py go to the launcher (config file and main
#     process port 29600); options after it are passed to train.py.
#   - NOTE(review): /path/to/gsm8k_math_15k looks like a placeholder; point
#     --dataset_path at the real dataset location before running.
#   - NOTE(review): the config name suggests a DeepSpeed ZeRO stage 3 setup;
#     confirm against zero3.yaml.
#   - The final argument line deliberately has no trailing backslash, so text
#     added below the command is never absorbed into its argument list.

# Fail fast on errors and on use of unset variables.
set -eu

ACCELERATE_LOG_LEVEL=info accelerate launch \
--config_file ./zero3.yaml \
--main_process_port 29600 \
train.py \
    --model_path deepseek-ai/deepseek-math-7b-rl \
    --dataset_path /path/to/gsm8k_math_15k \
    --prompt_type deepseek-math \
    --num_train_epochs 1 \
    --gradient_checkpointing false \
    --max_length 256 \
    --output_dir models/Deepseek-Math-7B-QFT \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 4
