#!/usr/bin/env bash
#
# Launches A11yn_train.py through `accelerate launch`, using the
# deepspeed_zero3.yaml accelerate config, with bf16 mixed precision.
#
# Run from the directory that contains deepspeed_zero3.yaml, A11yn_train.py
# and data/ -- every path below is relative.
#
# Environment overrides, applied to this one command only:
#   NCCL_P2P_DISABLE=1          NCCL setting that turns off the peer-to-peer
#                               transport between GPUs.
#   TORCH_NCCL_BLOCKING_WAIT=1  PyTorch/NCCL process-group setting.
#   NCCL_TIMEOUT=7200000        NOTE(review): presumably a collective timeout
#                               consumed by the training stack; which component
#                               reads it, and in which unit, is not visible
#                               here -- confirm.
#
# Checkpoints go to ./outputs (evaluated and saved every 25 steps, at most 15
# kept); metrics are reported to wandb.
#
# The final argument intentionally has NO trailing backslash: a dangling line
# continuation would silently swallow whatever line is later added below the
# command and pass it to the training script as extra arguments.
NCCL_P2P_DISABLE=1 \
TORCH_NCCL_BLOCKING_WAIT=1 \
NCCL_TIMEOUT=7200000 \
accelerate launch \
    --config_file deepspeed_zero3.yaml \
    --mixed_precision=bf16 \
    --num_cpu_threads_per_process=4 \
    A11yn_train.py \
    --model_name_or_path Qwen/Qwen2.5-Coder-7B-Instruct \
    --dataset_name data/UIReq6.8K/uireq6800.json \
    --learning_rate 5e-5 \
    --weight_decay 0.1 \
    --adam_beta1 0.9 \
    --adam_beta2 0.99 \
    --lr_scheduler_type cosine \
    --adam_epsilon 1e-08 \
    --warmup_ratio 0.01 \
    --output_dir outputs \
    --beta 0.001 \
    --per_device_train_batch_size 2 \
    --per_device_eval_batch_size 3 \
    --gradient_accumulation_steps 6 \
    --num_train_epochs 2 \
    --num_generations 6 \
    --max_prompt_length 1024 \
    --max_completion_length 3072 \
    --gradient_checkpointing \
    --logging_strategy steps \
    --bf16 True \
    --bf16_full_eval True \
    --eval_strategy steps \
    --save_strategy steps \
    --logging_steps 1 \
    --eval_steps 25 \
    --save_steps 25 \
    --load_best_model_at_end True \
    --save_total_limit 15 \
    --report_to wandb \
    --use_vllm \
    --vllm_mode colocate \
    --vllm_gpu_memory_utilization 0.6 \
    --log_completions True \
    --use_peft \
    --loss_type "grpo"

