#!/bin/bash

# Start main.py through `accelerate launch` with a fixed set of default
# arguments, followed by whatever extra arguments the caller supplies.
#
# Usage: <this script> [extra main.py arguments...]
#
# Notes:
#   * Every path below is relative, so it is resolved against the current
#     working directory of the caller (presumably the directory that holds
#     main.py and accelerate_configs/ -- confirm before running elsewhere).
#   * Caller arguments are appended after the defaults. Whether a repeated
#     flag overrides the default depends on main.py's argument parser, which
#     is not visible from this script.
#   * The comments inside the arrays group flags by name only; their actual
#     semantics are defined by main.py.
run_training() {
  # Options placed before the script name, i.e. addressed to the launcher.
  local -a launcher_opts=(
    --config_file accelerate_configs/deepspeed_zero3.yaml
    --num_processes 4
    --num_machines 1
  )

  # Options placed after the script name, i.e. addressed to main.py.
  local -a script_opts=(
    # Model, output and dataset locations. The eval files are passed as ONE
    # comma-separated argument.
    --model-path '../modelscope'
    --policy-model 'Qwen/Qwen3-1.7B'
    --output-dir '../ckpts'
    --dataset-path '../datasets'
    --train-dataset-file 'ours/math/train/deepmath_hard_train.jsonl'
    --eval-dataset-files 'ours/math/test/aime24.jsonl,ours/math/test/aime25.jsonl,ours/math/test/amc23.jsonl,ours/math/test/minerva.jsonl'
    --eval-dataset-split 'train'

    # Evaluation and checkpoint cadence.
    --eval-strategy 'steps'
    --eval-steps 50
    --save-strategy 'steps'
    --save-steps 50

    # Training split, epoch count and learning-rate schedule.
    --dataset-split 'train'
    --num-train-epochs 5
    --warmup-ratio 0.0
    --learning-rate 1e-6
    --lr-scheduler-type 'constant'

    # Batch sizes, sequence lengths and generation counts.
    --per-device-eval-batch-size 4
    --per-device-train-batch-size 4
    --gradient-accumulation-steps 1
    --generation-batch-size 64
    --num-iterations 1
    --max-prompt-length 1024
    --max-completion-length 8192
    --num-generations 8

    # Valueless switches.
    --mask-truncated-completions
    --gradient-checkpointing
    --bf16
    --bf16-full-eval
    --use-liger-kernel
    --torch-compile

    # vLLM-related flags.
    --use-vllm
    --vllm-mode 'colocate'
    --vllm-gpu-memory-utilization 0.2
    --vllm-tensor-parallel-size 1

    # Sampling flags.
    --temperature 1.0
    --top-p 1.0
    --top-k -1

    # Reward-related flags. --reward-weights receives four separate values.
    --reward-weights 1.0 1.0 1.0 1.0
    --prob-reward-weight 1.0
    --min-prob-reward-ratio 1.0
    --max-prob-reward-ratio 1000.0
    --prob-model 'self'
    --format-wrong-reward -1.0

    # Algorithm hyperparameters (meaning defined by main.py).
    --epsilon 0.2
    --epsilon-high 0.28
    --importance-sampling-level 'token'
    --beta 0.0
    --kl-estimator 'k3'
    --min-r 1e-3
    --max-r 1000.0
    --top-entropy-quantile 1.0
    --z-kl-beta 1.0
    --z-kl-constraint-coef 0.5
    --z-kl-learning-coef 0.5
    --p-grpo-loss-coef 1.0
    --q-grpo-loss-coef 1.0
    --sft-beta 0.0

    # Logging flags.
    --logging-steps 1
    --logging-strategy 'steps'
    --log-completions
    --num-completions-to-print 1
  )

  # Last command in the function, so its exit status becomes the script's.
  accelerate launch "${launcher_opts[@]}" main.py "${script_opts[@]}" "$@"
}

run_training "$@"

