# 1. llama3.2-1b-generated 0.0616
# ~/LLaMA-Factory-250514/saves_shuyan/llama3.2-1B-base/prime-sft-full/prime-rl-rollouts/bo64_t05_validation_scored.json

# 1.1. llama321b
# Runs 1-5 of 80: llama3.2-1B-base checkpoints applied to the rollouts stored
# under llama3.2-1B-base/prime-sft-full/prime-rl-rollouts.
# The first run (tag reflogp) reads the scored rollouts and writes *_ref.json;
# the four runs after it (tag rmlogp) all take that *_ref.json as their input.
# Everything lives in a subshell, so the helper and its variables stay local.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  ckpt=$saves/llama3.2-1B-base
  tok=$ckpt/prime-sft-full
  out=$saves/llama3.2-1B-base/prime-sft-full/prime-rl-rollouts
  ref=$out/842_bo64_llama321b_ref.json

  # logp_run MODEL INPUT OUTPUT BATCH TAG [EXTRA_FLAGS...]
  # One accelerate launch (4 processes, CUDA_VISIBLE_DEVICES=0,1,2,3) of the
  # log-prob script; any EXTRA_FLAGS are appended after --model-tag.
  logp_run() {
    model=$1 input=$2 output=$3 batch=$4 tag=$5
    shift 5
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path "$model" \
      --tokenizer-path "$tok" \
      --input-json "$input" \
      --output-path "$output" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
  }

  logp_run "$ckpt/prime-sft-full" "$out/bo64_t05_validation_scored.json" "$ref" 8 reflogp
  echo "1/80"

  logp_run "$ckpt/dpo_rm" "$ref" "$out/842_bo64_llama321b_dpo.json" 8 rmlogp
  echo "2/80"

  logp_run "$ckpt/implicit-prm" "$ref" "$out/842_bo64_llama321b_implicitprm.json" 8 rmlogp
  echo "3/80"

  logp_run "$ckpt/implicit-drm-beta10-gamma5" "$ref" "$out/842_bo64_llama321b_ipvrm.json" 8 rmlogp
  echo "4/80"

  # The qrm run uses a smaller batch and the extra --model_type rm3 flag.
  logp_run "$ckpt/qrm" "$ref" "$out/842_bo64_llama321b_qrm.json" 4 rmlogp --model_type rm3
  echo "5/80"
)


# 1.2. llama323b
# Runs 6-10 of 80: llama3.2-3B checkpoints applied to the rollouts stored
# under llama3.2-1B-base/prime-sft-full/prime-rl-rollouts.
# The first run (tag reflogp) reads the scored rollouts and writes *_ref.json;
# the four runs after it (tag rmlogp) all take that *_ref.json as their input.
# Everything lives in a subshell, so the helper and its variables stay local.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  ckpt=$saves/llama3.2-3B
  tok=$ckpt/prime-sft
  out=$saves/llama3.2-1B-base/prime-sft-full/prime-rl-rollouts
  ref=$out/842_bo64_llama323b_ref.json

  # logp_run MODEL INPUT OUTPUT BATCH TAG [EXTRA_FLAGS...]
  # One accelerate launch (4 processes, CUDA_VISIBLE_DEVICES=0,1,2,3) of the
  # log-prob script; any EXTRA_FLAGS are appended after --model-tag.
  logp_run() {
    model=$1 input=$2 output=$3 batch=$4 tag=$5
    shift 5
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path "$model" \
      --tokenizer-path "$tok" \
      --input-json "$input" \
      --output-path "$output" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
  }

  logp_run "$ckpt/prime-sft" "$out/bo64_t05_validation_scored.json" "$ref" 4 reflogp
  echo "6/80"

  logp_run "$ckpt/dpo_rm" "$ref" "$out/842_bo64_llama323b_dpo.json" 4 rmlogp
  echo "7/80"

  logp_run "$ckpt/implicit-prm" "$ref" "$out/842_bo64_llama323b_implicitprm.json" 4 rmlogp
  echo "8/80"

  logp_run "$ckpt/implicit-drm-beta5-gamma2.5" "$ref" "$out/842_bo64_llama323b_ipvrm.json" 4 rmlogp
  echo "9/80"

  # The qrm run uses a smaller batch and the extra --model_type rm3 flag.
  logp_run "$ckpt/qrm/qrm" "$ref" "$out/842_bo64_llama323b_qrm.json" 2 rmlogp --model_type rm3
  echo "10/80"
)


# 1.3. qwen306b
# Runs 11-15 of 80: qwen3-0.6B-base checkpoints applied to the rollouts stored
# under llama3.2-1B-base/prime-sft-full/prime-rl-rollouts.
# The first run (tag reflogp) reads the scored rollouts and writes *_ref.json;
# the four runs after it (tag rmlogp) all take that *_ref.json as their input.
# Everything lives in a subshell, so the helper and its variables stay local.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  ckpt=$saves/qwen3-0.6B-base
  tok=$ckpt/prime-sft
  out=$saves/llama3.2-1B-base/prime-sft-full/prime-rl-rollouts
  ref=$out/842_bo64_qwen306b_ref.json

  # logp_run MODEL INPUT OUTPUT BATCH TAG [EXTRA_FLAGS...]
  # One accelerate launch (4 processes, CUDA_VISIBLE_DEVICES=0,1,2,3) of the
  # log-prob script; any EXTRA_FLAGS are appended after --model-tag.
  logp_run() {
    model=$1 input=$2 output=$3 batch=$4 tag=$5
    shift 5
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path "$model" \
      --tokenizer-path "$tok" \
      --input-json "$input" \
      --output-path "$output" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
  }

  logp_run "$ckpt/prime-sft" "$out/bo64_t05_validation_scored.json" "$ref" 4 reflogp
  echo "11/80"

  logp_run "$ckpt/dpo_rm" "$ref" "$out/842_bo64_qwen306b_dpo.json" 4 rmlogp
  echo "12/80"

  logp_run "$ckpt/implicit-prm" "$ref" "$out/842_bo64_qwen306b_implicitprm.json" 4 rmlogp
  echo "13/80"

  logp_run "$ckpt/implicit-drm-beta10-gamma5" "$ref" "$out/842_bo64_qwen306b_ipvrm.json" 4 rmlogp
  echo "14/80"

  # The qrm run uses a smaller batch and the extra --model_type rm3 flag.
  logp_run "$ckpt/qrm" "$ref" "$out/842_bo64_qwen306b_qrm.json" 2 rmlogp --model_type rm3
  echo "15/80"
)


# 1.4. qwen38b
# Runs 16-20 of 80: qwen3-8B-base checkpoints applied to the rollouts stored
# under llama3.2-1B-base/prime-sft-full/prime-rl-rollouts.
# The first run (tag reflogp) reads the scored rollouts and writes *_ref.json;
# the four runs after it (tag rmlogp) all take that *_ref.json as their input.
# Everything lives in a subshell, so the helper and its variables stay local.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  ckpt=$saves/qwen3-8B-base
  tok=$ckpt/prime-sft
  out=$saves/llama3.2-1B-base/prime-sft-full/prime-rl-rollouts
  ref=$out/842_bo64_qwen38b_ref.json

  # logp_run MODEL INPUT OUTPUT BATCH TAG [EXTRA_FLAGS...]
  # One accelerate launch (4 processes, CUDA_VISIBLE_DEVICES=0,1,2,3) of the
  # log-prob script; any EXTRA_FLAGS are appended after --model-tag.
  logp_run() {
    model=$1 input=$2 output=$3 batch=$4 tag=$5
    shift 5
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path "$model" \
      --tokenizer-path "$tok" \
      --input-json "$input" \
      --output-path "$output" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
  }

  logp_run "$ckpt/prime-sft" "$out/bo64_t05_validation_scored.json" "$ref" 1 reflogp
  echo "16/80"

  logp_run "$ckpt/dpo_rm" "$ref" "$out/842_bo64_qwen38b_dpo.json" 1 rmlogp
  echo "17/80"

  logp_run "$ckpt/implicit-prm" "$ref" "$out/842_bo64_qwen38b_implicitprm.json" 1 rmlogp
  echo "18/80"

  logp_run "$ckpt/implicit-drm-beta5-gamma2.5" "$ref" "$out/842_bo64_qwen38b_ipvrm.json" 1 rmlogp
  echo "19/80"

  # The qrm run additionally passes --model_type rm3.
  logp_run "$ckpt/qrm" "$ref" "$out/842_bo64_qwen38b_qrm.json" 1 rmlogp --model_type rm3
  echo "20/80"
)
















# 2. llama3.2-3b-generated 0.1817
# ~/LLaMA-Factory-250514/saves_shuyan/llama3.2-3B/prime-sft/prime-rl-rollouts/bon_test_0_842_scored.json


# 2.1. llama321b
# Runs 21-25 of 80: llama3.2-1B-base checkpoints applied to the rollouts
# stored under llama3.2-3B/prime-sft/prime-rl-rollouts.
# The first run (tag reflogp) reads the scored rollouts and writes *_ref.json;
# the four runs after it (tag rmlogp) all take that *_ref.json as their input.
# Everything lives in a subshell, so the helper and its variables stay local.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  ckpt=$saves/llama3.2-1B-base
  tok=$ckpt/prime-sft-full
  out=$saves/llama3.2-3B/prime-sft/prime-rl-rollouts
  ref=$out/842_bo64_llama321b_ref.json

  # logp_run MODEL INPUT OUTPUT BATCH TAG [EXTRA_FLAGS...]
  # One accelerate launch (4 processes, CUDA_VISIBLE_DEVICES=0,1,2,3) of the
  # log-prob script; any EXTRA_FLAGS are appended after --model-tag.
  logp_run() {
    model=$1 input=$2 output=$3 batch=$4 tag=$5
    shift 5
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path "$model" \
      --tokenizer-path "$tok" \
      --input-json "$input" \
      --output-path "$output" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
  }

  logp_run "$ckpt/prime-sft-full" "$out/bon_test_0_842_scored.json" "$ref" 8 reflogp
  echo "21/80"

  logp_run "$ckpt/dpo_rm" "$ref" "$out/842_bo64_llama321b_dpo.json" 8 rmlogp
  echo "22/80"

  logp_run "$ckpt/implicit-prm" "$ref" "$out/842_bo64_llama321b_implicitprm.json" 8 rmlogp
  echo "23/80"

  logp_run "$ckpt/implicit-drm-beta10-gamma5" "$ref" "$out/842_bo64_llama321b_ipvrm.json" 8 rmlogp
  echo "24/80"

  # The qrm run uses a smaller batch and the extra --model_type rm3 flag.
  logp_run "$ckpt/qrm" "$ref" "$out/842_bo64_llama321b_qrm.json" 4 rmlogp --model_type rm3
  echo "25/80"
)


# 2.2. llama323b
# Runs 26-30 of 80: llama3.2-3B checkpoints applied to the rollouts stored
# under llama3.2-3B/prime-sft/prime-rl-rollouts.
# The first run (tag reflogp) reads the scored rollouts and writes *_ref.json;
# the four runs after it (tag rmlogp) all take that *_ref.json as their input.
# Everything lives in a subshell, so the helper and its variables stay local.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  ckpt=$saves/llama3.2-3B
  tok=$ckpt/prime-sft
  out=$saves/llama3.2-3B/prime-sft/prime-rl-rollouts
  ref=$out/842_bo64_llama323b_ref.json

  # logp_run MODEL INPUT OUTPUT BATCH TAG [EXTRA_FLAGS...]
  # One accelerate launch (4 processes, CUDA_VISIBLE_DEVICES=0,1,2,3) of the
  # log-prob script; any EXTRA_FLAGS are appended after --model-tag.
  logp_run() {
    model=$1 input=$2 output=$3 batch=$4 tag=$5
    shift 5
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path "$model" \
      --tokenizer-path "$tok" \
      --input-json "$input" \
      --output-path "$output" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
  }

  logp_run "$ckpt/prime-sft" "$out/bon_test_0_842_scored.json" "$ref" 4 reflogp
  echo "26/80"

  logp_run "$ckpt/dpo_rm" "$ref" "$out/842_bo64_llama323b_dpo.json" 4 rmlogp
  echo "27/80"

  logp_run "$ckpt/implicit-prm" "$ref" "$out/842_bo64_llama323b_implicitprm.json" 4 rmlogp
  echo "28/80"

  logp_run "$ckpt/implicit-drm-beta5-gamma2.5" "$ref" "$out/842_bo64_llama323b_ipvrm.json" 4 rmlogp
  echo "29/80"

  # The qrm run uses a smaller batch and the extra --model_type rm3 flag.
  logp_run "$ckpt/qrm/qrm" "$ref" "$out/842_bo64_llama323b_qrm.json" 2 rmlogp --model_type rm3
  echo "30/80"
)


# 2.3. qwen306b
# Runs 31-35 of 80: qwen3-0.6B-base checkpoints applied to the rollouts stored
# under llama3.2-3B/prime-sft/prime-rl-rollouts.
# The first run (tag reflogp) reads the scored rollouts and writes *_ref.json;
# the four runs after it (tag rmlogp) all take that *_ref.json as their input.
# Everything lives in a subshell, so the helper and its variables stay local.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  ckpt=$saves/qwen3-0.6B-base
  tok=$ckpt/prime-sft
  out=$saves/llama3.2-3B/prime-sft/prime-rl-rollouts
  ref=$out/842_bo64_qwen306b_ref.json

  # logp_run MODEL INPUT OUTPUT BATCH TAG [EXTRA_FLAGS...]
  # One accelerate launch (4 processes, CUDA_VISIBLE_DEVICES=0,1,2,3) of the
  # log-prob script; any EXTRA_FLAGS are appended after --model-tag.
  logp_run() {
    model=$1 input=$2 output=$3 batch=$4 tag=$5
    shift 5
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path "$model" \
      --tokenizer-path "$tok" \
      --input-json "$input" \
      --output-path "$output" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
  }

  logp_run "$ckpt/prime-sft" "$out/bon_test_0_842_scored.json" "$ref" 4 reflogp
  echo "31/80"

  logp_run "$ckpt/dpo_rm" "$ref" "$out/842_bo64_qwen306b_dpo.json" 4 rmlogp
  echo "32/80"

  logp_run "$ckpt/implicit-prm" "$ref" "$out/842_bo64_qwen306b_implicitprm.json" 4 rmlogp
  echo "33/80"

  logp_run "$ckpt/implicit-drm-beta10-gamma5" "$ref" "$out/842_bo64_qwen306b_ipvrm.json" 4 rmlogp
  echo "34/80"

  # The qrm run uses a smaller batch and the extra --model_type rm3 flag.
  logp_run "$ckpt/qrm" "$ref" "$out/842_bo64_qwen306b_qrm.json" 2 rmlogp --model_type rm3
  echo "35/80"
)


# 2.4. qwen38b
# Runs 36-40 of 80: qwen3-8B-base checkpoints applied to the rollouts stored
# under llama3.2-3B/prime-sft/prime-rl-rollouts.
# The first run (tag reflogp) reads the scored rollouts and writes *_ref.json;
# the four runs after it (tag rmlogp) all take that *_ref.json as their input.
# Everything lives in a subshell, so the helper and its variables stay local.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  ckpt=$saves/qwen3-8B-base
  tok=$ckpt/prime-sft
  out=$saves/llama3.2-3B/prime-sft/prime-rl-rollouts
  ref=$out/842_bo64_qwen38b_ref.json

  # logp_run MODEL INPUT OUTPUT BATCH TAG [EXTRA_FLAGS...]
  # One accelerate launch (4 processes, CUDA_VISIBLE_DEVICES=0,1,2,3) of the
  # log-prob script; any EXTRA_FLAGS are appended after --model-tag.
  logp_run() {
    model=$1 input=$2 output=$3 batch=$4 tag=$5
    shift 5
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path "$model" \
      --tokenizer-path "$tok" \
      --input-json "$input" \
      --output-path "$output" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
  }

  logp_run "$ckpt/prime-sft" "$out/bon_test_0_842_scored.json" "$ref" 1 reflogp
  echo "36/80"

  logp_run "$ckpt/dpo_rm" "$ref" "$out/842_bo64_qwen38b_dpo.json" 1 rmlogp
  echo "37/80"

  logp_run "$ckpt/implicit-prm" "$ref" "$out/842_bo64_qwen38b_implicitprm.json" 1 rmlogp
  echo "38/80"

  logp_run "$ckpt/implicit-drm-beta5-gamma2.5" "$ref" "$out/842_bo64_qwen38b_ipvrm.json" 1 rmlogp
  echo "39/80"

  # The qrm run additionally passes --model_type rm3.
  logp_run "$ckpt/qrm" "$ref" "$out/842_bo64_qwen38b_qrm.json" 1 rmlogp --model_type rm3
  echo "40/80"
)
















# 3. qwen3-0.6B-generated 0.2229
# ~/LLaMA-Factory-250514/saves_shuyan/qwen3-0.6B-base/prime-sft/prime-rl-rollouts/bo64_t05_validation_scored.json

# 3.1. llama321b
# Runs 41-45 of 80: llama3.2-1B-base checkpoints applied to the rollouts
# stored under qwen3-0.6B-base/prime-sft/prime-rl-rollouts.
# The first run (tag reflogp) reads the scored rollouts and writes *_ref.json;
# the four runs after it (tag rmlogp) all take that *_ref.json as their input.
# Everything lives in a subshell, so the helper and its variables stay local.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  ckpt=$saves/llama3.2-1B-base
  tok=$ckpt/prime-sft-full
  out=$saves/qwen3-0.6B-base/prime-sft/prime-rl-rollouts
  ref=$out/842_bo64_llama321b_ref.json

  # logp_run MODEL INPUT OUTPUT BATCH TAG [EXTRA_FLAGS...]
  # One accelerate launch (4 processes, CUDA_VISIBLE_DEVICES=0,1,2,3) of the
  # log-prob script; any EXTRA_FLAGS are appended after --model-tag.
  logp_run() {
    model=$1 input=$2 output=$3 batch=$4 tag=$5
    shift 5
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path "$model" \
      --tokenizer-path "$tok" \
      --input-json "$input" \
      --output-path "$output" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
  }

  logp_run "$ckpt/prime-sft-full" "$out/bo64_t05_validation_scored.json" "$ref" 8 reflogp
  echo "41/80"

  logp_run "$ckpt/dpo_rm" "$ref" "$out/842_bo64_llama321b_dpo.json" 8 rmlogp
  echo "42/80"

  logp_run "$ckpt/implicit-prm" "$ref" "$out/842_bo64_llama321b_implicitprm.json" 8 rmlogp
  echo "43/80"

  logp_run "$ckpt/implicit-drm-beta10-gamma5" "$ref" "$out/842_bo64_llama321b_ipvrm.json" 8 rmlogp
  echo "44/80"

  # The qrm run uses a smaller batch and the extra --model_type rm3 flag.
  logp_run "$ckpt/qrm" "$ref" "$out/842_bo64_llama321b_qrm.json" 4 rmlogp --model_type rm3
  echo "45/80"
)


# 3.2. llama323b
# Runs 46-50 of 80: llama3.2-3B checkpoints applied to the rollouts stored
# under qwen3-0.6B-base/prime-sft/prime-rl-rollouts.
# The first run (tag reflogp) reads the scored rollouts and writes *_ref.json;
# the four runs after it (tag rmlogp) all take that *_ref.json as their input.
# Everything lives in a subshell, so the helper and its variables stay local.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  ckpt=$saves/llama3.2-3B
  tok=$ckpt/prime-sft
  out=$saves/qwen3-0.6B-base/prime-sft/prime-rl-rollouts
  ref=$out/842_bo64_llama323b_ref.json

  # logp_run MODEL INPUT OUTPUT BATCH TAG [EXTRA_FLAGS...]
  # One accelerate launch (4 processes, CUDA_VISIBLE_DEVICES=0,1,2,3) of the
  # log-prob script; any EXTRA_FLAGS are appended after --model-tag.
  logp_run() {
    model=$1 input=$2 output=$3 batch=$4 tag=$5
    shift 5
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path "$model" \
      --tokenizer-path "$tok" \
      --input-json "$input" \
      --output-path "$output" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
  }

  logp_run "$ckpt/prime-sft" "$out/bo64_t05_validation_scored.json" "$ref" 4 reflogp
  echo "46/80"

  logp_run "$ckpt/dpo_rm" "$ref" "$out/842_bo64_llama323b_dpo.json" 4 rmlogp
  echo "47/80"

  logp_run "$ckpt/implicit-prm" "$ref" "$out/842_bo64_llama323b_implicitprm.json" 4 rmlogp
  echo "48/80"

  logp_run "$ckpt/implicit-drm-beta5-gamma2.5" "$ref" "$out/842_bo64_llama323b_ipvrm.json" 4 rmlogp
  echo "49/80"

  # The qrm run uses a smaller batch and the extra --model_type rm3 flag.
  logp_run "$ckpt/qrm/qrm" "$ref" "$out/842_bo64_llama323b_qrm.json" 2 rmlogp --model_type rm3
  echo "50/80"
)


# 3.3. qwen306b
# qwen3-0.6B-base model directories applied to the qwen3-0.6B rollouts: steps 51-55.
# Step 51 (reflogp) reads bo64_t05_validation_scored.json and writes
# 842_bo64_qwen306b_ref.json; steps 52-55 (rmlogp) all read that ref file.
# Runs in a subshell so the helper and variables stay private to this section.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  scorer_root=$saves/qwen3-0.6B-base
  sft_dir=prime-sft
  rollouts=$saves/qwen3-0.6B-base/prime-sft/prime-rl-rollouts
  stem=842_bo64_qwen306b

  # run_step STEP MODEL_SUBDIR INPUT_FILE OUTPUT_FILE BATCH TAG [EXTRA_ARGS...]
  # Launches one pass on 4 processes, then prints "STEP/80".
  run_step() {
    step=$1; model_subdir=$2; input_file=$3; output_file=$4; batch=$5; tag=$6
    shift 6
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path      "$scorer_root/$model_subdir" \
      --tokenizer-path  "$scorer_root/$sft_dir" \
      --input-json      "$rollouts/$input_file" \
      --output-path     "$rollouts/$output_file" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
    # Progress is printed whether or not the launch succeeded.
    echo "$step/80"
  }

  run_step 51 "$sft_dir"                 bo64_t05_validation_scored.json "${stem}_ref.json"         4 reflogp
  run_step 52 dpo_rm                     "${stem}_ref.json"              "${stem}_dpo.json"         4 rmlogp
  run_step 53 implicit-prm               "${stem}_ref.json"              "${stem}_implicitprm.json" 4 rmlogp
  run_step 54 implicit-drm-beta10-gamma5 "${stem}_ref.json"              "${stem}_ipvrm.json"       4 rmlogp
  # The qrm pass uses a smaller batch and passes --model_type rm3.
  run_step 55 qrm                        "${stem}_ref.json"              "${stem}_qrm.json"         2 rmlogp --model_type rm3
)


# 3.4. qwen38b
# qwen3-8B-base model directories applied to the qwen3-0.6B rollouts: steps 56-60.
# Step 56 (reflogp) reads bo64_t05_validation_scored.json and writes
# 842_bo64_qwen38b_ref.json; steps 57-60 (rmlogp) all read that ref file.
# Every pass in this section uses --batch-size 1.
# Runs in a subshell so the helper and variables stay private to this section.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  scorer_root=$saves/qwen3-8B-base
  sft_dir=prime-sft
  rollouts=$saves/qwen3-0.6B-base/prime-sft/prime-rl-rollouts
  stem=842_bo64_qwen38b

  # run_step STEP MODEL_SUBDIR INPUT_FILE OUTPUT_FILE BATCH TAG [EXTRA_ARGS...]
  # Launches one pass on 4 processes, then prints "STEP/80".
  run_step() {
    step=$1; model_subdir=$2; input_file=$3; output_file=$4; batch=$5; tag=$6
    shift 6
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path      "$scorer_root/$model_subdir" \
      --tokenizer-path  "$scorer_root/$sft_dir" \
      --input-json      "$rollouts/$input_file" \
      --output-path     "$rollouts/$output_file" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
    # Progress is printed whether or not the launch succeeded.
    echo "$step/80"
  }

  run_step 56 "$sft_dir"                  bo64_t05_validation_scored.json "${stem}_ref.json"         1 reflogp
  run_step 57 dpo_rm                      "${stem}_ref.json"              "${stem}_dpo.json"         1 rmlogp
  run_step 58 implicit-prm                "${stem}_ref.json"              "${stem}_implicitprm.json" 1 rmlogp
  run_step 59 implicit-drm-beta5-gamma2.5 "${stem}_ref.json"              "${stem}_ipvrm.json"       1 rmlogp
  # The qrm pass additionally passes --model_type rm3.
  run_step 60 qrm                         "${stem}_ref.json"              "${stem}_qrm.json"         1 rmlogp --model_type rm3
)
















# 4. qwen3-8B-generated 0.4353
# ~/LLaMA-Factory-250514/saves_shuyan/qwen3-8B-base/prime-sft/prime-rl-rollouts/bon_test_0_842_scored.json


# 4.1. llama321b
# llama3.2-1B-base model directories applied to the qwen3-8B rollouts: steps 61-65.
# Step 61 (reflogp) reads bon_test_0_842_scored.json and writes
# 842_bo64_llama321b_ref.json; steps 62-65 (rmlogp) all read that ref file.
# Runs in a subshell so the helper and variables stay private to this section.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  scorer_root=$saves/llama3.2-1B-base
  # This model family keeps its SFT checkpoint (also the tokenizer) in prime-sft-full.
  sft_dir=prime-sft-full
  rollouts=$saves/qwen3-8B-base/prime-sft/prime-rl-rollouts
  stem=842_bo64_llama321b

  # run_step STEP MODEL_SUBDIR INPUT_FILE OUTPUT_FILE BATCH TAG [EXTRA_ARGS...]
  # Launches one pass on 4 processes, then prints "STEP/80".
  run_step() {
    step=$1; model_subdir=$2; input_file=$3; output_file=$4; batch=$5; tag=$6
    shift 6
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path      "$scorer_root/$model_subdir" \
      --tokenizer-path  "$scorer_root/$sft_dir" \
      --input-json      "$rollouts/$input_file" \
      --output-path     "$rollouts/$output_file" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
    # Progress is printed whether or not the launch succeeded.
    echo "$step/80"
  }

  run_step 61 "$sft_dir"                 bon_test_0_842_scored.json "${stem}_ref.json"         8 reflogp
  run_step 62 dpo_rm                     "${stem}_ref.json"         "${stem}_dpo.json"         8 rmlogp
  run_step 63 implicit-prm               "${stem}_ref.json"         "${stem}_implicitprm.json" 8 rmlogp
  run_step 64 implicit-drm-beta10-gamma5 "${stem}_ref.json"         "${stem}_ipvrm.json"       8 rmlogp
  # The qrm pass uses a smaller batch and passes --model_type rm3.
  run_step 65 qrm                        "${stem}_ref.json"         "${stem}_qrm.json"         4 rmlogp --model_type rm3
)


# 4.2. llama323b
# llama3.2-3B model directories applied to the qwen3-8B rollouts: steps 66-70.
# Step 66 (reflogp) reads bon_test_0_842_scored.json and writes
# 842_bo64_llama323b_ref.json; steps 67-70 (rmlogp) all read that ref file.
# Runs in a subshell so the helper and variables stay private to this section.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  scorer_root=$saves/llama3.2-3B
  sft_dir=prime-sft
  rollouts=$saves/qwen3-8B-base/prime-sft/prime-rl-rollouts
  stem=842_bo64_llama323b

  # run_step STEP MODEL_SUBDIR INPUT_FILE OUTPUT_FILE BATCH TAG [EXTRA_ARGS...]
  # Launches one pass on 4 processes, then prints "STEP/80".
  run_step() {
    step=$1; model_subdir=$2; input_file=$3; output_file=$4; batch=$5; tag=$6
    shift 6
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path      "$scorer_root/$model_subdir" \
      --tokenizer-path  "$scorer_root/$sft_dir" \
      --input-json      "$rollouts/$input_file" \
      --output-path     "$rollouts/$output_file" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
    # Progress is printed whether or not the launch succeeded.
    echo "$step/80"
  }

  run_step 66 "$sft_dir"                  bon_test_0_842_scored.json "${stem}_ref.json"         4 reflogp
  run_step 67 dpo_rm                      "${stem}_ref.json"         "${stem}_dpo.json"         4 rmlogp
  run_step 68 implicit-prm                "${stem}_ref.json"         "${stem}_implicitprm.json" 4 rmlogp
  run_step 69 implicit-drm-beta5-gamma2.5 "${stem}_ref.json"         "${stem}_ipvrm.json"       4 rmlogp
  # The qrm model path is the nested qrm/qrm directory; this pass uses a
  # smaller batch and passes --model_type rm3.
  run_step 70 qrm/qrm                     "${stem}_ref.json"         "${stem}_qrm.json"         2 rmlogp --model_type rm3
)


# 4.3. qwen306b
# qwen3-0.6B-base model directories applied to the qwen3-8B rollouts: steps 71-75.
# Step 71 (reflogp) reads bon_test_0_842_scored.json and writes
# 842_bo64_qwen306b_ref.json; steps 72-75 (rmlogp) all read that ref file.
# Runs in a subshell so the helper and variables stay private to this section.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  scorer_root=$saves/qwen3-0.6B-base
  sft_dir=prime-sft
  rollouts=$saves/qwen3-8B-base/prime-sft/prime-rl-rollouts
  stem=842_bo64_qwen306b

  # run_step STEP MODEL_SUBDIR INPUT_FILE OUTPUT_FILE BATCH TAG [EXTRA_ARGS...]
  # Launches one pass on 4 processes, then prints "STEP/80".
  run_step() {
    step=$1; model_subdir=$2; input_file=$3; output_file=$4; batch=$5; tag=$6
    shift 6
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path      "$scorer_root/$model_subdir" \
      --tokenizer-path  "$scorer_root/$sft_dir" \
      --input-json      "$rollouts/$input_file" \
      --output-path     "$rollouts/$output_file" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
    # Progress is printed whether or not the launch succeeded.
    echo "$step/80"
  }

  run_step 71 "$sft_dir"                 bon_test_0_842_scored.json "${stem}_ref.json"         4 reflogp
  run_step 72 dpo_rm                     "${stem}_ref.json"         "${stem}_dpo.json"         4 rmlogp
  run_step 73 implicit-prm               "${stem}_ref.json"         "${stem}_implicitprm.json" 4 rmlogp
  run_step 74 implicit-drm-beta10-gamma5 "${stem}_ref.json"         "${stem}_ipvrm.json"       4 rmlogp
  # The qrm pass uses a smaller batch and passes --model_type rm3.
  run_step 75 qrm                        "${stem}_ref.json"         "${stem}_qrm.json"         2 rmlogp --model_type rm3
)


# 4.4. qwen38b
# qwen3-8B-base model directories applied to the qwen3-8B rollouts: steps 76-80.
# Step 76 (reflogp) reads bon_test_0_842_scored.json and writes
# 842_bo64_qwen38b_ref.json; steps 77-80 (rmlogp) all read that ref file.
# Every pass in this section uses --batch-size 1.
# Runs in a subshell so the helper and variables stay private to this section.
(
  saves=~/LLaMA-Factory-250514/saves_shuyan
  scorer_root=$saves/qwen3-8B-base
  sft_dir=prime-sft
  rollouts=$saves/qwen3-8B-base/prime-sft/prime-rl-rollouts
  stem=842_bo64_qwen38b

  # run_step STEP MODEL_SUBDIR INPUT_FILE OUTPUT_FILE BATCH TAG [EXTRA_ARGS...]
  # Launches one pass on 4 processes, then prints "STEP/80".
  run_step() {
    step=$1; model_subdir=$2; input_file=$3; output_file=$4; batch=$5; tag=$6
    shift 6
    CUDA_VISIBLE_DEVICES=0,1,2,3 \
    ~/verl_250713/.conda/bin/accelerate launch \
      --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
      --model-path      "$scorer_root/$model_subdir" \
      --tokenizer-path  "$scorer_root/$sft_dir" \
      --input-json      "$rollouts/$input_file" \
      --output-path     "$rollouts/$output_file" \
      --batch-size "$batch" \
      --model-tag "$tag" \
      "$@"
    # Progress is printed whether or not the launch succeeded.
    echo "$step/80"
  }

  run_step 76 "$sft_dir"                  bon_test_0_842_scored.json "${stem}_ref.json"         1 reflogp
  run_step 77 dpo_rm                      "${stem}_ref.json"         "${stem}_dpo.json"         1 rmlogp
  run_step 78 implicit-prm                "${stem}_ref.json"         "${stem}_implicitprm.json" 1 rmlogp
  run_step 79 implicit-drm-beta5-gamma2.5 "${stem}_ref.json"         "${stem}_ipvrm.json"       1 rmlogp
  # The qrm pass additionally passes --model_type rm3.
  run_step 80 qrm                         "${stem}_ref.json"         "${stem}_qrm.json"         1 rmlogp --model_type rm3
)
