#!/bin/bash

# --- Core task runner ---
# run_task: run one scoring job unless its output file already exists, then
# print the progress marker.
#
# The job is bon2_compute_logp_for_responses_1.py started through
# `accelerate launch --num_processes 4`.
#
# Arguments:
#   $1 DEVICES     value placed in CUDA_VISIBLE_DEVICES for the launch
#   $2 MODEL_PATH  forwarded as --model-path
#   $3 TOKEN_PATH  forwarded as --tokenizer-path
#   $4 INPUT       forwarded as --input-json
#   $5 OUTPUT      forwarded as --output-path; if this file already exists the
#                  launch is skipped
#   $6 BS          forwarded as --batch-size
#   $7 TAG         forwarded as --model-tag
#   $8 PROGRESS    text echoed once the task has been handled (run or skipped)
#   $9 EXTRA_ARGS  optional; whitespace-separated extra options appended to the
#                  command line (e.g. "--model_type rm3")
# Outputs:
#   stdout: skip notice (when skipped) and the progress marker
#   stderr: failure notice when the launcher exits non-zero
# Returns:
#   0 when the task was skipped or succeeded, otherwise the launcher's status.
run_task() {
    local devices=$1
    local model_path=$2
    local token_path=$3
    local input_json=$4
    local output_path=$5
    local batch_size=$6
    local model_tag=$7
    local progress=$8
    local extra_args=${9-}
    local status=0
    local -a extra=()

    # The shell never expands a tilde that was stored inside a quoted string,
    # so a value such as "~/dir/file" would be tested with -f and handed to the
    # launcher as a literal path. Expand a leading "~/" explicitly.
    [[ $model_path == "~/"* ]] && model_path=$HOME/${model_path:2}
    [[ $token_path == "~/"* ]] && token_path=$HOME/${token_path:2}
    [[ $input_json == "~/"* ]] && input_json=$HOME/${input_json:2}
    [[ $output_path == "~/"* ]] && output_path=$HOME/${output_path:2}

    # Split the extra options on whitespace into separate words without
    # letting glob characters in them match files in the current directory.
    if [[ -n $extra_args ]]; then
        read -r -a extra <<<"$extra_args"
    fi

    if [[ -f $output_path ]]; then
        echo "跳过: $output_path 已存在。"
    else
        # NOTE(review): --num_processes is fixed at 4 regardless of how many
        # devices are listed in $1 — confirm this is intended.
        CUDA_VISIBLE_DEVICES=$devices \
        ~/verl_250713/.conda/bin/accelerate launch \
          --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
          --model-path      "$model_path" \
          --tokenizer-path  "$token_path" \
          --input-json      "$input_json" \
          --output-path     "$output_path" \
          --batch-size      "$batch_size" \
          --model-tag       "$model_tag" \
          "${extra[@]}" || status=$?

        # Surface the failure instead of silently moving on to the next task.
        if (( status != 0 )); then
            echo "失败: $output_path (退出码 $status)" >&2
        fi
    fi
    echo "$progress"
    return "$status"
}

# --- Path definitions ---
# SFT checkpoint directories, one per model. Sibling directories of each
# checkpoint are addressed later with ${VAR%/*}/<name>.
# $HOME is used instead of "~" because a tilde inside double quotes is never
# expanded and would remain a literal "~" in every path built from these.
SFT_L1B="$HOME/LLaMA-Factory-250514/saves_shuyan/llama3.2-1B-base/prime-sft-full"
SFT_L3B="$HOME/LLaMA-Factory-250514/saves_shuyan/llama3.2-3B/prime-sft"
SFT_Q06B="$HOME/LLaMA-Factory-250514/saves_shuyan/qwen3-0.6B-base/prime-sft"
SFT_Q8B="$HOME/LLaMA-Factory-250514/saves_shuyan/qwen3-8B-base/prime-sft"

# ==============================================================================
# 1. Rollouts generated by llama3.2-1b (source)
# ==============================================================================
RO_1B="$SFT_L1B/prime-rl-rollouts"
IN_1B="$RO_1B/bo64_t05_validation_scored.json"

# Scoring models for sub-sections 1.1-1.4, six fields per row:
#   tag, SFT checkpoint, implicit-drm dir, qrm dir, batch size, qrm batch size
scorers=(
    llama321b "$SFT_L1B"  "implicit-drm-beta10-gamma5"  "qrm"     8 4
    llama323b "$SFT_L3B"  "implicit-drm-beta5-gamma2.5" "qrm/qrm" 4 2
    qwen306b  "$SFT_Q06B" "implicit-drm-beta10-gamma5"  "qrm"     4 2
    qwen38b   "$SFT_Q8B"  "implicit-drm-beta5-gamma2.5" "qrm"     1 1
)
step=0 # progress counter: this section reports 1/80 .. 20/80

for ((k = 0; k < ${#scorers[@]}; k += 6)); do
    tag=${scorers[k]}
    sft=${scorers[k + 1]}
    drm_dir=${scorers[k + 2]}
    qrm_dir=${scorers[k + 3]}
    bs=${scorers[k + 4]}
    qrm_bs=${scorers[k + 5]}
    rm_root=${sft%/*} # reward-model dirs are siblings of the SFT checkpoint dir
    stem="$RO_1B/842_bo64_${tag}"
    ref_json="${stem}_ref.json"

    # The reflogp run comes first; its output file is the input of the four
    # rmlogp runs that follow.
    run_task "0,1,2,3" "$sft" "$sft" "$IN_1B" "$ref_json" "$bs" "reflogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/dpo_rm" "$sft" "$ref_json" "${stem}_dpo.json" "$bs" "rmlogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/implicit-prm" "$sft" "$ref_json" "${stem}_implicitprm.json" "$bs" "rmlogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/$drm_dir" "$sft" "$ref_json" "${stem}_ipvrm.json" "$bs" "rmlogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/$qrm_dir" "$sft" "$ref_json" "${stem}_qrm.json" "$qrm_bs" "rmlogp" "$((++step))/80" "--model_type rm3"
done

# ==============================================================================
# 2. Rollouts generated by llama3.2-3b (source)
# ==============================================================================
RO_3B="$SFT_L3B/prime-rl-rollouts"
IN_3B="$RO_3B/bon_test_0_842_scored.json"

# Scoring models for sub-sections 2.1-2.4, six fields per row:
#   tag, SFT checkpoint, implicit-drm dir, qrm dir, batch size, qrm batch size
scorers=(
    llama321b "$SFT_L1B"  "implicit-drm-beta10-gamma5"  "qrm"     8 4
    llama323b "$SFT_L3B"  "implicit-drm-beta5-gamma2.5" "qrm/qrm" 4 2
    qwen306b  "$SFT_Q06B" "implicit-drm-beta10-gamma5"  "qrm"     4 2
    qwen38b   "$SFT_Q8B"  "implicit-drm-beta5-gamma2.5" "qrm"     1 1
)
step=20 # progress counter: this section reports 21/80 .. 40/80

for ((k = 0; k < ${#scorers[@]}; k += 6)); do
    tag=${scorers[k]}
    sft=${scorers[k + 1]}
    drm_dir=${scorers[k + 2]}
    qrm_dir=${scorers[k + 3]}
    bs=${scorers[k + 4]}
    qrm_bs=${scorers[k + 5]}
    rm_root=${sft%/*} # reward-model dirs are siblings of the SFT checkpoint dir
    stem="$RO_3B/842_bo64_${tag}"
    ref_json="${stem}_ref.json"

    # The reflogp run comes first; its output file is the input of the four
    # rmlogp runs that follow.
    run_task "0,1,2,3" "$sft" "$sft" "$IN_3B" "$ref_json" "$bs" "reflogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/dpo_rm" "$sft" "$ref_json" "${stem}_dpo.json" "$bs" "rmlogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/implicit-prm" "$sft" "$ref_json" "${stem}_implicitprm.json" "$bs" "rmlogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/$drm_dir" "$sft" "$ref_json" "${stem}_ipvrm.json" "$bs" "rmlogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/$qrm_dir" "$sft" "$ref_json" "${stem}_qrm.json" "$qrm_bs" "rmlogp" "$((++step))/80" "--model_type rm3"
done

# ==============================================================================
# 3. Rollouts generated by qwen3-0.6B (source)
# ==============================================================================
RO_Q06B="$SFT_Q06B/prime-rl-rollouts"
IN_Q06B="$RO_Q06B/bo64_t05_validation_scored.json"

# Scoring models for sub-sections 3.1-3.4, six fields per row:
#   tag, SFT checkpoint, implicit-drm dir, qrm dir, batch size, qrm batch size
scorers=(
    llama321b "$SFT_L1B"  "implicit-drm-beta10-gamma5"  "qrm"     8 4
    llama323b "$SFT_L3B"  "implicit-drm-beta5-gamma2.5" "qrm/qrm" 4 2
    qwen306b  "$SFT_Q06B" "implicit-drm-beta10-gamma5"  "qrm"     4 2
    qwen38b   "$SFT_Q8B"  "implicit-drm-beta5-gamma2.5" "qrm"     1 1
)
step=40 # progress counter: this section reports 41/80 .. 60/80

for ((k = 0; k < ${#scorers[@]}; k += 6)); do
    tag=${scorers[k]}
    sft=${scorers[k + 1]}
    drm_dir=${scorers[k + 2]}
    qrm_dir=${scorers[k + 3]}
    bs=${scorers[k + 4]}
    qrm_bs=${scorers[k + 5]}
    rm_root=${sft%/*} # reward-model dirs are siblings of the SFT checkpoint dir
    stem="$RO_Q06B/842_bo64_${tag}"
    ref_json="${stem}_ref.json"

    # The reflogp run comes first; its output file is the input of the four
    # rmlogp runs that follow.
    run_task "0,1,2,3" "$sft" "$sft" "$IN_Q06B" "$ref_json" "$bs" "reflogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/dpo_rm" "$sft" "$ref_json" "${stem}_dpo.json" "$bs" "rmlogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/implicit-prm" "$sft" "$ref_json" "${stem}_implicitprm.json" "$bs" "rmlogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/$drm_dir" "$sft" "$ref_json" "${stem}_ipvrm.json" "$bs" "rmlogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/$qrm_dir" "$sft" "$ref_json" "${stem}_qrm.json" "$qrm_bs" "rmlogp" "$((++step))/80" "--model_type rm3"
done

# ==============================================================================
# 4. Rollouts generated by qwen3-8B (source)
# ==============================================================================
RO_Q8B="$SFT_Q8B/prime-rl-rollouts"
IN_Q8B="$RO_Q8B/bon_test_0_842_scored.json"

# Scoring models for sub-sections 4.1-4.4, six fields per row:
#   tag, SFT checkpoint, implicit-drm dir, qrm dir, batch size, qrm batch size
scorers=(
    llama321b "$SFT_L1B"  "implicit-drm-beta10-gamma5"  "qrm"     8 4
    llama323b "$SFT_L3B"  "implicit-drm-beta5-gamma2.5" "qrm/qrm" 4 2
    qwen306b  "$SFT_Q06B" "implicit-drm-beta10-gamma5"  "qrm"     4 2
    qwen38b   "$SFT_Q8B"  "implicit-drm-beta5-gamma2.5" "qrm"     1 1
)
step=60 # progress counter: this section reports 61/80 .. 80/80

for ((k = 0; k < ${#scorers[@]}; k += 6)); do
    tag=${scorers[k]}
    sft=${scorers[k + 1]}
    drm_dir=${scorers[k + 2]}
    qrm_dir=${scorers[k + 3]}
    bs=${scorers[k + 4]}
    qrm_bs=${scorers[k + 5]}
    rm_root=${sft%/*} # reward-model dirs are siblings of the SFT checkpoint dir
    stem="$RO_Q8B/842_bo64_${tag}"
    ref_json="${stem}_ref.json"

    # The reflogp run comes first; its output file is the input of the four
    # rmlogp runs that follow.
    run_task "0,1,2,3" "$sft" "$sft" "$IN_Q8B" "$ref_json" "$bs" "reflogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/dpo_rm" "$sft" "$ref_json" "${stem}_dpo.json" "$bs" "rmlogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/implicit-prm" "$sft" "$ref_json" "${stem}_implicitprm.json" "$bs" "rmlogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/$drm_dir" "$sft" "$ref_json" "${stem}_ipvrm.json" "$bs" "rmlogp" "$((++step))/80"
    run_task "0,1,2,3" "$rm_root/$qrm_dir" "$sft" "$ref_json" "${stem}_qrm.json" "$qrm_bs" "rmlogp" "$((++step))/80" "--model_type rm3"
done

# Final notice printed once every task above has been attempted.
printf '%s\n' "所有任务处理完成！"