#!/bin/bash

# --- Core task runner ---
# Launches scripts/bon2_compute_logp_for_responses.py through `accelerate launch`
# unless the requested output file already exists, then prints the progress
# marker.
#
# Arguments:
#   $1  devices      value placed in CUDA_VISIBLE_DEVICES for the launch
#   $2  model_path   forwarded as --model-path
#   $3  token_path   forwarded as --tokenizer-path
#   $4  input_json   forwarded as --input-json
#   $5  output_path  forwarded as --output-path; the task is skipped if this
#                    file already exists
#   $6  batch_size   forwarded as --batch-size
#   $7  model_tag    forwarded as --model-tag
#   $8  progress     free-form marker echoed after the task (e.g. "26/80")
#   $9  extra_args   optional; whitespace-separated extra options (on a single
#                    line) appended to the command
# Outputs:
#   A skip notice when the output exists, followed by the progress marker,
#   both on stdout.
# Returns:
#   0 when the task was skipped or the launch succeeded; otherwise the exit
#   status of the launcher.
#
# NOTE(review): --num_processes is fixed at 4 regardless of how many devices
# are listed in $1 — confirm this is intended for the 8-device calls.
run_task() {
    local devices=$1
    local model_path=$2
    local token_path=$3
    local input_json=$4
    local output_path=$5
    local batch_size=$6
    local model_tag=$7
    local progress=$8
    local extra_args=${9:-}

    # Split the optional extras into separate words up front. Using an array
    # keeps the intended word-splitting but avoids accidental glob expansion.
    local -a extra_argv=()
    if [[ -n "$extra_args" ]]; then
        read -r -a extra_argv <<< "$extra_args"
    fi

    local rc=0
    if [[ -f "$output_path" ]]; then
        echo "跳过: $output_path 已存在。"
    else
        # Remember the launcher's status; the progress echo below would
        # otherwise overwrite it and hide failures from the caller.
        CUDA_VISIBLE_DEVICES="$devices" \
        ~/verl_250713/.conda/bin/accelerate launch \
          --num_processes 4 ~/verl_250713/scripts/bon2_compute_logp_for_responses.py \
          --model-path      "$model_path" \
          --tokenizer-path  "$token_path" \
          --input-json      "$input_json" \
          --output-path     "$output_path" \
          --batch-size      "$batch_size" \
          --model-tag       "$model_tag" \
          "${extra_argv[@]}" || rc=$?
    fi

    # The progress marker is printed for skipped, successful and failed tasks.
    echo "$progress"
    return "$rc"
}

# --- Path definitions ---
# SFT checkpoint directories, one per base model. ${HOME} is used instead of
# "~" because bash does not perform tilde expansion inside double quotes; a
# quoted "~/..." would stay a literal path starting with "~".
SFT_L1B="${HOME}/LLaMA-Factory-250514/saves_shuyan/llama3.2-1B-base/prime-sft-full"
SFT_L3B="${HOME}/LLaMA-Factory-250514/saves_shuyan/llama3.2-3B/prime-sft"
SFT_Q06B="${HOME}/LLaMA-Factory-250514/saves_shuyan/qwen3-0.6B-base/prime-sft"
SFT_Q8B="${HOME}/LLaMA-Factory-250514/saves_shuyan/qwen3-8B-base/prime-sft"

# ==============================================================================
# 1. llama3.2-1b-generated (Source)
# ==============================================================================
# RO_1B="$SFT_L1B/prime-rl-rollouts"
# IN_1B="$RO_1B/bo64_t05_validation_scored.json"

# # 1.1 llama321b
# run_task "0,1,2,3" "$SFT_L1B" "$SFT_L1B" "$IN_1B" "$RO_1B/842_bo64_llama321b_ref.json" 8 "reflogp" "1/80"
# run_task "0,1,2,3" "${SFT_L1B%/*}/dpo_rm" "$SFT_L1B" "$RO_1B/842_bo64_llama321b_ref.json" "$RO_1B/842_bo64_llama321b_dpo.json" 8 "rmlogp" "2/80"
# run_task "0,1,2,3" "${SFT_L1B%/*}/implicit-prm" "$SFT_L1B" "$RO_1B/842_bo64_llama321b_ref.json" "$RO_1B/842_bo64_llama321b_implicitprm.json" 8 "rmlogp" "3/80"
# run_task "0,1,2,3" "${SFT_L1B%/*}/implicit-drm-beta10-gamma5" "$SFT_L1B" "$RO_1B/842_bo64_llama321b_ref.json" "$RO_1B/842_bo64_llama321b_ipvrm.json" 8 "rmlogp" "4/80"
# run_task "0,1,2,3" "${SFT_L1B%/*}/qrm" "$SFT_L1B" "$RO_1B/842_bo64_llama321b_ref.json" "$RO_1B/842_bo64_llama321b_qrm.json" 4 "rmlogp" "5/80" "--model_type rm3"

# ==============================================================================
# 2. llama3.2-3b-generated (Source)
# ==============================================================================
# Rollout directory under the llama3.2-3B SFT checkpoint, and the scored input
# file read from it.
RO_3B="${SFT_L3B}/prime-rl-rollouts"
IN_3B="${RO_3B}/bon_test_0_842_scored.json"

# 2.2 llama323b: the SFT checkpoint serves as both model and tokenizer,
# tagged "reflogp".
run_task \
    "0,1,2,3,4,5,6,7" \
    "${SFT_L3B}" \
    "${SFT_L3B}" \
    "${IN_3B}" \
    "${RO_3B}/842_bo64_llama323b_ref1.json" \
    4 \
    "reflogp" \
    "26/80"

# ==============================================================================
# 3. qwen3-0.6B-generated (Source)
# ==============================================================================
# Rollout directory under the qwen3-0.6B SFT checkpoint, and the scored input
# file read from it.
RO_Q06B="${SFT_Q06B}/prime-rl-rollouts"
IN_Q06B="${RO_Q06B}/bo64_t05_validation_scored.json"

# 3.3 qwen306b: the SFT checkpoint serves as both model and tokenizer,
# tagged "reflogp".
run_task \
    "0,1,2,3,4,5,6,7" \
    "${SFT_Q06B}" \
    "${SFT_Q06B}" \
    "${IN_Q06B}" \
    "${RO_Q06B}/842_bo64_qwen306b_ref1.json" \
    4 \
    "reflogp" \
    "51/80"

# ==============================================================================
# 4. qwen3-8B-generated (Source)
# ==============================================================================
# Rollout directory under the qwen3-8B SFT checkpoint, and the scored input
# file read from it.
RO_Q8B="${SFT_Q8B}/prime-rl-rollouts"
IN_Q8B="${RO_Q8B}/bon_test_0_842_scored.json"

# 4.4 qwen38b: the SFT checkpoint serves as both model and tokenizer,
# tagged "reflogp", with batch size 1.
run_task \
    "0,1,2,3,4,5,6,7" \
    "${SFT_Q8B}" \
    "${SFT_Q8B}" \
    "${IN_Q8B}" \
    "${RO_Q8B}/842_bo64_qwen38b_ref1.json" \
    1 \
    "reflogp" \
    "76/80"

# Final notice, printed once every task above has been attempted.
printf '%s\n' "所有任务处理完成！"