#!/bin/bash

# --- Core task runner ---
#######################################
# Launch bon2_compute_logp_for_responses_1.py through `accelerate launch`
# for one model/input pair, then print a progress label.
# Arguments:
#   $1 - devices:     value exported as CUDA_VISIBLE_DEVICES for the launch
#   $2 - model_path:  forwarded as --model-path
#   $3 - token_path:  forwarded as --tokenizer-path
#   $4 - input_json:  forwarded as --input-json
#   $5 - output_path: forwarded as --output-path
#   $6 - batch_size:  forwarded as --batch-size
#   $7 - model_tag:   forwarded as --model-tag
#   $8 - progress:    label printed to stdout once the launch has returned
#   $9 - extra_args:  optional whitespace-separated extra flags appended to
#                     the command line (e.g. "--model_type rm3")
# Outputs:
#   Whatever the launched command prints, followed by the progress label on
#   stdout; a one-line notice on stderr when the launch fails.
# Returns:
#   The exit status of the `accelerate launch` command.
#######################################
run_task() {
    local devices=$1
    local model_path=$2
    local token_path=$3
    local input_json=$4
    local output_path=$5
    local batch_size=$6
    local model_tag=$7
    local progress=$8
    local extra_args=${9-}

    # Split the extra flags on whitespace into an array so they are passed as
    # separate words without also undergoing pathname expansion (an unquoted
    # $extra_args would glob against the current directory).
    # `read -d ''` consumes the whole string and reports non-zero at EOF,
    # which is expected here, hence the `|| true`.
    local -a extra_argv=()
    IFS=$' \t\n' read -r -d '' -a extra_argv <<< "$extra_args" || true

    # NOTE: an earlier revision skipped the launch when "$output_path" already
    # existed; that guard is disabled, so the launch always runs.
    # NOTE(review): --num_processes is fixed at 8 regardless of how many ids
    # are listed in $devices; confirm callers always pass eight devices.
    local status=0
    CUDA_VISIBLE_DEVICES="$devices" \
        ~/verl_250713/.conda/bin/accelerate launch \
          --num_processes 8 ~/verl_250713/scripts/bon2_compute_logp_for_responses_1.py \
          --model-path      "$model_path" \
          --tokenizer-path  "$token_path" \
          --input-json      "$input_json" \
          --output-path     "$output_path" \
          --batch-size      "$batch_size" \
          --model-tag       "$model_tag" \
          "${extra_argv[@]}" || status=$?

    printf '%s\n' "$progress"

    # Surface the failure instead of letting the progress print mask it.
    if (( status != 0 )); then
        printf 'run_task: launch for %s exited with status %d\n' \
            "$output_path" "$status" >&2
    fi
    return "$status"
}

# --- Path definitions ---
# Base directories of the SFT checkpoints. "$HOME" is used rather than "~"
# because tilde expansion is not performed inside double quotes, so a quoted
# "~/..." would be handed on as a literal "~" path component.
SFT_L1B="$HOME/LLaMA-Factory-250514/saves_shuyan/llama3.2-1B-base/prime-sft-full"
SFT_L3B="$HOME/LLaMA-Factory-250514/saves_shuyan/llama3.2-3B/prime-sft"
SFT_Q06B="$HOME/LLaMA-Factory-250514/saves_shuyan/qwen3-0.6B-base/prime-sft"
SFT_Q8B="$HOME/LLaMA-Factory-250514/saves_shuyan/qwen3-8B-base/prime-sft"

# ==============================================================================
# 1. llama3.2-1b-generated (Source)
# ==============================================================================
# RO_1B="$SFT_L1B/prime-rl-rollouts"
# IN_1B="$RO_1B/bo64_t05_validation_scored.json"

# # # 1.1 llama321b 
# run_task "0,1,2,3,4,5,6,7" "${SFT_L1B%/*}/qrm1/checkpoint-100/" "$SFT_L1B" "$RO_1B/842_bo64_llama321b_ref.json" "$RO_1B/842_bo64_llama321b_qrm_100.json" 4 "rmlogp" "1/80" "--model_type rm3"
# run_task "0,1,2,3,4,5,6,7" "${SFT_L1B%/*}/qrm1/checkpoint-200/" "$SFT_L1B" "$RO_1B/842_bo64_llama321b_ref.json" "$RO_1B/842_bo64_llama321b_qrm_200.json" 4 "rmlogp" "2/80" "--model_type rm3"
# run_task "0,1,2,3,4,5,6,7" "${SFT_L1B%/*}/qrm1/checkpoint-300/" "$SFT_L1B" "$RO_1B/842_bo64_llama321b_ref.json" "$RO_1B/842_bo64_llama321b_qrm_300.json" 4 "rmlogp" "3/80" "--model_type rm3"
# run_task "0,1,2,3,4,5,6,7" "${SFT_L1B%/*}/qrm1/checkpoint-400/" "$SFT_L1B" "$RO_1B/842_bo64_llama321b_ref.json" "$RO_1B/842_bo64_llama321b_qrm_400.json" 4 "rmlogp" "4/80" "--model_type rm3"
# run_task "0,1,2,3,4,5,6,7" "${SFT_L1B%/*}/qrm1/checkpoint-500/" "$SFT_L1B" "$RO_1B/842_bo64_llama321b_ref.json" "$RO_1B/842_bo64_llama321b_qrm_500.json" 4 "rmlogp" "5/80" "--model_type rm3"
# run_task "0,1,2,3,4,5,6,7" "${SFT_L1B%/*}/qrm1/checkpoint-600/" "$SFT_L1B" "$RO_1B/842_bo64_llama321b_ref.json" "$RO_1B/842_bo64_llama321b_qrm_600.json" 4 "rmlogp" "6/80" "--model_type rm3"
# run_task "0,1,2,3,4,5,6,7" "${SFT_L1B%/*}/qrm1/checkpoint-700/" "$SFT_L1B" "$RO_1B/842_bo64_llama321b_ref.json" "$RO_1B/842_bo64_llama321b_qrm_700.json" 4 "rmlogp" "7/80" "--model_type rm3"

# ==============================================================================
# 2. llama3.2-3b-generated (Source)
# ==============================================================================
# RO_3B="$SFT_L3B/prime-rl-rollouts"
# IN_3B="$RO_3B/bon_test_0_842_scored.json"

# # # 2.2 llama323b 
# run_task "0,1,2,3" "${SFT_L3B%/*}/qrm/checkpoint-470/" "$SFT_L3B" "$RO_3B/842_bo64_llama323b_ref.json" "$RO_3B/842_bo64_llama323b_qrm_470.json" 2 "rmlogp" "30/80" "--model_type rm3"

# ==============================================================================
# 3. qwen3-0.6B-generated (Source)
# ==============================================================================
# RO_Q06B="$SFT_Q06B/prime-rl-rollouts"
# IN_Q06B="$RO_Q06B/bo64_t05_validation_scored.json"

# 3.3 qwen306b
# run_task "0,1,2,3,4,5,6,7" "${SFT_Q06B%/*}/qrm1/checkpoint-200"  "$SFT_Q06B" "$RO_Q06B/842_bo64_qwen306b_ref.json" "$RO_Q06B/842_bo64_qwen306b_qrm-200.json" 2  "rmlogp" "8/80" "--model_type rm3"
# run_task "0,1,2,3,4,5,6,7" "${SFT_Q06B%/*}/qrm1/checkpoint-400"  "$SFT_Q06B" "$RO_Q06B/842_bo64_qwen306b_ref.json" "$RO_Q06B/842_bo64_qwen306b_qrm-400.json" 2  "rmlogp" "9/80" "--model_type rm3"
# run_task "0,1,2,3,4,5,6,7" "${SFT_Q06B%/*}/qrm1/checkpoint-600"  "$SFT_Q06B" "$RO_Q06B/842_bo64_qwen306b_ref.json" "$RO_Q06B/842_bo64_qwen306b_qrm-600.json" 2  "rmlogp" "10/80" "--model_type rm3"
# run_task "0,1,2,3,4,5,6,7" "${SFT_Q06B%/*}/qrm1/checkpoint-800"  "$SFT_Q06B" "$RO_Q06B/842_bo64_qwen306b_ref.json" "$RO_Q06B/842_bo64_qwen306b_qrm-800.json" 2  "rmlogp" "11/80" "--model_type rm3"
# run_task "0,1,2,3,4,5,6,7" "${SFT_Q06B%/*}/qrm1/checkpoint-1000" "$SFT_Q06B" "$RO_Q06B/842_bo64_qwen306b_ref.json" "$RO_Q06B/842_bo64_qwen306b_qrm-1000.json" 2 "rmlogp" "12/80" "--model_type rm3"


# ==============================================================================
# 4. qwen3-8B-generated (Source)
# ==============================================================================
RO_Q8B="$SFT_Q8B/prime-rl-rollouts"
# Not referenced anywhere in the visible part of this script; kept as-is.
IN_Q8B="$RO_Q8B/bon_test_0_842_scored.json"

# Run the task once per qrm3 checkpoint (checkpoint-100 .. checkpoint-1000),
# in ascending order. Every run reads the same reference JSON and writes a
# per-checkpoint output file; the progress label is "<n>/10".
for step in {1..10}; do
    ckpt=$((step * 100))
    run_task "0,1,2,3,4,5,6,7" \
        "${SFT_Q8B%/*}/qrm3/checkpoint-${ckpt}" \
        "$SFT_Q8B" \
        "$RO_Q8B/842_bo64_qwen38b_ref.json" \
        "$RO_Q8B/842_bo64_qwen38b_qrm_${ckpt}.json" \
        2 "rmlogp" "${step}/10" "--model_type rm3"
done
# Drop the loop helpers so no extra globals outlive this section.
unset step ckpt




# echo "所有任务处理完成！"