#!/bin/bash
# Abort on the first failing command (errexit) and trace every command (xtrace).
set -o errexit -o xtrace
# Environment variables, exported so that child processes inherit them.
export VLLM_USE_FLASHINFER_SAMPLER=0
export VLLM_ATTENTION_BACKEND=FLASH_ATTN

# ============================================================================
# Configuration parameters - edit your settings here
# ============================================================================

# Basic settings
MODEL_PATH='/chencongliang/models/Meta/Llama-3.1-8B'
MODEL_NAME='Llama-3.1-8B'
BENCHMARKS=(
    'math500'
    'aime24'
    'aime25'
)
PROMPT_TYPE='cot'
N_SAMPLING=256


# Generation parameters
TEMPERATURE=0.6
MAX_TOKENS=16384
TOP_P=0.95
OVERWRITE='true'
SEED=42
OUTPUT_DIR='/chencongliang/project/icl-diversity/limit-of-RLVR/math/outputs'
PARALLEL_SCRIPT='/chencongliang/project/icl-diversity/limit-of-RLVR/math/examples/math_eval/sh/run_limitRLVR_test.sh'


# Multi-configuration settings (the three generation runs of the original
# script). The two arrays are read in parallel: entry i of SHOT_SOURCES is
# paired with entry i of NUM_SHOTS_ARRAY.
SHOT_SOURCES=(
    'r1'
    'r1'
    'o1'
)
NUM_SHOTS_ARRAY=(
    0
    4
    4
)

# ============================================================================
# Run the evaluation
# ============================================================================

# Make sure the output directory exists before any task writes to it.
mkdir -p "$OUTPUT_DIR"

# Start-up banner: benchmark list, number of shot configurations, and the
# total task count (benchmarks x configurations).
benchmark_count=${#BENCHMARKS[@]}
config_count=${#SHOT_SOURCES[@]}
printf '%s\n' \
    "开始评估" \
    "Benchmarks: ${BENCHMARKS[*]}" \
    "配置数量: ${config_count}" \
    "总任务数: $((benchmark_count * config_count))" \
    "=========================================="

# SHOT_SOURCES and NUM_SHOTS_ARRAY are indexed in parallel below. Refuse to run
# when their lengths differ; otherwise a configuration would be launched with
# an empty NUM_SHOTS or SHOT_SOURCE argument.
if (( ${#SHOT_SOURCES[@]} != ${#NUM_SHOTS_ARRAY[@]} )); then
    echo "❌ 配置错误: SHOT_SOURCES 与 NUM_SHOTS_ARRAY 长度不一致 (${#SHOT_SOURCES[@]} vs ${#NUM_SHOTS_ARRAY[@]})" >&2
    exit 1
fi

# Outer loop: iterate over every benchmark.
for benchmark_idx in "${!BENCHMARKS[@]}"; do
    BENCHMARK=${BENCHMARKS[$benchmark_idx]}

    echo ""
    # Array indices are 0-based; add 1 so progress reads 1/N .. N/N, matching
    # the configuration progress line below.
    echo "🔥 开始测试 Benchmark: $BENCHMARK ($((benchmark_idx+1))/${#BENCHMARKS[@]})"
    echo "=========================================="

    # Inner loop: iterate over the shot configurations.
    for config_idx in "${!SHOT_SOURCES[@]}"; do
        SHOT_SOURCE=${SHOT_SOURCES[$config_idx]}
        NUM_SHOTS=${NUM_SHOTS_ARRAY[$config_idx]}

        echo ""
        echo "📊 执行配置: ${NUM_SHOTS}shots-${SHOT_SOURCE}"
        echo "   Benchmark: $BENCHMARK"
        echo "   配置进度: $((config_idx+1))/${#SHOT_SOURCES[@]}"
        echo "=========================================="

        # Invoke the parallel evaluation script with 13 positional arguments.
        # The call is the `if` condition on purpose: with errexit enabled, a
        # failing stand-alone command would terminate the script before any
        # later `$?` check, so the failure branch would never run.
        if bash "$PARALLEL_SCRIPT" \
            "$PROMPT_TYPE" \
            "$MODEL_PATH" \
            "$OUTPUT_DIR" \
            "$BENCHMARK" \
            "$N_SAMPLING" \
            "$TEMPERATURE" \
            "$MAX_TOKENS" \
            "$TOP_P" \
            "$NUM_SHOTS" \
            "$SHOT_SOURCE" \
            "$OVERWRITE" \
            "$SEED" \
            "$MODEL_NAME"; then
            echo "✅ 配置完成: $BENCHMARK - ${NUM_SHOTS}shots-${SHOT_SOURCE}"
        else
            # Stop at the first failed configuration instead of continuing.
            echo "❌ 配置失败: $BENCHMARK - ${NUM_SHOTS}shots-${SHOT_SOURCE}"
            echo "退出脚本"
            exit 1
        fi

    done

    echo "🎉 Benchmark $BENCHMARK 的所有配置测试完成！"

done

# Final summary. The configuration list is built from SHOT_SOURCES and
# NUM_SHOTS_ARRAY so it always matches what was actually run (a hard-coded
# list silently goes stale when the arrays are edited).
config_summary=""
for summary_idx in "${!SHOT_SOURCES[@]}"; do
    # ${config_summary:+, } inserts the ", " separator only after the first item.
    config_summary+="${config_summary:+, }${NUM_SHOTS_ARRAY[$summary_idx]}shots-${SHOT_SOURCES[$summary_idx]}"
done

echo ""
echo "🏆 所有评估任务完成！"
echo "📁 结果保存在: $OUTPUT_DIR"
echo "📋 测试总结:"
echo "   - Benchmarks: ${BENCHMARKS[*]}"
echo "   - 配置: ${config_summary}"
echo "   - 总任务数: $((${#BENCHMARKS[@]} * ${#SHOT_SOURCES[@]}))"