#!/usr/bin/env bash
# Environment and path setup for the evaluation run: activates the conda
# environment, selects the input dataset, creates the output directory and
# switches to the project root (the generation script is invoked with a
# path relative to it).
ROOT=/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation
# Load conda's bash integration so `conda activate` works in a non-interactive shell.
eval "$(/mnt/shared-storage-user/p1-shared/wangfuting/miniconda3/bin/conda shell.bash hook)"
# Abort early: without the environment, `python` below would resolve to the wrong interpreter.
conda activate verl041 || { echo "无法激活 conda 环境 verl041" >&2; exit 1; }

DATA="$ROOT/data/valid_ood_qwen3.parquet"
# DATA=$ROOT/data/luffy/valid-polaris-qwen3.parquet
# DATA=/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/luffy/aime24_qwen3_128.parquet
# DATA=/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/qwen3-4b-s1-sampled1k.parquet
# DATA=$ROOT/data/luffy/openr1.parquet
OUTPUT_DIR="$ROOT/results_feb"
mkdir -p -- "$OUTPUT_DIR" || { echo "无法创建输出目录: $OUTPUT_DIR" >&2; exit 1; }
# A failed cd must stop the script; otherwise the relative script path is resolved from the wrong place.
cd -- "$ROOT" || { echo "无法进入目录: $ROOT" >&2; exit 1; }

# Run-time switches; each may be overridden from the caller's environment.
# REUSE_EXISTING_OUTPUTS (default 1): intended to reuse existing outputs
# instead of loading the model and generating again.
: "${REUSE_EXISTING_OUTPUTS:=1}"
# SKIP_IF_OUTPUT_EXISTS (default 0): intended to skip an item entirely when
# its final evaluation result jsonl already exists.
: "${SKIP_IF_OUTPUT_EXISTS:=0}"

# Model checkpoints to evaluate. Entry i is paired with MODEL_NAMES[i] and
# TEMPLATES[i] by the evaluation loop. Commented-out entries are disabled.
MODEL_PATHS=(
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/baseline-grpo-dapo-math-minibsz32/best_model_four_sets/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/skip-right-skip-limits10-grpo-dapo-math/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/baseline-dapo-math-redo/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/skip-right-skip-limits10-dapo-math/best_model/actor/huggingface_330"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/baseline-gspo-dapo-math-minibsz32/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/skip-right-skip-limits10-gspo-dapo-math/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/Qwen3-4B-Base"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/qwen3-8b-base-baseline-gspo/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/qwen3-8b-base-add1k-gspo/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/qwen3-1.7b-base-add1k-gspo/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/qwen3-1.7b-base-baseline-gspo/best_model/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/Qwen3-8B-Base/snapshots/49e3418fbbbca6ecbdf9608b4d22e5a407081db4"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/8b-baseline/best_model/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/8b-LIE/best_model_four_sets/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/Qwen/Qwen3-1.7B-Base"
  # "/mnt/shared-storage-user/p1-shared/Qwen/Qwen3-4B-Base"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/qwen3-8b-base-add1k-gspo/best_model_four_sets/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/skip-right-skip-limits10-gspo-dapo-math-redo1/best_model/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/skip-right-skip-limits10-gspo-dapo-math-redo2/best_model/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/8b-LIE/best_model_four_sets/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/8b-baseline/best_model/actor/huggingface"
  "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/skip-right-semantic-dapo-math-v6/best_model_four_sets/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/LIE-512gram/best_model/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/LIE-100gram/best_model/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/gspo_length/best_model_four_sets/actor/huggingface"
)

# Short run names, one per MODEL_PATHS entry (same index). The name is used
# to build the output jsonl and log file names. Commented-out entries are disabled.
MODEL_NAMES=(
  # "grpo-baseline-step430-ood"
  # "grpo-add1k-step320-ood"
  # "dapo-baseline-step350-ood"
  # "dapo-add1k-step330-ood"
  # "gspo-baseline-step500-ood"
  # "gspo-add1k-step600-ood"
  # "Qwen3-4B-base-ood"
  # "qwen3-8b-base-baseline-gspo-step520-ood"
  # "qwen3-8b-base-add1k-gspo-step590-ood"
  # "qwen3-1.7b-base-add1k-gspo-step610-ood"
  # "qwen3-1.7b-base-baseline-gspo-step560-ood"
  # "8b-baseline-ood"
  # "qwen3-1.7b-base-ood"
  # "8b-GSPO-ood"
  # "8b-LIE-ood"

  # "Qwen3-4B-base-ood"
  # "LIE-redo1-step530-ood"
  # "LIE-redo2-step470-ood"
  "LIE-semantic-v6-280-ood"
  # "LIE-512gram-720step-ood"
  # "LIE-100gram-690step-ood"
  # "gspo_length-ood-690step"
)

# Value passed as --template to the generation script, one per MODEL_PATHS
# entry (same index). Commented-out entries are disabled.
TEMPLATES=(
  "own"
  # "own"
  # "own"
  # "own"
)

# TEMPLATE=qwen3
# TEMPLATE=luffy
# export CUDA_VISIBLE_DEVICES=0,1,2,3
# Evaluate every configured model at every token budget.

# MODEL_PATHS, MODEL_NAMES and TEMPLATES are indexed together in the loop
# below, so a length mismatch would silently pair the wrong entries.
if (( ${#MODEL_PATHS[@]} != ${#MODEL_NAMES[@]} || ${#MODEL_PATHS[@]} != ${#TEMPLATES[@]} )); then
    echo "MODEL_PATHS / MODEL_NAMES / TEMPLATES 长度不一致，请检查配置。" >&2
    exit 1
fi

#######################################
# Run eval_scripts/generate_vllm.py for one model at one token budget.
# Globals:   OUTPUT_DIR, DATA, SKIP_IF_OUTPUT_EXISTS (all read)
# Arguments: $1 - model path
#            $2 - model name (used in output and log file names)
#            $3 - template name passed to --template
#            $4 - token budget passed to --max_tokens
# Outputs:   progress messages on stdout, failures on stderr; the python
#            process's stdout is written to <OUTPUT_DIR>/<name>-<budget>.log
# Returns:   0 on success or when the run is skipped, otherwise the exit
#            status of the python process
#######################################
run_one_eval() {
    local model_path=$1 model_name=$2 template=$3 budget=$4
    local output_file="$OUTPUT_DIR/${model_name}_${budget}_test.jsonl"
    local log_file="$OUTPUT_DIR/${model_name}-${budget}.log"
    # NOTE(review): REUSE_EXISTING_OUTPUTS is declared in this script's
    # configuration, but --force_generate is pinned to False here; confirm
    # whether that flag is meant to drive this value.
    local force_generate=False
    local status=0

    # Honour the documented switch: skip the item when its result file exists.
    if [[ "${SKIP_IF_OUTPUT_EXISTS:-0}" == "1" && -e "$output_file" ]]; then
        echo "结果文件已存在，跳过: $output_file"
        return 0
    fi

    echo "开始执行，预算: $budget, force_generate=$force_generate"
    python eval_scripts/generate_vllm.py \
      --model_path "$model_path" \
      --input_file "$DATA" \
      --remove_system True \
      --output_file "$output_file" \
      --temperature 0.6 \
      --max_tokens "$budget" \
      --n 1 \
      --top_p 1.0 \
      --no-split-think True \
      --template "$template" \
      --force_generate "$force_generate" > "$log_file" || status=$?

    # Do not report success when generation failed.
    if (( status != 0 )); then
        echo "模型 $model_name 评估失败 (exit code $status)，日志: $log_file" >&2
        return "$status"
    fi

    echo "模型 $model_name 评估完成"
}

# Keep going after a failed run so the remaining models are still evaluated,
# but remember the failure so the script's exit status reflects it.
failed_runs=0
for i in "${!MODEL_PATHS[@]}"; do
    MODEL_PATH="${MODEL_PATHS[$i]}"
    MODEL_NAME="${MODEL_NAMES[$i]}"
    TEMPLATE="${TEMPLATES[$i]}"

    echo "正在评估模型: $MODEL_NAME"
    echo "模型路径: $MODEL_PATH"

    for budget in 32768; do
        run_one_eval "$MODEL_PATH" "$MODEL_NAME" "$TEMPLATE" "$budget" \
          || failed_runs=$((failed_runs + 1))
    done

    echo "----------------------------------------"
done

if (( failed_runs > 0 )); then
    echo "有 $failed_runs 个评估任务失败，请检查日志。" >&2
    exit 1
fi

echo "所有模型评估完成！"
