#!/usr/bin/env bash
# Budget-forcing evaluation sweep driven by eval_scripts/generate_vllm.py.
# Requires bash: the script relies on arrays and [[ ]].

# Project root; the script later cd's here and writes results underneath it.
ROOT=/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation

# Load conda's shell hook into this shell so that `conda activate` is
# available, then switch to the evaluation environment.
eval "$(/mnt/shared-storage-user/p1-shared/wangfuting/miniconda3/bin/conda shell.bash hook)"
# Abort instead of silently running the sweep with whatever python is on PATH.
# This also covers a missing conda binary (the hook is then empty and
# `conda` is not a known command).
if ! conda activate verl041-test; then
    echo "无法激活 conda 环境 verl041-test，终止。" >&2
    exit 1
fi

# bash /mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/verl/grpo_scripts/merge.sh
# DATA=$ROOT/data/luffy/valid.all.parquet
# DATA=$ROOT/data/luffy/valid.all_qwen3.parquet
# DATA=$ROOT/data/luffy/valid-polaris-qwen3.parquet
# DATA=/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/luffy/aime24_qwen3_128.parquet
# DATA=/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/qwen3-4b-s1-sampled1k.parquet
# DATA=$ROOT/data/luffy/openr1.parquet
# Evaluation set handed to generate_vllm.py through --input_file.
DATA="/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/valid_ood_qwen3.parquet"

# Directory that receives every generation file and log of this sweep.
OUTPUT_DIR="$ROOT/results_mar"
if ! mkdir -p -- "$OUTPUT_DIR"; then
    echo "无法创建输出目录: $OUTPUT_DIR" >&2
    exit 1
fi

# The python entry point is invoked through a path relative to ROOT, so a
# failed cd must stop the script rather than run from the wrong directory.
if ! cd -- "$ROOT"; then
    echo "无法进入项目目录: $ROOT" >&2
    exit 1
fi

# Sweep configuration following the paper's Figure 4 setup:
#   * max thinking tokens forced to 2048 or 4096
#   * end-of-thinking ignored 2x, 4x or 6x
#   * "Wait" is the appended string
#   * greedy decoding (temperature 0.0)
PAPER_BUDGETS=(2048 4096)
PAPER_NUM_IGNORES=(2 4 6)

# Fixed generation options shared by every run of the sweep.
GENERATION_MODE="budget_forcing"
IGNORE_STR="Wait"
FORCE_GENERATE=True

# Three model checkpoints to evaluate. Entry i of this array pairs with
# entry i of MODEL_NAMES and TEMPLATES.
MODEL_PATHS=()
MODEL_PATHS+=("/mnt/shared-storage-user/p1-shared/Qwen/Qwen3-4B")
MODEL_PATHS+=("/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/qwen3-4b-LIE/best_model/actor/huggingface")
MODEL_PATHS+=("/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/qwen3-4b-baseline/best_model/actor/huggingface")

# Run labels, one per MODEL_PATHS entry and in the same order; each label is
# used as the prefix of that model's output and log file names.
MODEL_NAMES=()
MODEL_NAMES+=("qwen3-4b-valid-ood")
MODEL_NAMES+=("qwen3-4b-polaris-add1k-gspo-step660-valid-ood")
MODEL_NAMES+=("qwen3-4b-polaris-baseline-gspo-step550-valid-ood")

# Value passed as --template for each model, in MODEL_PATHS order.
# Keep exactly one entry per model.
TEMPLATES=()
TEMPLATES+=("own")
TEMPLATES+=("own")
TEMPLATES+=("own")
# TEMPLATE=qwen3
# TEMPLATE=luffy
# export CUDA_VISIBLE_DEVICES=0,1,2,3
# --enable_thinking False \
# Before iterating over the models: MODEL_PATHS, MODEL_NAMES and TEMPLATES are
# read by the same index, so stop with a message on stderr and exit status 1
# unless all three have the same number of entries.
if (( ${#MODEL_NAMES[@]} != ${#MODEL_PATHS[@]} || ${#TEMPLATES[@]} != ${#MODEL_PATHS[@]} )); then
    printf '%s\n' "MODEL_PATHS / MODEL_NAMES / TEMPLATES 长度不一致，请检查配置。" >&2
    exit 1
fi

#######################################
# Run a single budget-forcing generation with eval_scripts/generate_vllm.py.
# Globals:
#   DATA, GENERATION_MODE, IGNORE_STR, FORCE_GENERATE (read)
# Arguments:
#   $1 - model path (--model_path)
#   $2 - template (--template)
#   $3 - token budget, used for both --max_tokens and --length_budget
#   $4 - number of ignores (--num_ignore)
#   $5 - output file (--output_file)
#   $6 - log file; receives the generator's stdout only, stderr is not redirected
# Returns:
#   The exit status of the python process.
#######################################
run_generation() {
    local model_path=$1 template=$2 budget=$3 num_ignore=$4
    local output_file=$5 log_file=$6

    python eval_scripts/generate_vllm.py \
      --model_path "$model_path" \
      --input_file "$DATA" \
      --remove_system True \
      --output_file "$output_file" \
      --temperature 0.0 \
      --max_tokens "$budget" \
      --length_budget "$budget" \
      --generation_mode "$GENERATION_MODE" \
      --num_ignore "$num_ignore" \
      --ignore_str "$IGNORE_STR" \
      --n 1 \
      --top_p 1.0 \
      --no-split-think True \
      --enable_thinking False \
      --template "$template" \
      --force_generate "$FORCE_GENERATE" > "$log_file"
}

# Number of (model, budget, num_ignore) runs whose generator exited non-zero.
failures=0

# Sweep: every model x every budget x every ignore count.
for i in "${!MODEL_PATHS[@]}"; do
    MODEL_PATH="${MODEL_PATHS[$i]}"
    MODEL_NAME="${MODEL_NAMES[$i]}"
    TEMPLATE="${TEMPLATES[$i]}"

    echo "正在评估模型: $MODEL_NAME"
    echo "模型路径: $MODEL_PATH"

    for budget in "${PAPER_BUDGETS[@]}"; do
        for num_ignore in "${PAPER_NUM_IGNORES[@]}"; do
            OUTPUT_FILE="$OUTPUT_DIR/${MODEL_NAME}_bf_wait${num_ignore}x_${budget}_test.jsonl"
            LOG_FILE="$OUTPUT_DIR/${MODEL_NAME}_bf_wait${num_ignore}x_${budget}.log"

            echo "开始生成，预算: $budget, num_ignore=$num_ignore, force_generate=$FORCE_GENERATE"
            # Only announce completion when the generator really succeeded; a
            # failed run is reported and counted, and the sweep continues with
            # the remaining configurations.
            if run_generation "$MODEL_PATH" "$TEMPLATE" "$budget" "$num_ignore" \
                "$OUTPUT_FILE" "$LOG_FILE"; then
                echo "模型 $MODEL_NAME 评估完成: budget=$budget, num_ignore=$num_ignore"
            else
                rc=$?
                echo "模型 $MODEL_NAME 评估失败 (退出码 $rc): budget=$budget, num_ignore=$num_ignore, 日志: $LOG_FILE" >&2
                failures=$((failures + 1))
            fi
        done
    done

    echo "----------------------------------------"
done

# Propagate failures to the caller instead of always exiting 0.
if (( failures > 0 )); then
    echo "评估结束，但有 $failures 个任务失败，请检查日志。" >&2
    exit 1
fi

echo "所有模型评估完成！"
