#!/usr/bin/env bash
# Evaluation driver: activates the conda environment and sets the project root
# that every other path in this script is derived from.
# Requires Bash (indexed arrays are used further down).

ROOT=/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation

# Load conda's shell integration into this non-interactive shell so that
# `conda activate` is available.
eval "$(/mnt/shared-storage-user/p1-shared/wangfuting/miniconda3/bin/conda shell.bash hook)"

# Stop here if the environment cannot be activated; otherwise the generation
# step would silently run with whatever `python` happens to be on PATH.
conda activate verl041-test || {
  echo "无法激活 conda 环境 verl041-test" >&2
  exit 1
}

# bash /mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/verl/grpo_scripts/merge.sh
# Input dataset handed to the generation script via --input_file.
# Keep exactly one DATA assignment active; the commented lines are alternatives.
# DATA=$ROOT/data/luffy/valid.all.parquet
DATA=$ROOT/data/luffy/valid.all_qwen3.parquet
# DATA=$ROOT/data/luffy/valid-polaris-qwen3.parquet
# DATA=/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/luffy/aime24_qwen3_128.parquet
# DATA=/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/qwen3-4b-s1-sampled1k.parquet
# DATA=$ROOT/data/luffy/openr1.parquet

# Directory that receives the generated .jsonl files and the per-run logs.
OUTPUT_DIR=$ROOT/results_mar
mkdir -p -- "$OUTPUT_DIR" || {
  echo "无法创建输出目录: $OUTPUT_DIR" >&2
  exit 1
}

# The generation script is invoked through a path relative to ROOT, so running
# from any other directory must be treated as a fatal error.
cd -- "$ROOT" || {
  echo "无法进入目录: $ROOT" >&2
  exit 1
}

# Model paths to evaluate; each active entry is passed to the generation script
# as --model_path. Entries are matched by position with MODEL_NAMES and
# TEMPLATES, so enable or disable the corresponding lines in all three arrays
# together.
MODEL_PATHS=(
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/skip-right-skip-limits10-gspo-dapo-math-add2k/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/qwen3-4b-polaris-add1k-gspo/best_model/actor/huggingface"
  # # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/qwen3-4b-polaris-baseline-gspo/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/baseline-gspo-dapo-math-minibsz32-redo/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/skip-right-skip-limits10-gspo-dapo-math-add1k5/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/skip-right-skip-limits10-gspo-dapo-math-redo/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/qwen3-4b-polaris-add1k-gspo/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/gspo-stage2-dapo-math-add1k-max12k/best_model_four_sets/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/Qwen/Qwen3-4B"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/qwen3-8b-base-baseline-gspo/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/qwen3-8b-base-add1k-gspo/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/qwen3-1.7b-base-add1k-gspo/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/qwen3-1.7b-base-baseline-gspo/best_model/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/Qwen/Qwen3-8B-Base"
  # "/mnt/shared-storage-user/p1-shared/Qwen/Qwen3-1.7B-Base"
  # "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/qwen3-8b-base-add1k-gspo/best_model_four_sets/actor/huggingface"
  # "/mnt/shared-storage-user/p1-shared/Qwen/Qwen3-4B-Base"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/LIE/best_model/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/baseline-gspo-dapo-math-minibsz32/best_model/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/gspo_length/best_model/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/skip-right-skip-limits10-gspo-dapo-math-redo2/global_step_500/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/skip-right-skip-limits10-gspo-dapo-math-redo1/best_model/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/skip-right-skip-limits10-gspo-dapo-math-redo2/best_model/actor/huggingface"

  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/skip-right-semantic-dapo-math-v6/best_model_four_sets/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/llama-length/best_model/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/LIE-512gram/best_model/actor/huggingface"
  # "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/LIE-100gram/best_model/actor/huggingface"
  "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/gspo_length/best_model_four_sets/actor/huggingface"
)

# Run label for each model, matched by position with MODEL_PATHS. The label is
# used to build the output .jsonl and .log file names and is printed in the
# progress messages.
MODEL_NAMES=(
  # "qwen3-4b-valid-all"
  # "gspo-add2k-step520-valid-all"
  # "qwen3-4b-polaris-add1k-gspo-step660-valid-all"
  # "gspo-stage2-dapo-math-add1k-max12k-step240-valid-all"
  # "qwen3-4b-polaris-baseline-gspo-step550-valid-all"
  # "baseline-gspo-dapo-math-minibsz32-redo-step660-valid-all"
  # "skip-right-skip-limits10-gspo-dapo-math-add1k5-step630-valid-all"
  # "skip-right-skip-limits10-gspo-dapo-math-redo-step490-valid-all"
  # "gspo_length-valid-all"

  # # "gspo-add1k-wo-repetition-step600-valid-all"
  # "qwen3-8b-base-baseline-gspo-step520-valid-all"
  # "qwen3-8b-base-add1k-gspo-step760-valid-all"
  # "qwen3-1.7b-base-add1k-gspo-step610-valid-all"
  # "qwen3-1.7b-base-baseline-gspo-step560-valid-all"
  # "qwen3-8b-base-valid-all"
  # "qwen3-1.7b-base-valid-all"
  # "Qwen3-4B-Base-aime128"
  # "LIE-aime128"
  # "gspo-baseline-aime128"
  # "LIE-redo1-step530"
  # "LIE-redo2-step470"
  # "LIE-redo2-step600"
  # "LIE-semantic-v6-280"
  # "llama-length-310"
  # "LIE-100gram-690step"
  "gspo_length-valid-690step"
)

# Value passed as --template for each model, matched by position with
# MODEL_PATHS. Keep one active entry per active model path.
TEMPLATES=(
  # "own"
  "own"
  # "own"
  # "own"
)
# TEMPLATE=qwen3
# TEMPLATE=luffy
# export CUDA_VISIBLE_DEVICES=0,1,2,3
# --enable_thinking False \
# Iterate over every configured model and run generation for each token budget.

#######################################
# Run one generation job and write its stdout to a per-run log file.
# Only stdout is redirected; stderr stays on the caller's stderr.
# Globals:   DATA (read), OUTPUT_DIR (read)
# Arguments: $1 - model path, passed as --model_path
#            $2 - run label used in the output and log file names
#            $3 - value for --template
#            $4 - token budget, passed as --max_tokens
# Outputs:   $OUTPUT_DIR/<label>_<budget>_test.jsonl (via --output_file)
#            $OUTPUT_DIR/<label>-<budget>.log
# Returns:   exit status of the python command (or of the failed redirection)
#######################################
run_generation() {
    local model_path=$1
    local model_name=$2
    local template=$3
    local budget=$4

    # Every expansion is quoted so paths or labels containing whitespace or
    # glob characters reach the script as a single argument.
    python eval_scripts/generate_vllm_old.py \
      --model_path "$model_path" \
      --input_file "$DATA" \
      --remove_system True \
      --output_file "$OUTPUT_DIR/${model_name}_${budget}_test.jsonl" \
      --temperature 0.6 \
      --max_tokens "$budget" \
      --n 1 \
      --top_p 1.0 \
      --no-split-think True \
      --template "$template" > "$OUTPUT_DIR/${model_name}-${budget}.log"
}

# MODEL_PATHS, MODEL_NAMES and TEMPLATES are indexed together. A missing name
# or template would produce an empty label / empty --template value, so refuse
# to start rather than waste a run. Surplus entries are tolerated.
if (( ${#MODEL_NAMES[@]} < ${#MODEL_PATHS[@]} || ${#TEMPLATES[@]} < ${#MODEL_PATHS[@]} )); then
    echo "配置错误: MODEL_NAMES(${#MODEL_NAMES[@]}) 或 TEMPLATES(${#TEMPLATES[@]}) 的条目数少于 MODEL_PATHS(${#MODEL_PATHS[@]})" >&2
    exit 1
fi

# Number of generation jobs that returned a non-zero status.
failed=0

for i in "${!MODEL_PATHS[@]}"; do
    MODEL_PATH="${MODEL_PATHS[$i]}"
    MODEL_NAME="${MODEL_NAMES[$i]}"
    TEMPLATE="${TEMPLATES[$i]}"

    echo "正在评估模型: $MODEL_NAME"
    echo "模型路径: $MODEL_PATH"

    for budget in 32768; do
        echo "开始生成，预算: $budget"
        # Keep going after a failure so the remaining models still run, but
        # report it instead of announcing success.
        if run_generation "$MODEL_PATH" "$MODEL_NAME" "$TEMPLATE" "$budget"; then
            echo "模型 $MODEL_NAME 评估完成"
        else
            echo "模型 $MODEL_NAME 评估失败 (预算: $budget)，详见 $OUTPUT_DIR/${MODEL_NAME}-${budget}.log" >&2
            failed=$((failed + 1))
        fi
    done

    echo "----------------------------------------"
done

# Surface failures through the exit status so callers can detect them.
if (( failed > 0 )); then
    echo "有 $failed 个评估任务失败" >&2
    exit 1
fi

echo "所有模型评估完成！"