# Project root. The output directory and the working directory used later in
# this script are derived from it.
ROOT=/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation

# Load conda's shell integration so that `conda activate` is available in this
# non-interactive shell. The hook text is captured first so that a missing or
# broken conda binary is detected instead of silently eval'ing an empty string.
CONDA_HOOK="$(/mnt/shared-storage-user/p1-shared/wangfuting/miniconda3/bin/conda shell.bash hook)" || {
    echo "无法加载 conda shell hook，请检查 conda 安装路径。" >&2
    exit 1
}
eval "$CONDA_HOOK"

# Abort when the environment cannot be activated; otherwise the evaluation
# would run with whatever `python` happens to be first on PATH.
conda activate verl041-test || {
    echo "无法激活 conda 环境 verl041-test。" >&2
    exit 1
}

# Related command kept for reference (NOTE(review): presumably run separately
# from this script -- confirm):
# bash /mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/verl/grpo_scripts/merge.sh

# Evaluation dataset passed to generate_vllm.py as --input_file.
# Only one DATA assignment is active; the commented-out lines are alternative
# datasets that can be swapped in.
# DATA=$ROOT/data/luffy/valid.all.parquet
# DATA=$ROOT/data/luffy/valid.all_llama.parquet
DATA="/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/luffy/valid_ood_llama.parquet"
# DATA=$ROOT/data/luffy/valid-polaris-qwen3.parquet
# DATA=/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/luffy/aime24_qwen3_128.parquet
# DATA=/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/data/qwen3-4b-s1-sampled1k.parquet
# DATA=$ROOT/data/luffy/openr1.parquet
# Directory that receives the generated .jsonl files and per-run logs.
OUTPUT_DIR="$ROOT/results_mar"
mkdir -p -- "$OUTPUT_DIR" || exit 1
# The generation script is invoked through a path relative to ROOT, so a
# failed cd must abort rather than run from the wrong working directory.
cd -- "$ROOT" || exit 1

# Model checkpoints to evaluate. Entry k is paired with MODEL_NAMES[k] and
# TEMPLATES[k] by the evaluation loop, so the three arrays must stay aligned.
# Commented-out lines are alternative checkpoints that are currently disabled.
declare -a MODEL_PATHS=()
# MODEL_PATHS+=("/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/OctoThinker-3B-Long-Base")
# MODEL_PATHS+=("/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/llama-baseline-gspo-deepmath/best_model/actor/huggingface")
# MODEL_PATHS+=("/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/llama-baseline-gspo-deepmath/best_model_four_sets/actor/huggingface")
# MODEL_PATHS+=("/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/llama-add1k-gspo-deepmath/best_model/actor/huggingface")
# MODEL_PATHS+=("/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/llama-add1k-gspo-deepmath/best_model/actor/huggingface")
MODEL_PATHS+=("/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/llama-baseline/best_model/actor/huggingface")
# MODEL_PATHS+=("/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct/llama-LIE/best_model/actor/huggingface_210")

# Short run names, one per active MODEL_PATHS entry (same order). The name is
# used to build the output .jsonl and log file names.
declare -a MODEL_NAMES=()
# MODEL_NAMES+=("octothinker-valid-ood")
MODEL_NAMES+=("octothinker-gspo-step300-valid-ood")
# MODEL_NAMES+=("octothinker-add1k-step210-valid-ood")

# Value passed as --template for each model, one per active MODEL_PATHS entry
# (same order).
declare -a TEMPLATES=()
# TEMPLATES+=("own")
# TEMPLATES+=("own")
TEMPLATES+=("own")
# TEMPLATES+=("own")
# TEMPLATE=qwen3
# TEMPLATE=luffy
# export CUDA_VISIBLE_DEVICES=0,1,2,3
# --enable_thinking False \
# Sanity check before iterating over the models: the three configuration
# arrays are indexed in lockstep, so they must all have the same length.
if ! (( ${#MODEL_PATHS[@]} == ${#MODEL_NAMES[@]} && ${#MODEL_NAMES[@]} == ${#TEMPLATES[@]} )); then
    echo "MODEL_PATHS / MODEL_NAMES / TEMPLATES 长度不一致，请检查配置。" >&2
    exit 1
fi

#######################################
# Run generate_vllm.py once for a single model and token budget.
# Globals:
#   DATA (read), OUTPUT_DIR (read)
# Arguments:
#   $1 - model checkpoint path
#   $2 - run name, used to build the output and log file names
#   $3 - value passed as --template
#   $4 - value passed as --max_tokens
# Outputs:
#   Progress messages on stdout; a failure message on stderr. The Python
#   process's stdout is redirected to the per-run log file (its stderr is
#   not redirected).
# Returns:
#   The exit status of generate_vllm.py.
#######################################
run_generation() {
    local model_path=$1
    local model_name=$2
    local template=$3
    local budget=$4
    local output_file="$OUTPUT_DIR/${model_name}_${budget}_test.jsonl"
    local log_file="$OUTPUT_DIR/${model_name}-${budget}.log"
    local force_generate=True
    local status=0

    echo "开始执行，预算: $budget, force_generate=$force_generate"
    python eval_scripts/generate_vllm.py \
      --model_path "$model_path" \
      --input_file "$DATA" \
      --remove_system True \
      --output_file "$output_file" \
      --temperature 0.6 \
      --max_tokens "$budget" \
      --n 1 \
      --top_p 1.0 \
      --no-split-think True \
      --template "$template" \
      --force_generate "$force_generate" > "$log_file" || status=$?

    # Only report completion when generation actually succeeded.
    if (( status != 0 )); then
        echo "模型 $model_name 评估失败 (退出码: $status)，日志: $log_file" >&2
        return "$status"
    fi

    echo "模型 $model_name 评估完成"
}

# Evaluate every configured model. A failed run is counted and the remaining
# models are still processed; the script exits non-zero at the end if any run
# failed.
failed_runs=0
for i in "${!MODEL_PATHS[@]}"; do
    echo "正在评估模型: ${MODEL_NAMES[$i]}"
    echo "模型路径: ${MODEL_PATHS[$i]}"

    for budget in 32768; do
        run_generation "${MODEL_PATHS[$i]}" "${MODEL_NAMES[$i]}" "${TEMPLATES[$i]}" "$budget" \
            || failed_runs=$((failed_runs + 1))
    done

    echo "----------------------------------------"
done

if (( failed_runs > 0 )); then
    echo "有 $failed_runs 个评估任务失败，请检查日志。" >&2
    exit 1
fi

echo "所有模型评估完成！"