#!/bin/bash
# --- Environment setup ---
# Expose all eight GPUs (indices 0-7) to the processes started by this script;
# the vLLM engine configured below is launched with tensor_parallel_size 8.
CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
export CUDA_VISIBLE_DEVICES

# --- Model and path configuration ---
# Checkpoint directory of the model to evaluate; replace with your own model path.
MODEL_PATH='/home/ubuntu/yangxw/codes/ouro_train/outputs/ouro-sft-reg-randomloop-lastmathnew-400K/final_checkpoint'

# Output path handed to lm_eval via --output_path. It is a relative path, so
# where it lands depends on the directory the script is launched from.
OUTPUT_PATH='../outputs/vllm_eval_new_svamp_custom.json'

# --- lm-eval invocation ---
echo "Starting evaluation for Ouro model: ${MODEL_PATH}"
echo "Results will be saved to: ${OUTPUT_PATH}"

# Arguments for the vllm backend, passed to lm_eval as a single JSON string.
# MODEL_PATH is spliced in verbatim, so it must not contain double quotes or
# backslashes (either would produce invalid JSON).
model_args=$(cat <<EOF
{
    "pretrained": "${MODEL_PATH}",
    "trust_remote_code": true,
    "dtype": "bfloat16",
    "tensor_parallel_size": 8,
    "gpu_memory_utilization": 0.9,
    "max_model_len": 4096,
    "hf_overrides": {
        "total_ut_steps": 1
    }
}
EOF
)

# Generation settings as JSON; max_gen_toks sets the maximum number of
# generated tokens.
gen_kwargs='{"max_gen_toks": 2048}'

# The command line is assembled as an array so that every argument, including
# the multi-line JSON and the output path, reaches lm_eval as exactly one word.
# NOTE(review): lm_eval's CLI docs describe the bare --apply_chat_template flag
# for the default template; the explicit "True" value is kept unchanged from
# the original invocation. Confirm it is not interpreted as a template name.
lm_eval_args=(
    --model vllm
    --model_args "${model_args}"
    --tasks svamp_custom
    --include_path /home/ubuntu/yangxw/codes/ouro_train/custom_tasks
    --batch_size auto
    --num_fewshot 0
    --apply_chat_template True
    --log_samples
    --confirm_run_unsafe_code
    --system_instruction "You are a helpful assistant that can assist users with reasoning."
    --output_path "${OUTPUT_PATH}"
    --gen_kwargs "${gen_kwargs}"
)

# Capture the exit status instead of ignoring it, so a failed run is reported
# on stderr and propagated rather than being followed by a success message.
status=0
HF_ALLOW_CODE_EVAL="1" lm_eval "${lm_eval_args[@]}" || status=$?
if (( status != 0 )); then
    echo "Evaluation failed: lm_eval exited with status ${status}." >&2
    exit "${status}"
fi

echo "Evaluation finished."