# Point Hugging Face Hub clients at the hf-mirror.com endpoint.
export HF_ENDPOINT=https://hf-mirror.com

# Log in to the Hugging Face Hub.
# The access token is taken from the HF_TOKEN environment variable so that no
# credential is stored in this file.
# SECURITY: a token used to be written here in plain text. Treat that token as
# compromised and revoke it in the Hugging Face account settings.
if [ -n "${HF_TOKEN:-}" ]; then
  hf auth login --token "${HF_TOKEN}"
else
  # Best effort, as before: the script keeps going without a fresh login.
  printf 'warning: HF_TOKEN is not set; skipping "hf auth login"\n' >&2
fi

# Model checkpoint directories, later passed to src/evaluate.py as
# --solver_path and --prover_path.

# Active pair: Fourth_epoch / train_data_8 / RL_Epoch8.
SOLVER_PATH='/mnt/data2/save_new/Fourth_epoch/train_data_8/RL_Epoch8_full_sft_counterexample_solve'
PROVER_PATH='/mnt/data2/save_new/Fourth_epoch/train_data_8/RL_Epoch8_full_sft_formalproof_generate'

# Disabled alternative: train_20250919_141140 / train_data_2 / RL_Epoch2.
# SOLVER_PATH="/mnt/data1/Anony/Erdos-Prover/save/train_20250919_141140/train_data_2/RL_Epoch2_full_sft_counterexample_solve"
# PROVER_PATH="/mnt/data1/Anony/Erdos-Prover/save/train_20250919_141140/train_data_2/RL_Epoch2_full_sft_formalproof_generate"

# Disabled alternative: checkpoints stored under Erdos-Prover/models.
# SOLVER_PATH="/mnt/data1/Anony/Erdos-Prover/models/mistral-7b_full_sft_counterexample_solve"
# PROVER_PATH="/mnt/data1/Anony/Erdos-Prover/models/dsproverv2-7b_full_sft_formalproof_generate"

# Benchmark file, later passed to src/evaluate.py as --dataset_path.
DATASET_PATH='/mnt/data1/Anony/CounterExample_benchmark/datasets/aug_veri_reason.json'

# Disabled alternatives:
# DATASET_PATH="datasets/eval/validation_bench.json"
# DATASET_PATH="datasets/eval/countermath_bench.json"
# DATASET_PATH="/mnt/data1/Anony/Erdos-Prover/datasets/train_data.json"

# Create a timestamped output directory for this evaluation run.
# The path is built once so that mkdir and --save_dir cannot drift apart.
timestamp=$(date +%Y%m%d_%H%M%S)
save_dir="save/eval_${timestamp}"
if ! mkdir -p -- "${save_dir}"; then
  # Do not start the multi-GPU job when its results cannot be written.
  printf 'error: cannot create output directory %s\n' "${save_dir}" >&2
  exit 1
fi

# Timeout-related environment settings (values unchanged).
export NCCL_TIMEOUT=300
export NCCL_IB_TIMEOUT=300
# Turn off the FlashInfer sampler in vLLM; see
# https://github.com/vllm-project/vllm/issues/19483
export VLLM_USE_FLASHINFER_SAMPLER=0
# Expose GPUs 0-7; --gpu below is set to the same count (8).
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# Launch the evaluation with the solver/prover checkpoints and dataset
# selected earlier in this script.
python src/evaluate.py \
    --solver_name erdos \
    --prover_name erdos \
    --dataset_path "${DATASET_PATH}" \
    --solver_path "${SOLVER_PATH}" \
    --prover_path "${PROVER_PATH}" \
    --solver_k 3 \
    --prover_k 3 \
    --default_header 0 \
    --gpu 8 \
    --max_tokens 4096 \
    --temperature 0.7 \
    --top_p 0.95 \
    --batch_size 4096 \
    --num_problems 1000 \
    --save_dir "${save_dir}"
