#!/usr/bin/env bash
set -euo pipefail

# =============================================================================
# Multi-turn Math Evaluation Script
# Edit these values as needed.
# =============================================================================
# export CUDA_VISIBLE_DEVICES=1

# Model configuration
MODEL_NAME_OR_PATH="Qwen/Qwen2.5-3B-Instruct"  # Replace with your model path
LORA_PATH=""  # Leave empty if not using LoRA (--lora_path is then omitted)
# Dataset: one of 'math500', 'gsm8k', 'gpqa', 'mmlu-redux', 'mmlu_pro',
# 'hendrycks_math', 'theoremqa', 'mtu_bench'
DATASET="gpqa"
# Output configuration
# The parent directory of this path is created automatically before the run.
# NOTE(review): the directory name says "Llama3.1-8b-Instruct" while
# MODEL_NAME_OR_PATH above points at a Qwen model -- confirm this is intended,
# otherwise results may be written into (or overwrite) another model's folder.
OUTPUT_JSON="results/Llama3.1-8b-Instruct/tmp/${DATASET}/reward.json"

# NOTE(review): 'metamathqa' is not in the dataset list above -- confirm it is
# still a supported value for DATASET.
DATASET_PATH="meta-math/MetaMathQA"  # Only used when DATASET=metamathqa
DATASET_SPLIT="test"  # Passed as --dataset_split
TYPE_PREFIX="MATH_"  # Only used when DATASET=metamathqa
DATASET_CONFIG="default"  # Passed as --dataset_config; leave empty to omit the flag
MAX_SAMPLES=""  # Leave empty to use all samples (--max_samples is then omitted)

# Generation parameters
MAX_TURNS=5
MAX_ACTIONS_PER_TRAJ=5
MAX_ACTIONS_PER_TURN=1
FORMAT_PENALTY=-0.1
INSTRUCTION_MAX_TOKENS=400
ACTION_SEP="||"
DISABLE_THINK=0  # Set to 1 to pass --disable_think
PROMPT_MODE="simple"   # "full" (original state/reward prompt) or "simple" (only 'Incorrect. Please think again.' after wrong)
BATCH_SIZE=256
MAX_TOKENS=1024
TEMPERATURE=0
TOP_P=1.0
SEED=42

# Hardware configuration
TENSOR_PARALLEL_SIZE=4
GPU_MEMORY_UTILIZATION=0.9
MAX_NUM_SEQS=256  # Reduce if OOM with LoRA

# LoRA configuration (set MAX_LORA_RANK to your actual LoRA rank)
# Both values are always passed to the script, even when LORA_PATH is empty.
MAX_LORA_RANK=8  # Must match or exceed your LoRA adapter's rank
LORA_EXTRA_VOCAB_SIZE=0  # Set if vocab was extended during training

# Other options (boolean switches: 1 passes the flag, anything else omits it)
USE_CHAT_TEMPLATE=1  # 1 => --use_chat_template
TRUST_REMOTE_CODE=0  # 1 => --trust_remote_code

# =============================================================================
# Do not edit below this line
# =============================================================================

# Make sure the directory that will hold the results file exists.
mkdir -p -- "$(dirname -- "$OUTPUT_JSON")"

# Build the argument list in an array so that every value reaches the Python
# script as exactly one argument, even if it contains spaces or glob
# characters. (Unquoted `$( ... && echo ... )` substitutions would be
# word-split and glob-expanded by the shell.)
# Arguments are appended in the same order the script has always used.
args=(
  --model_name_or_path "$MODEL_NAME_OR_PATH"
  --output_json "$OUTPUT_JSON"
  --dataset "$DATASET"
  --dataset_path "$DATASET_PATH"
  --dataset_split "$DATASET_SPLIT"
  --type_prefix "$TYPE_PREFIX"
)

# Optional: only forwarded when a dataset config name is set.
if [[ -n "$DATASET_CONFIG" ]]; then
  args+=(--dataset_config "$DATASET_CONFIG")
fi

args+=(
  --max_turns "$MAX_TURNS"
  --max_actions_per_traj "$MAX_ACTIONS_PER_TRAJ"
  --max_actions_per_turn "$MAX_ACTIONS_PER_TURN"
  --format_penalty "$FORMAT_PENALTY"
  --instruction_max_tokens "$INSTRUCTION_MAX_TOKENS"
  --action_sep "$ACTION_SEP"
  --prompt_mode "$PROMPT_MODE"
  --fix_mistral_regex
  --batch_size "$BATCH_SIZE"
  --max_tokens "$MAX_TOKENS"
  --temperature "$TEMPERATURE"
  --top_p "$TOP_P"
  --seed "$SEED"
  --tensor_parallel_size "$TENSOR_PARALLEL_SIZE"
  --gpu_memory_utilization "$GPU_MEMORY_UTILIZATION"
  --max_num_seqs "$MAX_NUM_SEQS"
  --max_lora_rank "$MAX_LORA_RANK"
  --lora_extra_vocab_size "$LORA_EXTRA_VOCAB_SIZE"
)

# Optional value arguments: omitted entirely when the setting is empty.
if [[ -n "$LORA_PATH" ]]; then
  args+=(--lora_path "$LORA_PATH")
fi
if [[ -n "$MAX_SAMPLES" ]]; then
  args+=(--max_samples "$MAX_SAMPLES")
fi

# Boolean switches: the flag is passed only when the setting is exactly "1".
if [[ "$DISABLE_THINK" == "1" ]]; then
  args+=(--disable_think)
fi
if [[ "$USE_CHAT_TEMPLATE" == "1" ]]; then
  args+=(--use_chat_template)
fi
if [[ "$TRUST_REMOTE_CODE" == "1" ]]; then
  args+=(--trust_remote_code)
fi

python src/eval_math_multiturn_vllm.py "${args[@]}"
