#!/bin/bash
set -euo pipefail

# Generic EvalPlus evaluation wrapper.
#
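# Usage:
#   CUDA_VISIBLE_DEVICES=0 ./eval.sh <model_or_path> [dataset] [backend] [temperature]
#
# Arguments:
#   model_or_path  required; passed to evalplus.evaluate as --model
#   dataset        optional; defaults to humaneval
#   backend        optional; defaults to dllm
#   temperature    optional; defaults to 0.1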
#
# Examples:
#   CUDA_VISIBLE_DEVICES=0 ./eval.sh Qwen/Qwen2.5-Coder-7B humaneval vllm 0.0
#   CUDA_VISIBLE_DEVICES=0 ./eval.sh /path/to/local/checkpoint mbpp dllm 0.1

# Positional arguments: the model is mandatory (the script aborts with the
# usage text when it is missing or empty); the others fall back to defaults.
model_ref="${1:?Usage: $0 <model_or_path> [dataset] [backend] [temperature]}"
dataset_name="${2:-humaneval}"
backend_name="${3:-dllm}"
sampling_temp="${4:-0.1}"

# Assemble the evalplus.evaluate command line as an array so that every value
# is handed over as exactly one argument. --max_new_tokens and
# --diffusion_steps are both fixed at 768, and --trust_remote_code is always
# passed as True.
eval_args=(
  --model "$model_ref"
  --trust_remote_code True
  --max_new_tokens 768
  --diffusion_steps 768
  --dataset "$dataset_name"
  --backend "$backend_name"
  --temperature "$sampling_temp"
)

# Last command of the script: its exit status becomes the script's status.
evalplus.evaluate "${eval_args[@]}"
