---
# Evaluation run: one API-served model, scored on the datasets listed under
# `data`, using the VLMEvalKit evaluation backend.
eval_backend: 'VLMEvalKit'

eval_config:
  # Model(s) under evaluation; each list entry describes one endpoint.
  model:
    # `type` must match the --served-model-name given to your vLLM server.
    - type: 'InternVL3-2B'
      name: 'CustomAPIModel'
      # Chat-completions URL of your VLM server.
      api_base: 'http://localhost:8000/v1/chat/completions'
      # NOTE(review): placeholder key; presumably the local server does not
      # validate it - confirm.
      key: 'EMPTY'
      temperature: 0.0
      # max_tokens: 8192
      # NOTE(review): unit is presumably seconds - confirm against the client.
      timeout: 900

  # Datasets to run, in the order listed.
  data:
    - 'MathVista_MINI'
    - 'MathVerse_MINI'

  mode: 'all'
  judge: 'gpt-4.1-mini-2025-04-14'
  nproc: 32
  work_dir: '/home/<ANONYMIZED>/Vision-R1/adaptive-reasoning-frameworks/scripts/outputs/'

  # Sampling controls.
  # NOTE(review): verify that the consuming tool actually reads `seed` and
  # `shuffle`; they are not confirmed options of the stock backend.
  #
  # Keep `limit` in line with your other configs; delete the key to run the
  # full evaluation.
  limit: 256
  # Fixed seed makes shuffling deterministic: same seed, same sample order.
  seed: 42
  # Shuffle so that the limited subset is a representative sample.
  shuffle: true