#!/bin/bash

# Launches nalobench_evaluation_test_only.py on the English dataset
# (./datasets/customrpbench_en.json) with one test model and one judge model.
#
# Usage:
#   Either edit the two assignments below, or supply the names through the
#   environment:
#     TEST_MODEL=<name> JUDGE_MODEL=<name> bash <this script>

# Abort on command failure, unset variables, and failed pipeline stages.
set -euo pipefail

# names from the models.yaml file
test_model="${TEST_MODEL:-}"
judge_model="${JUDGE_MODEL:-}"

# An empty name would make its flag consume the next option as its value and
# would produce an output file named "./results/-en.json", so stop early.
if [[ -z "${test_model}" || -z "${judge_model}" ]]; then
  printf '%s\n' \
    "error: test_model and judge_model must both be set" \
    "       (edit this script or export TEST_MODEL / JUDGE_MODEL)" >&2
  exit 1
fi

# main evaluation code
# Every expansion is quoted so a name containing whitespace or glob characters
# reaches the Python script as a single, unmodified argument.
python nalobench_evaluation_test_only.py \
    --config ./configs/models.yaml \
    --test_model "${test_model}" \
    --judge_model "${judge_model}" \
    --dataset ./datasets/customrpbench_en.json \
    --output "./results/${test_model}-en.json" \
    --summary ./results/summary-en.json \
    --concurrency 1