#!/usr/bin/env bash
#
# Run benchmark_evaluation.py against the human-annotation benchmark.
#
# Usage: <this-script> [MODEL]
#   MODEL  Value passed to --model and used as the results sub-directory
#          (default: llama). Options noted by the original author:
#          gpt-5, gpt-4.1, o3, o4-mini, gpt-5-med, deepseek, llama, gpt-5-high
#
# All paths are relative, so run this from the directory that contains
# ./CancerGUIDE. Requires `conda` on PATH and a conda environment named "tb".

# Abort on the first failing command so the evaluation never runs in the
# wrong Python environment after a failed activation.
set -eo pipefail

# `conda activate` is a shell function that only exists once conda's shell
# hook has been evaluated; a script executed non-interactively does not
# inherit it, so load the hook here when it is missing.
if ! declare -F conda >/dev/null; then
  if ! command -v conda >/dev/null; then
    printf 'error: conda not found on PATH\n' >&2
    exit 1
  fi
  # Capture first so a failing hook command is not masked by `eval`.
  conda_hook="$(conda shell.bash hook)"
  eval "$conda_hook"
fi
conda activate tb

# Enabled only after activation: conda's activation scripts may reference
# unset variables and would trip nounset.
set -u

MODEL="${1:-llama}"

python ./CancerGUIDE/main_analysis/benchmark_evaluation.py \
  --model "$MODEL" \
  --benchmark_dir ./CancerGUIDE/data/benchmarks/human_annotations \
  --output_dir "./CancerGUIDE/results/benchmark_results/${MODEL}/human" \
  --benchmark_experiment human_annotations