# Run configuration: pairs one benchmark definition with one model definition.
# Both values are relative paths to other YAML files.
# NOTE(review): presumably resolved relative to this file's directory —
# confirm against the loader.

# Benchmark definition file.
benchmark: ../benchmarks/nq.yaml

# Model definition file.
model: ../models/phi4reasoning.yaml