# Task definition for the `multi_view_in_domain` configuration of the
# LLaVA-NeXT-Interleave benchmark dataset.
dataset_path: lmms-lab/LLaVA-NeXT-Interleave-Bench
dataset_name: multi_view_in_domain
dataset_kwargs:
  # Canonical lowercase boolean, matching `higher_is_better: true` below.
  # NOTE(review): presumably forwarded to the dataset loader to enable
  # authenticated access - confirm against the loader.
  token: true
task: "llava_interleave_bench_multi_view"
test_split: test
# Name of the document field holding the reference answer.
doc_to_target: "answer"
# The `!function` entries below point at callables in `utils`; their
# behavior is defined there, not in this file.
doc_to_visual: !function utils.doc_to_visual
doc_to_text: !function utils.doc_to_text_conversation
process_results: !function utils.interleave_process_results

# Metrics reported for this task. A single metric is declared; its
# aggregation is delegated to `utils.overall_score`.
metric_list:
  - metric: overall_score
    higher_is_better: true
    aggregation: !function utils.overall_score

# Generation settings for this task.
generation_kwargs:
  max_new_tokens: 16
  temperature: 0
  # Canonical lowercase boolean, matching `higher_is_better: true` above.
  do_sample: false
  # For multi-image inputs, each image is handled at its original aspect
  # ratio, without the anyres strategy.
  image_aspect_ratio: "pad"

# Prompt options under the `default` profile. Both post-prompts are
# explicitly set to the empty string.
# NOTE(review): `oe_` / `mcq_` presumably stand for open-ended and
# multiple-choice questions - confirm against the `utils` module.
lmms_eval_specific_kwargs:
  default:
    oe_post_prompt: ''
    mcq_post_prompt: ''
