---
# Task definition: VisualWebBench, "webqa" dataset configuration.
task: visualwebbench_webqa
output_type: generate_until

# Dataset source and the split used for evaluation.
dataset_path: lmms-lab/VisualWebBench
dataset_name: webqa
test_split: test
dataset_kwargs:
  # NOTE(review): presumably forwarded to the dataset loader to enable
  # authenticated access -- confirm against the consuming harness.
  token: true

# Per-document hooks (visual input, prompt text, reference target).
# NOTE(review): the function tag presumably resolves each name in a
# utils module next to this file -- confirm.
doc_to_visual: !function utils.visualwebbench_doc_to_visual
doc_to_text: !function utils.visualwebbench_doc_to_text
doc_to_target: !function utils.visualwebbench_doc_to_target

# Both prompt affixes are empty strings in the default profile.
lmms_eval_specific_kwargs:
  default:
    pre_prompt: ""
    post_prompt: ""

# Decoding settings: sampling disabled, single beam, at most 16 new tokens.
generation_kwargs:
  max_new_tokens: 16
  do_sample: false
  num_beams: 1
  temperature: 0
  top_p: 1.0

# Scoring: a single f1_score metric where higher is better.
process_results: !function utils.visualwebbench_process_results_f1
metric_list:
  - metric: f1_score
    aggregation: !function utils.visualwebbench_f1_score_aggregate
    higher_is_better: true

metadata:
  - version: 0.0
