{
    "uuid": "8f561f35-51ae-5330-97aa-f547f89f4d26",
    "question": "According to the review in the MSAD paper, which was the largest multi-domain VAD benchmark dataset before MSAD? Additionally, in that dataset, which two scenarios have the most videos?",
    "answer_format": "Your answer should be a Python list of 2 elements: the first is the name of the dataset, and the second is a list of 2 strings giving the full names of the scenarios.",
    "tags": [
        "multiple",
        "text",
        "table",
        "image",
        "objective"
    ],
    "anchor_pdf": [
        "0e26d1b4-43af-5cf5-9626-deef7dcfc6ae"
    ],
    "reference_pdf": [
        "d1290795-a058-5b1b-b9f4-9392472d83b0",
        "9413e819-8187-5e64-8aef-3507676d0855"
    ],
    "conference": [],
    "reasoning_steps": [
        "Find the table that lists the VAD benchmark datasets.",
        "Identify the largest dataset with multiple domains.",
        "Read the corresponding paper to find the scenarios with the most videos.",
        "Read the other section and images to identify the full names."
    ],
    "evaluator": {
        "eval_func": "eval_conjunction",
        "eval_kwargs": {
            "eval_func_list": [
                "eval_string_exact_match",
                "eval_structured_object_exact_match"
            ],
            "eval_kwargs_list": [
                {
                    "gold": "CUVA",
                    "lowercase": true
                },
                {
                    "gold": ["Pedestrian Incidents", "Forbidden to Burn"],
                    "lowercase": true,
                    "ignore_order": true,
                    "ignore_blank": true
                }
            ]
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human"
}