{
    "uuid": "46ea5bb8-9895-5439-8f45-8e1792b1ec8b",
    "question": "On the ALFWorld dataset experiments, how much did the success rate improve when the authors used their method compared to the original baseline model?",
    "answer_format": "Your answer should be a floating-point number with one decimal place.",
    "tags": ["single", "table", "objective"],
    "anchor_pdf": ["3ca4cb71-29ee-509e-abfb-cbd14fd93a8e"],
    "reference_pdf": [],
    "conference": [],
    "reasoning_steps": ["First, find the information about the ALFWorld dataset in the experimental section of the paper.",
                        "Then, locate the success rates of the authors' method and the original baseline model.",
                        "Finally, calculate the difference to obtain the final result."],
    "evaluator": {
        "eval_func": "eval_float_exact_match",
        "eval_kwargs": {"gold": 3.5}
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human"
}