{
    "uuid": "a911834c-2700-51fa-8a37-ab3649fdd8d7",
    "question": "In section 4, what research quesitions do the authors aim to answer?",
    "answer_format": "Your answer should be be plein text DIRECTLY FROM THE PDF.",
    "tags": [
        "single",
        "subjective",
        "text"
    ],
    "conference": [],
    "reasoning_steps": [
        "Read section 4 to find the research questions the authors aim to answer."
    ],
    "evaluator": {
        "eval_func": "eval_reference_answer_with_llm",
        "eval_kwargs": {
            "reference_answer": "In this section, we aim to answer three research questions: (RQ1) How do different medical generation models perform under our evaluation? (RQ2) How is the evaluation quality of DOCLENS compared to existing metrics? (RQ3) How is the evaluation quality of DOCLENS computed with open-source evaluators compared to proprietary ones?",
            "question": "In section 4, what research quesitions do the authors aim to answer?"
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human",
    "anchor_pdf": [
        "b8ba5cee-e8d9-504b-a2a7-dc210b814ece"
    ],
    "reference_pdf": []
}