{
    "uuid": "a93430e0-ae3b-585d-8622-ed9b5844da8c",
    "question": "In Experiment Section of the paper, what is the overall framework of the baseline model achieving the second best BERTScore on the dataset LOCOMO?",
    "answer_format": "Your answer should be a python strings about the detailed overall framework of the baseline model.",
    "tags": [
        "multiple",
        "subjective",
        "text",
        "table"
    ],
    "anchor_pdf": [
        "dbad7ff2-b141-56da-869e-e2eacc675417"
    ],
    "reference_pdf": [
        "ffa706cf-0129-55f6-b463-6c5a458933c2",
        "b7f2cb42-c26b-5b4d-b8dd-6365498dbd01"
    ],
    "conference": [],
    "reasoning_steps": [
        "Find the baseline model achieving the second best BERTScore on the dataset LOCOMO, which is usually in the table of the experimental section.",
        "Locate the relative papers about the baseline model.",
        "Find the overall framework of the baseline model in the paper, which is usually in the methodology section."
    ],
    "evaluator": {
        "eval_func": "eval_reference_answer_with_llm",
        "eval_kwargs": {
            "reference_answer": "The overall framework of MemoChat pipeline is a memorization-retrieval-response loop inner thinking. Very different from traditional methods that retrieve directly on these accumulated dialogues, the chatbot will automatically builds and updates a structured on-the-fly memo, storing past dialogues in categories. Then, the retrieval is conducted over all recordings according to their topics and summaries",
            "question": "In Experiment Section of the paper, what is the overall framework of the baseline model achieving the second best BERTScore on the dataset LOCOMO?"
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human"
}