{
    "uuid": "1cc5d12c-31dd-5e52-92d5-9227e8cfbfcd",
    "question": "What are the main questions that this paper tries to resolve or answer?",
    "answer_format": "Your answer should be a Python list of text strings, with each element being one critical problem that this paper analyzes, e.g., [\"question 1\", \"question 2\", ...].",
    "tags": [
        "single",
        "subjective",
        "text"
    ],
    "conference": [],
    "reasoning_steps": [
        "Usually, the main questions that one work tries to resolve are mentioned in the end of the introduction section or at the beiginning of the experiment section. Search the correpsonding paragraphs of these two parts.",
        "If the questions are organized as evident bullet points, directly use them as the answer.",
        "Otherwise, try to summarize the core questions from the main text."
    ],
    "evaluator": {
        "eval_func": "eval_scoring_points_with_llm",
        "eval_kwargs": {
            "scoring_points": [
                "Is it possible to determine data contamination by solely analyzing the inputs and outputs of existing LLMs?",
                "Do recent GPTs excel in Text-to-SQL tasks in a zero-shot setting both on potentially leaked data and totally unseen one?",
                "Is data contamination affecting the accuracy and reliability of an existing GPT in Text-to-SQL tasks?"
            ],
            "question": "What are the main questions that this paper tries to resolve or answer?",
            "ignore_order": true
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human",
    "anchor_pdf": [
        "effede2d-ed50-597e-b1fb-d5fc1b6dc554"
    ],
    "reference_pdf": []
}