{
    "uuid": "e89d9ee6-ed85-55bc-98fc-687823d1695f",
    "question": "What data augmentation strategies are used in the recently proposed dataset used in this paper?",
    "answer_format": "Your answer should be a python strings about the detailed data augmentation strategies.",
    "tags": [
        "multiple",
        "subjective",
        "text"
    ],
    "conference": [],
    "reasoning_steps": [
        "Find the recently proposed dataset used in the paper, which is usually in the experimental section.",
        "Locate the relative papers about the dataset.",
        "Find the data augumentation strategies in the paper."
        
    ],
    "evaluator": {
        "eval_func": "eval_reference_answer_with_llm",
        "eval_kwargs": {
            "reference_answer": "To achieve the balance between the two-way translation in language pairs, two data augmentation strategies were utilized to enrich the corpus if necessary: In cases where the number of parallel corpus falls below 1 million, we flip the entire corpus to create the corpus for the opposite translation direction. In contrast, for corpora with more than 1 million instances, we randomly flip half the amount of corpus to generate the corresponding corpus. After data augmenting, the initial corpus of 142 translation directions is substantially enriched, expanding to a significantly larger corpus of 242 translation directions.",
            "question": "What data augmentation strategies are used in the recently proposed dataset used in this paper?"
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human",
    "anchor_pdf": [
        "2c601b1c-36cd-5106-9b44-889c55a377c8"
    ],
    "reference_pdf": [
        "063cfa76-8115-5e5e-a5c3-c794ee055b2c"
    ]
}