[
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_0.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. headache causes central groin trouble\nB. central groin trouble causes headache\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_0.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_1.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L S2 radiculopathy causes L C6 radiculopathy\nB. L C6 radiculopathy causes L S2 radiculopathy\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_1.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_2.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L Central chest pain causes L shoulder impingement\nB. L shoulder impingement causes L Central chest pain\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_2.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_3.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L chest disorders causes L eye problems\nB. L eye problems causes L chest disorders\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_3.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_4.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R tibia pain causes R T2 radiculopathy\nB. R T2 radiculopathy causes R tibia pain\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_4.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_5.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L achillodyni causes R morton trouble\nB. R morton trouble causes L achillodyni\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_5.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_6.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L jaw problems causes L ankle pain\nB. L ankle pain causes L jaw problems\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_6.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_7.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R achillodyni causes L finger trouble\nB. L finger trouble causes R achillodyni\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_7.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_8.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L forehead headache causes R C6 radiculopathy\nB. R C6 radiculopathy causes L forehead headache\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_8.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_9.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. backache causes L leg pain\nB. L leg pain causes backache\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_9.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_10.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R thumb trouble causes L Central chest pain\nB. L Central chest pain causes R thumb trouble\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_10.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_11.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R L5 radiculopathy causes L L1 radiculopathy\nB. L L1 radiculopathy causes R L5 radiculopathy\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_11.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_12.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L T5 radiculopathy causes R hip pain\nB. R hip pain causes L T5 radiculopathy\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_12.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_13.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R T3 radiculopathy causes L arch pain\nB. L arch pain causes R T3 radiculopathy\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_13.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_14.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R tibia pain causes L tibia pain\nB. L tibia pain causes R tibia pain\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_14.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_15.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. chest disorders causes R L5 radiculopathy\nB. R L5 radiculopathy causes chest disorders\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_15.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_16.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L ham problem causes R ham problem\nB. R ham problem causes L ham problem\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_16.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_17.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L S2 radiculopathy causes L front knee pain\nB. L front knee pain causes L S2 radiculopathy\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_17.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_18.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R lateral foot pain causes R C6 radiculopathy\nB. R C6 radiculopathy causes R lateral foot pain\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_18.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_19.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L knee pain causes L lateral foot pain\nB. L lateral foot pain causes L knee pain\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_19.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_20.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L tibia pain causes L tibia pain\nB. L tibia pain causes L tibia pain\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_20.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_21.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L leg pain causes L lateral arm discomfort\nB. L lateral arm discomfort causes L leg pain\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_21.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_22.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R leg pain causes R medical obesity\nB. R medical obesity causes R leg pain\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_22.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_23.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R tibia pain causes L C6 radiculopathy\nB. L C6 radiculopathy causes R tibia pain\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_23.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_24.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L achilles problems causes L C4 radiculopathy\nB. L C4 radiculopathy causes L achilles problems\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "Neuropathic_24.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_25.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R hip pain causes L L4 radiculopathy\nB. L L4 radiculopathy causes R hip pain\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_25.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_26.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L lower abdominal discomfort causes L T12 radiculopathy\nB. L T12 radiculopathy causes L lower abdominal discomfort\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_26.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_27.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L groin trouble causes R L1 radiculopathy\nB. R L1 radiculopathy causes L groin trouble\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_27.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_28.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L lateral arm discomfort causes R C6 radiculopathy\nB. R C6 radiculopathy causes L lateral arm discomfort\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_28.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_29.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R C6 radiculopathy causes R elbow pain\nB. R elbow pain causes R C6 radiculopathy\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "A",
        "data_files": [
            "Neuropathic_29.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_30.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. IBS causes R L1 radiculopathy\nB. R L1 radiculopathy causes IBS\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_30.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_31.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L C5 radiculopathy causes neck pain\nB. neck pain causes L C5 radiculopathy\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "A",
        "data_files": [
            "Neuropathic_31.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_32.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R lateral elbow pain causes R C5 radiculopathy\nB. R C5 radiculopathy causes R lateral elbow pain\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_32.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_33.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L C6 radiculopathy causes R hand problems\nB. R hand problems causes L C6 radiculopathy\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "A",
        "data_files": [
            "Neuropathic_33.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_34.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L lateral obesity causes L L4 radiculopathy\nB. L L4 radiculopathy causes L lateral obesity\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_34.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_35.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R thigh pain causes R S1 radiculopathy\nB. R S1 radiculopathy causes R thigh pain\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_35.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_36.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L T5 radiculopathy causes L chest disorders\nB. L chest disorders causes L T5 radiculopathy\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "A",
        "data_files": [
            "Neuropathic_36.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_37.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R intracapular problems causes L C5 radiculopathy\nB. L C5 radiculopathy causes R intracapular problems\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_37.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_38.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R Under arm discomfort causes R C6 radiculopathy\nB. R C6 radiculopathy causes R Under arm discomfort\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_38.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_39.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L L1 radiculopathy causes L medical groin trouble\nB. L medical groin trouble causes L L1 radiculopathy\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "A",
        "data_files": [
            "Neuropathic_39.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_40.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L L1 radiculopathy causes L adductor tendon\nB. L adductor tendon causes L L1 radiculopathy\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "A",
        "data_files": [
            "Neuropathic_40.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_41.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. IBS causes L T10 radiculopathy\nB. L T10 radiculopathy causes IBS\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_41.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_42.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R L5 radiculopathy causes L back headache\nB. L back headache causes R L5 radiculopathy\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "A",
        "data_files": [
            "Neuropathic_42.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_43.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R chest disorders causes L T5 radiculopathy\nB. L T5 radiculopathy causes R chest disorders\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_43.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_44.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L lateral elbow pain causes L C6 radiculopathy\nB. L C6 radiculopathy causes L lateral elbow pain\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_44.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_45.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L medical groin trouble causes L L1 radiculopathy\nB. L L1 radiculopathy causes L medical groin trouble\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_45.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_46.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R Lumbago causes L L4 radiculopathy\nB. L L4 radiculopathy causes R Lumbago\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_46.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_47.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R Ischias causes R L5 radiculopathy\nB. R L5 radiculopathy causes R Ischias\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_47.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_48.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L C7 radiculopathy causes L finger trouble\nB. L finger trouble causes L C7 radiculopathy\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "A",
        "data_files": [
            "Neuropathic_48.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_49.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. lower abdominal discomfort causes R L2 radiculopathy\nB. R L2 radiculopathy causes lower abdominal discomfort\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "Neuropathic_49.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_0.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R L5 radiculopathy causes R morton trouble\nB. R morton trouble causes R L5 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_0.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_1.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L obesity causes R S1 radiculopathy\nB. R S1 radiculopathy causes L obesity\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_1.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_2.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L finger trouble causes R C7 radiculopathy\nB. R C7 radiculopathy causes L finger trouble\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_2.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_3.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R L1 radiculopathy causes IBS\nB. IBS causes R L1 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_3.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_4.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. back headache causes R C3 radiculopathy\nB. R C3 radiculopathy causes back headache\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_4.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_5.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. toracal dysfunction causes L T6 radiculopathy\nB. L T6 radiculopathy causes toracal dysfunction\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_5.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_6.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R lateral foot pain causes L S1 radiculopathy\nB. L S1 radiculopathy causes R lateral foot pain\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_6.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_7.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R front shoulder discomfort causes R C5 radiculopathy\nB. R C5 radiculopathy causes R front shoulder discomfort\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_7.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_8.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R medical obesity causes R L4 radiculopathy\nB. R L4 radiculopathy causes R medical obesity\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_8.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_9.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R heel pain causes L S1 radiculopathy\nB. L S1 radiculopathy causes R heel pain\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_9.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_10.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R lateral groin trouble causes R L2 radiculopathy\nB. R L2 radiculopathy causes R lateral groin trouble\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_10.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_11.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L T10 radiculopathy causes IBS\nB. IBS causes L T10 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_11.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_12.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L L5 radiculopathy causes R PFS\nB. R PFS causes L L5 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_12.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_13.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L fainting causes R L5 radiculopathy\nB. R L5 radiculopathy causes L fainting\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_13.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_14.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R C5 radiculopathy causes L arm discomfort\nB. L arm discomfort causes R C5 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_14.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_15.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R C7 radiculopathy causes L shoulder problems\nB. L shoulder problems causes R C7 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_15.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_16.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L S1 radiculopathy causes R heel pain\nB. R heel pain causes L S1 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_16.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_17.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R L5 radiculopathy causes R leg pain\nB. R leg pain causes R L5 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_17.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_18.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R C5 radiculopathy causes R shoulder impingement\nB. R shoulder impingement causes R C5 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_18.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_19.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L bend of arm problems causes L C6 radiculopathy\nB. L C6 radiculopathy causes L bend of arm problems\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_19.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_20.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R L3 radiculopathy causes L leg pain\nB. L leg pain causes R L3 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_20.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_21.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L hip pain causes L L4 radiculopathy\nB. L L4 radiculopathy causes L hip pain\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_21.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_22.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L intracapular problems causes L C6 radiculopathy\nB. L C6 radiculopathy causes L intracapular problems\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_22.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_23.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R C7 radiculopathy causes R shoulder trouble\nB. R shoulder trouble causes R C7 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_23.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_24.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R S2 radiculopathy causes R adductor tendon\nB. R adductor tendon causes R S2 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_24.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_25.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R bend of arm problems causes R C6 radiculopathy\nB. R C6 radiculopathy causes R bend of arm problems\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_25.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_26.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L back headache causes R L3 radiculopathy\nB. R L3 radiculopathy causes L back headache\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_26.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_27.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. upper abdominal discomfort causes L T7 radiculopathy\nB. L T7 radiculopathy causes upper abdominal discomfort\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_27.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_28.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. toracal dysfunction causes R T4 radiculopathy\nB. R T4 radiculopathy causes toracal dysfunction\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_28.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_29.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R S1 radiculopathy causes L little toe problems\nB. L little toe problems causes R S1 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_29.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_30.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R medical knee joint disorder causes R L5 radiculopathy\nB. R L5 radiculopathy causes R medical knee joint disorder\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_30.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_31.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R L5 radiculopathy causes L front knee pain\nB. L front knee pain causes R L5 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_31.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_32.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. chest discomfort causes L T2 radiculopathy\nB. L T2 radiculopathy causes chest discomfort\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_32.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_33.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L C7 radiculopathy causes L shoulder problems\nB. L shoulder problems causes L C7 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_33.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_34.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L big toe problems causes L L5 radiculopathy\nB. L L5 radiculopathy causes L big toe problems\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_34.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_35.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R knee pain causes L L4 radiculopathy\nB. L L4 radiculopathy causes R knee pain\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_35.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_36.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R C7 radiculopathy causes L Under arm discomfort\nB. L Under arm discomfort causes R C7 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_36.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_37.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L shoulder trouble causes L C4 radiculopathy\nB. L C4 radiculopathy causes L shoulder trouble\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_37.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_38.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. intracapular problems causes R C6 radiculopathy\nB. R C6 radiculopathy causes intracapular problems\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_38.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_39.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R C6 radiculopathy causes intracapular problems\nB. intracapular problems causes R C6 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_39.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_40.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L C5 radiculopathy causes L shoulder trouble\nB. L shoulder trouble causes L C5 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_40.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_41.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. R knee pain causes R L5 radiculopathy\nB. R L5 radiculopathy causes R knee pain\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_41.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_42.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L L5 radiculopathy causes backache\nB. backache causes L L5 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_42.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_43.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L chest disorders causes R T5 radiculopathy\nB. R T5 radiculopathy causes L chest disorders\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_43.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_44.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. lower abdominal discomfort causes R T11 radiculopathy\nB. R T11 radiculopathy causes lower abdominal discomfort\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_44.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_45.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. abdominal discomfort causes L L2 radiculopathy\nB. L L2 radiculopathy causes abdominal discomfort\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_45.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_46.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Lumbago causes R S1 radiculopathy\nB. R S1 radiculopathy causes Lumbago\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_46.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_47.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L shoulder problems causes R C4 radiculopathy\nB. R C4 radiculopathy causes L shoulder problems\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_47.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_48.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L L4 radiculopathy causes R thigh pain\nB. R thigh pain causes L L4 radiculopathy\nPlease answer with A or B.",
        "answer": "A",
        "data_files": [
            "Neuropathic_pairwise_48.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The neuropathic pain diagnosis mainly consists of symptom diagnosis, pattern diagnosis, and pathophysiological diagnosis. The csv file Neuropathic_pairwise_49.csv contains neuropathic pain diagnosis records in the form of tables of which the row represents different patients and the column represents different diagnostic labels.",
        "question": "Which cause-and-effect relationship is more likely?\nA. L front knee pain causes R L5 radiculopathy\nB. R L5 radiculopathy causes L front knee pain\nPlease answer with A or B.",
        "answer": "B",
        "data_files": [
            "Neuropathic_pairwise_49.csv"
        ],
        "meta_data": {
            "reference": "Neuropathic pain dataset",
            "keywords": [
                "Causality",
                "Pairwise causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_tot_precip causes Residual_GH_mean\nB. Residual_GH_mean causes Residual_tot_precip\nC. The causal relation is double sided between Residual_tot_precip and Residual_GH_mean\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_u10m causes Residual_longwave\nB. Residual_longwave causes Residual_u10m\nC. The causal relation is double sided between Residual_u10m and Residual_longwave\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_cloud_cover causes Residual_longwave\nB. Residual_longwave causes Residual_cloud_cover\nC. The causal relation is double sided between Residual_cloud_cover and Residual_longwave\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "A",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_v10m causes Residual_v10m\nB. Residual_v10m causes Residual_v10m\nC. The causal relation is double sided between Residual_v10m and Residual_v10m\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_v10m causes Residual_tot_precip\nB. Residual_tot_precip causes Residual_v10m\nC. The causal relation is double sided between Residual_v10m and Residual_tot_precip\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_cloud_cover causes Residual_sea_ice\nB. Residual_sea_ice causes Residual_cloud_cover\nC. The causal relation is double sided between Residual_cloud_cover and Residual_sea_ice\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_cloud_water causes Residual_cloud_cover\nB. Residual_cloud_cover causes Residual_cloud_water\nC. The causal relation is double sided between Residual_cloud_water and Residual_cloud_cover\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_cloud_water causes Residual_cloud_water\nB. Residual_cloud_water causes Residual_cloud_water\nC. The causal relation is double sided between Residual_cloud_water and Residual_cloud_water\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_cloud_water causes Residual_tot_precip\nB. Residual_tot_precip causes Residual_cloud_water\nC. The causal relation is double sided between Residual_cloud_water and Residual_tot_precip\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "C",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_u10m causes Residual_u10m\nB. Residual_u10m causes Residual_u10m\nC. The causal relation is double sided between Residual_u10m and Residual_u10m\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_cloud_water causes Residual_SLP\nB. Residual_SLP causes Residual_cloud_water\nC. The causal relation is double sided between Residual_cloud_water and Residual_SLP\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_u10m causes Residual_GH_mean\nB. Residual_GH_mean causes Residual_u10m\nC. The causal relation is double sided between Residual_u10m and Residual_GH_mean\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_heat_flux causes Residual_longwave\nB. Residual_longwave causes Residual_heat_flux\nC. The causal relation is double sided between Residual_heat_flux and Residual_longwave\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_cloud_water causes Residual_sea_ice\nB. Residual_sea_ice causes Residual_cloud_water\nC. The causal relation is double sided between Residual_cloud_water and Residual_sea_ice\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_RH causes Residual_GH_mean\nB. Residual_GH_mean causes Residual_RH\nC. The causal relation is double sided between Residual_RH and Residual_GH_mean\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "B",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_SLP causes Residual_tot_precip\nB. Residual_tot_precip causes Residual_SLP\nC. The causal relation is double sided between Residual_SLP and Residual_tot_precip\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_heat_flux causes Residual_SLP\nB. Residual_SLP causes Residual_heat_flux\nC. The causal relation is double sided between Residual_heat_flux and Residual_SLP\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "C",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_shortwave causes Residual_GH_mean\nB. Residual_GH_mean causes Residual_shortwave\nC. The causal relation is double sided between Residual_shortwave and Residual_GH_mean\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_heat_flux causes Residual_v10m\nB. Residual_v10m causes Residual_heat_flux\nC. The causal relation is double sided between Residual_heat_flux and Residual_v10m\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "C",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_RH causes Residual_longwave\nB. Residual_longwave causes Residual_RH\nC. The causal relation is double sided between Residual_RH and Residual_longwave\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "A",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_tot_precip causes Residual_tot_precip\nB. Residual_tot_precip causes Residual_tot_precip\nC. The causal relation is double sided between Residual_tot_precip and Residual_tot_precip\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_sea_ice causes Residual_SLP\nB. Residual_SLP causes Residual_sea_ice\nC. The causal relation is double sided between Residual_sea_ice and Residual_SLP\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "C",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_GH_mean causes Residual_SLP\nB. Residual_SLP causes Residual_GH_mean\nC. The causal relation is double sided between Residual_GH_mean and Residual_SLP\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "C",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_GH_mean causes Residual_GH_mean\nB. Residual_GH_mean causes Residual_GH_mean\nC. The causal relation is double sided between Residual_GH_mean and Residual_GH_mean\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_RH causes Residual_cloud_cover\nB. Residual_cloud_cover causes Residual_RH\nC. The causal relation is double sided between Residual_RH and Residual_cloud_cover\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "C",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_u10m causes Residual_shortwave\nB. Residual_shortwave causes Residual_u10m\nC. The causal relation is double sided between Residual_u10m and Residual_shortwave\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_sea_ice causes Residual_u10m\nB. Residual_u10m causes Residual_sea_ice\nC. The causal relation is double sided between Residual_sea_ice and Residual_u10m\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "C",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_cloud_water causes Residual_u10m\nB. Residual_u10m causes Residual_cloud_water\nC. The causal relation is double sided between Residual_cloud_water and Residual_u10m\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_u10m causes Residual_heat_flux\nB. Residual_heat_flux causes Residual_u10m\nC. The causal relation is double sided between Residual_u10m and Residual_heat_flux\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "C",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_RH causes Residual_cloud_water\nB. Residual_cloud_water causes Residual_RH\nC. The causal relation is double sided between Residual_RH and Residual_cloud_water\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "C",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_GH_mean causes Residual_sea_ice\nB. Residual_sea_ice causes Residual_GH_mean\nC. The causal relation is double sided between Residual_GH_mean and Residual_sea_ice\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_heat_flux causes Residual_tot_precip\nB. Residual_tot_precip causes Residual_heat_flux\nC. The causal relation is double sided between Residual_heat_flux and Residual_tot_precip\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "C",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_SLP causes Residual_SLP\nB. Residual_SLP causes Residual_SLP\nC. The causal relation is double sided between Residual_SLP and Residual_SLP\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_RH causes Residual_v10m\nB. Residual_v10m causes Residual_RH\nC. The causal relation is double sided between Residual_RH and Residual_v10m\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_v10m causes Residual_longwave\nB. Residual_longwave causes Residual_v10m\nC. The causal relation is double sided between Residual_v10m and Residual_longwave\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_GH_mean causes Residual_heat_flux\nB. Residual_heat_flux causes Residual_GH_mean\nC. The causal relation is double sided between Residual_GH_mean and Residual_heat_flux\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_RH causes Residual_RH\nB. Residual_RH causes Residual_RH\nC. The causal relation is double sided between Residual_RH and Residual_RH\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_longwave causes Residual_longwave\nB. Residual_longwave causes Residual_longwave\nC. The causal relation is double sided between Residual_longwave and Residual_longwave\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_cloud_water causes Residual_heat_flux\nB. Residual_heat_flux causes Residual_cloud_water\nC. The causal relation is double sided between Residual_cloud_water and Residual_heat_flux\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "C",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_longwave causes Residual_cloud_water\nB. Residual_cloud_water causes Residual_longwave\nC. The causal relation is double sided between Residual_longwave and Residual_cloud_water\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "B",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_tot_precip causes Residual_RH\nB. Residual_RH causes Residual_tot_precip\nC. The causal relation is double sided between Residual_tot_precip and Residual_RH\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "C",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_cloud_cover causes Residual_u10m\nB. Residual_u10m causes Residual_cloud_cover\nC. The causal relation is double sided between Residual_cloud_cover and Residual_u10m\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_heat_flux causes Residual_shortwave\nB. Residual_shortwave causes Residual_heat_flux\nC. The causal relation is double sided between Residual_heat_flux and Residual_shortwave\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_GH_mean causes Residual_longwave\nB. Residual_longwave causes Residual_GH_mean\nC. The causal relation is double sided between Residual_GH_mean and Residual_longwave\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "A",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_cloud_water causes Residual_v10m\nB. Residual_v10m causes Residual_cloud_water\nC. The causal relation is double sided between Residual_cloud_water and Residual_v10m\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_SLP causes Residual_shortwave\nB. Residual_shortwave causes Residual_SLP\nC. The causal relation is double sided between Residual_SLP and Residual_shortwave\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_sea_ice causes Residual_RH\nB. Residual_RH causes Residual_sea_ice\nC. The causal relation is double sided between Residual_sea_ice and Residual_RH\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_sea_ice causes Residual_shortwave\nB. Residual_shortwave causes Residual_sea_ice\nC. The causal relation is double sided between Residual_sea_ice and Residual_shortwave\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "C",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_longwave causes Residual_sea_ice\nB. Residual_sea_ice causes Residual_longwave\nC. The causal relation is double sided between Residual_longwave and Residual_sea_ice\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "C",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The arctic dataset in arctic.csv is on the drivers of arctic sea ice thickness (or coverage): what causes the arctic sea coverage to increase or decrease? Variables in the dataset include total cloud water path, sea level pressure, geopotential height, meridional and zonal wind at 10m, net shortwave, longwave flux at the surface and so on.",
        "question": "Which cause-and-effect relationship is more likely?\nA. Residual_SLP causes Residual_longwave\nB. Residual_longwave causes Residual_SLP\nC. The causal relation is double sided between Residual_SLP and Residual_longwave\nD. No causal relationship exists\nPlease answer with A, B, C, or D.",
        "answer": "D",
        "data_files": [
            "arctic.csv"
        ],
        "meta_data": {
            "reference": "Arctic sea ice dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c",
                "d"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. p44/42 causes p44/42\nB. p44/42 causes p44/42\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. P38 causes plcg\nB. plcg causes P38\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pjnk causes plcg\nB. plcg causes pjnk\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pakts473 causes pjnk\nB. pjnk causes pakts473\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. P38 causes P38\nB. P38 causes P38\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PIP3 causes PIP2\nB. PIP2 causes PIP3\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pakts473 causes pakts473\nB. pakts473 causes pakts473\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pakts473 causes p44/42\nB. p44/42 causes pakts473\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PKA causes PIP2\nB. PIP2 causes PKA\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PIP3 causes praf\nB. praf causes PIP3\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PKC causes PIP2\nB. PIP2 causes PKC\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PIP3 causes pakts473\nB. pakts473 causes PIP3\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "A",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PIP2 causes pakts473\nB. pakts473 causes PIP2\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PKC causes p44/42\nB. p44/42 causes PKC\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pjnk causes pjnk\nB. pjnk causes pjnk\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. praf causes plcg\nB. plcg causes praf\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pmek causes pmek\nB. pmek causes pmek\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PIP2 causes PIP2\nB. PIP2 causes PIP2\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. praf causes pjnk\nB. pjnk causes praf\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PIP2 causes plcg\nB. plcg causes PIP2\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PIP3 causes PKA\nB. PKA causes PIP3\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pjnk causes P38\nB. P38 causes pjnk\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. p44/42 causes PIP2\nB. PIP2 causes p44/42\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. praf causes PKA\nB. PKA causes praf\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pmek causes PIP3\nB. PIP3 causes pmek\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PKA causes plcg\nB. plcg causes PKA\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PKC causes plcg\nB. plcg causes PKC\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PKA causes PKA\nB. PKA causes PKA\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PKC causes PIP3\nB. PIP3 causes PKC\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PIP3 causes P38\nB. P38 causes PIP3\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PKC causes PKC\nB. PKC causes PKC\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pmek causes PKC\nB. PKC causes pmek\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pjnk causes PKC\nB. PKC causes pjnk\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pmek causes p44/42\nB. p44/42 causes pmek\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "A",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. p44/42 causes praf\nB. praf causes p44/42\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pakts473 causes pmek\nB. pmek causes pakts473\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pmek causes pjnk\nB. pjnk causes pmek\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PKC causes PKA\nB. PKA causes PKC\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PKC causes praf\nB. praf causes PKC\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "A",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pmek causes PIP2\nB. PIP2 causes pmek\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. plcg causes pakts473\nB. pakts473 causes plcg\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pjnk causes PIP2\nB. PIP2 causes pjnk\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pmek causes PKA\nB. PKA causes pmek\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. P38 causes p44/42\nB. p44/42 causes P38\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PKA causes pakts473\nB. pakts473 causes PKA\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "A",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. P38 causes PKA\nB. PKA causes P38\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. PIP3 causes PIP3\nB. PIP3 causes PIP3\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pmek causes praf\nB. praf causes pmek\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. pmek causes plcg\nB. plcg causes pmek\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The data set in flow.csv offers continuous measurements of expression levels of multiple phosphorylated proteins and phospholipid components in human immune system cells. It contains 7466 cells (n = 7466) and flow cytometry measurements of 11 (p = 11) phosphorylated proteins and phospholipids.",
        "question": "Which cause-and-effect relationship is more likely?\nA. P38 causes pakts473\nB. pakts473 causes P38\nC. No causal relationship exists\nPlease answer with A, B, or C.",
        "answer": "C",
        "data_files": [
            "flow.csv"
        ],
        "meta_data": {
            "reference": "Flow cytometry dataset",
            "keywords": [
                "Causality",
                "Full graph causal discovery",
                "Observational data"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "The CSV file ihdp_0.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect of the dataset? Please round to the nearest hundredth.",
        "answer": "4.02",
        "data_files": [
            "ihdp_0.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_0.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the treated of the dataset? Please round to the nearest hundredth.",
        "answer": "4.00",
        "data_files": [
            "ihdp_0.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_0.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the control of the dataset? Please round to the nearest hundredth.",
        "answer": "4.02",
        "data_files": [
            "ihdp_0.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the control",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_1.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect of the dataset? Please round to the nearest hundredth.",
        "answer": "4.05",
        "data_files": [
            "ihdp_1.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_1.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the treated of the dataset? Please round to the nearest hundredth.",
        "answer": "4.00",
        "data_files": [
            "ihdp_1.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_1.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the control of the dataset? Please round to the nearest hundredth.",
        "answer": "4.06",
        "data_files": [
            "ihdp_1.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the control",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_2.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect of the dataset? Please round to the nearest hundredth.",
        "answer": "4.10",
        "data_files": [
            "ihdp_2.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_2.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the treated of the dataset? Please round to the nearest hundredth.",
        "answer": "4.00",
        "data_files": [
            "ihdp_2.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_2.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the control of the dataset? Please round to the nearest hundredth.",
        "answer": "4.12",
        "data_files": [
            "ihdp_2.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the control",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_3.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect of the dataset? Please round to the nearest hundredth.",
        "answer": "4.27",
        "data_files": [
            "ihdp_3.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_3.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the treated of the dataset? Please round to the nearest hundredth.",
        "answer": "4.00",
        "data_files": [
            "ihdp_3.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_3.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the control of the dataset? Please round to the nearest hundredth.",
        "answer": "4.34",
        "data_files": [
            "ihdp_3.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the control",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_4.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect of the dataset? Please round to the nearest hundredth.",
        "answer": "4.16",
        "data_files": [
            "ihdp_4.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_4.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the treated of the dataset? Please round to the nearest hundredth.",
        "answer": "4.00",
        "data_files": [
            "ihdp_4.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_4.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the control of the dataset? Please round to the nearest hundredth.",
        "answer": "4.20",
        "data_files": [
            "ihdp_4.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the control",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_5.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect of the dataset? Please round to the nearest hundredth.",
        "answer": "4.00",
        "data_files": [
            "ihdp_5.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_5.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the treated of the dataset? Please round to the nearest hundredth.",
        "answer": "4.00",
        "data_files": [
            "ihdp_5.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_5.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the control of the dataset? Please round to the nearest hundredth.",
        "answer": "4.00",
        "data_files": [
            "ihdp_5.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the control",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_6.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect of the dataset? Please round to the nearest hundredth.",
        "answer": "3.99",
        "data_files": [
            "ihdp_6.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_6.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the treated of the dataset? Please round to the nearest hundredth.",
        "answer": "4.00",
        "data_files": [
            "ihdp_6.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_6.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the control of the dataset? Please round to the nearest hundredth.",
        "answer": "3.99",
        "data_files": [
            "ihdp_6.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the control",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_7.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect of the dataset? Please round to the nearest hundredth.",
        "answer": "3.85",
        "data_files": [
            "ihdp_7.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_7.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the treated of the dataset? Please round to the nearest hundredth.",
        "answer": "4.00",
        "data_files": [
            "ihdp_7.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_7.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the control of the dataset? Please round to the nearest hundredth.",
        "answer": "3.82",
        "data_files": [
            "ihdp_7.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the control",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_8.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect of the dataset? Please round to the nearest hundredth.",
        "answer": "10.47",
        "data_files": [
            "ihdp_8.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_8.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the treated of the dataset? Please round to the nearest hundredth.",
        "answer": "4.00",
        "data_files": [
            "ihdp_8.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_8.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the control of the dataset? Please round to the nearest hundredth.",
        "answer": "11.94",
        "data_files": [
            "ihdp_8.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the control",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_9.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect of the dataset? Please round to the nearest hundredth.",
        "answer": "4.59",
        "data_files": [
            "ihdp_9.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_9.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the treated of the dataset? Please round to the nearest hundredth.",
        "answer": "4.00",
        "data_files": [
            "ihdp_9.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file ihdp_9.csv contains data obtained from the Infant Health and Development Program (IHDP). The study is designed to evaluate the effect of home visit from specialist doctors on the cognitive test scores of premature infants. The confounders x (x1-x25) correspond to collected measurements of the children and their mothers, including measurements on the child (birth weight, head circumference, weeks born preterm, birth order, first born, neonatal health index, sex, twin status), as well as behaviors engaged in during the pregnancy (smoked cigarettes, drank alcohol, took drugs) and measurements on the mother at the time she gave birth (age, marital status, educational attainment, whether she worked during pregnancy, whether she received prenatal care) and the site (8 total) in which the family resided at the start of the intervention. There are 6 continuous covariates and 19 binary covariates.",
        "question": "What is the Average treatment effect on the control of the dataset? Please round to the nearest hundredth.",
        "answer": "4.72",
        "data_files": [
            "ihdp_9.csv"
        ],
        "meta_data": {
            "reference": "IHDP dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the control",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file jobs_0.csv contains data obtained from the National Supported Work program. The study is designed to evaluate the effect of job training (t) on the income and employment status after training (y). The confounders x (x0-x16) correspond to covariates such as age and education, as well as previous earnings.",
        "question": "Please estimate the Average treatment effect on the treated (ATT) of the dataset. Please round to the nearest thousandth.",
        "answer": "0.074",
        "data_files": [
            "jobs_0.csv"
        ],
        "meta_data": {
            "reference": "Jobs dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file jobs_1.csv contains data obtained from the National Supported Work program. The study is designed to evaluate the effect of job training (t) on the income and employment status after training (y). The confounders x (x0-x16) correspond to covariates such as age and education, as well as previous earnings.",
        "question": "Please estimate the Average treatment effect on the treated (ATT) of the dataset. Please round to the nearest thousandth.",
        "answer": "0.025",
        "data_files": [
            "jobs_1.csv"
        ],
        "meta_data": {
            "reference": "Jobs dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file jobs_2.csv contains data obtained from the National Supported Work program. The study is designed to evaluate the effect of job training (t) on the income and employment status after training (y). The confounders x (x0-x16) correspond to covariates such as age and education, as well as previous earnings.",
        "question": "Please estimate the Average treatment effect on the treated (ATT) of the dataset. Please round to the nearest thousandth.",
        "answer": "0.081",
        "data_files": [
            "jobs_2.csv"
        ],
        "meta_data": {
            "reference": "Jobs dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file jobs_3.csv contains data obtained from the National Supported Work program. The study is designed to evaluate the effect of job training (t) on the income and employment status after training (y). The confounders x (x0-x16) correspond to covariates such as age and education, as well as previous earnings.",
        "question": "Please estimate the Average treatment effect on the treated (ATT) of the dataset. Please round to the nearest thousandth.",
        "answer": "0.110",
        "data_files": [
            "jobs_3.csv"
        ],
        "meta_data": {
            "reference": "Jobs dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file jobs_4.csv contains data obtained from the National Supported Work program. The study is designed to evaluate the effect of job training (t) on the income and employment status after training (y). The confounders x (x0-x16) correspond to covariates such as age and education, as well as previous earnings.",
        "question": "Please estimate the Average treatment effect on the treated (ATT) of the dataset. Please round to the nearest thousandth.",
        "answer": "0.075",
        "data_files": [
            "jobs_4.csv"
        ],
        "meta_data": {
            "reference": "Jobs dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file jobs_5.csv contains data obtained from the National Supported Work program. The study is designed to evaluate the effect of job training (t) on the income and employment status after training (y). The confounders x (x0-x16) correspond to covariates such as age and education, as well as previous earnings.",
        "question": "Please estimate the Average treatment effect on the treated (ATT) of the dataset. Please round to the nearest thousandth.",
        "answer": "0.097",
        "data_files": [
            "jobs_5.csv"
        ],
        "meta_data": {
            "reference": "Jobs dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file jobs_6.csv contains data obtained from the National Supported Work program. The study is designed to evaluate the effect of job training (t) on the income and employment status after training (y). The confounders x (x0-x16) correspond to covariates such as age and education, as well as previous earnings.",
        "question": "Please estimate the Average treatment effect on the treated (ATT) of the dataset. Please round to the nearest thousandth.",
        "answer": "0.100",
        "data_files": [
            "jobs_6.csv"
        ],
        "meta_data": {
            "reference": "Jobs dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file jobs_7.csv contains data obtained from the National Supported Work program. The study is designed to evaluate the effect of job training (t) on the income and employment status after training (y). The confounders x (x0-x16) correspond to covariates such as age and education, as well as previous earnings.",
        "question": "Please estimate the Average treatment effect on the treated (ATT) of the dataset. Please round to the nearest thousandth.",
        "answer": "0.075",
        "data_files": [
            "jobs_7.csv"
        ],
        "meta_data": {
            "reference": "Jobs dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file jobs_8.csv contains data obtained from the National Supported Work program. The study is designed to evaluate the effect of job training (t) on the income and employment status after training (y). The confounders x (x0-x16) correspond to covariates such as age and education, as well as previous earnings.",
        "question": "Please estimate the Average treatment effect on the treated (ATT) of the dataset. Please round to the nearest thousandth.",
        "answer": "0.094",
        "data_files": [
            "jobs_8.csv"
        ],
        "meta_data": {
            "reference": "Jobs dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The CSV file jobs_9.csv contains data obtained from the National Supported Work program. The study is designed to evaluate the effect of job training (t) on the income and employment status after training (y). The confounders x (x0-x16) correspond to covariates such as age and education, as well as previous earnings.",
        "question": "Please estimate the Average treatment effect on the treated (ATT) of the dataset. Please round to the nearest thousandth.",
        "answer": "0.073",
        "data_files": [
            "jobs_9.csv"
        ],
        "meta_data": {
            "reference": "Jobs dataset",
            "keywords": [
                "Causality",
                "Average treatment effect on the treated",
                "Observational data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "To estimate the impacts of online class format on exam outcomes, the dataset online_classroom.csv was used to compare students in online classes versus face-to-face classes. In this analysis, the treatment effect is measured as the difference in exam scores between the two groups. Each row of the dataset contains a student's exam outcome (the variable falsexam), their classroom format (online, face-to-face, or blended), and other variables like gender and ethnicity.",
        "question": "What is the average treatment effect (ATE) of taking classes online on exam scores? Please use linear regression analysis considering only the students in face-to-face (format_blended = 0 and format_ol = 0) and online classes (format_ol = 1), do not consider other variables, and provide the ATE rounded to the nearest hundredth.",
        "answer": "-4.91",
        "data_files": [
            "online_classroom.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 5",
            "keywords": [
                "Causality",
                "Observational data",
                "Linear regression",
                "Average treatment effect"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "To estimate the impact of an additional year of education on hourly wage, we look at a sample of individuals with varying levels of education and their hourly wages. The dataset wage.csv contains the necessary data for our analysis. The columns in this dataset include 'wage', representing the total income; 'hours', representing the total hours worked; and 'educ', representing the years of education; and other variables like the parents' education and the person's IQ score.",
        "question": "What percentage increase in hourly wage can be expected for each additional year of education, based on the data described? Please use linear regression and do not consider variables other than 'wage', 'hours', and 'educ'. Please provide your answer to the nearest hundredth of a percent.",
        "answer": "5.36%",
        "data_files": [
            "wage.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 5",
            "keywords": [
                "Causality",
                "Observational data",
                "Regression analysis"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "To estimate the impact of an additional year of education on hourly wage, we look at a sample of individuals with varying levels of education and their hourly wages. The dataset wage.csv contains the necessary data for our analysis. The columns in this dataset include 'wage', representing the total income; 'hours', representing the total hours worked; and 'educ', representing the years of education; and other variables like the parents' education and the person's IQ score.",
        "question": "What percentage increase in hourly wage can be expected for each additional year of education, based on the data described? Please use linear regression and take all provided variables into consideration. Please provide your answer to the nearest hundredth of a percent.",
        "answer": "4.11%",
        "data_files": [
            "wage.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 5",
            "keywords": [
                "Causality",
                "Observational data",
                "Regression analysis"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "To estimate the impact of education on hourly wage, we look at a sample of individuals with varying levels of education and their hourly wages. The dataset wage.csv contains the necessary data for our analysis. The columns in this dataset include 'wage', representing the total income; 'hours', representing the total hours worked; and 'educ', representing the years of education; and other variables like the parents' education and the person's IQ score.",
        "question": "Please estimate the effect of graduating 12th grade on hourly wage. Please use linear regression and do not consider variables other than 'wage', 'hours', and 'educ'. Please provide your answer to the nearest hundredth.",
        "answer": "4.90",
        "data_files": [
            "wage.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 6",
            "keywords": [
                "Causality",
                "Observational data",
                "Regression analysis"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "In a study to determine the causal effect of sending an email reminder on the repayment of debts, a fintech company conducted a random test involving 5000 customers who were late on their payments. Each customer was randomly assigned to either receive an email about negotiating their debt or to be part of a control group that did not receive the email. Data was collected on the amounts paid by the late customers after this intervention. The dataset collections_email.csv contains variables including the amount paid (`payments`), whether the email was sent (`email`), whether the email was opened (`opened`), whether the customer contacted the collections department to negotiate their debt after having received the email (`agreement`), the customer's credit limit before being late (`credit_limit`), and the customer's risk score prior to the delivery of the email (`risk_score`).",
        "question": "What is the average treatment effect (ATE) on payments from sending the email reminder to late-paying customers? Please choose the variables to adjust for, conduct linear regression, and round your answer to the nearest hundredth.",
        "answer": "4.43",
        "data_files": [
            "collections_email.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 7",
            "keywords": [
                "Causality",
                "Randomized experiment",
                "Regression analysis",
                "Average treatment effect",
                "Confounder selection"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The dataset hospital_treatment.csv includes data from a randomized trial conducted by two hospitals. Both of them are conducting randomised trials on a new drug to treat a certain illness. The outcome of interest is days hospitalised. If the treatment is effective, it will lower the amount of days the patient stays in the hospital. For one of the hospitals, the policy regarding the random treatment is to give it to 90% of its patients while 10% get a placebo. The other hospital has a different policy: it gives the drug to a random 10% of its patients and 90% get a placebo. You are also told that the hospital that gives 90% of the true drug and 10% of placebo usually gets more severe cases of the illness to treat. The CSV file contains columns for `hospital` indicating the hospital a patient belongs to, `treatment` signifying if the patient received the new drug or placebo, `severity` reflecting the severity of the illness, and `days` representing the number of days the patient was hospitalized.",
        "question": "What is the average treatment effect (ATE) of the new drug on the amount of days the patient stays in the hospital? Please choose the variables to adjust for, conduct linear regression, and round your answer to the nearest hundredth. If the new drug reduces the amount of days the patient stays in the hospital, the answer should be negative.",
        "answer": "-7.59",
        "data_files": [
            "hospital_treatment.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 7",
            "keywords": [
                "Causality",
                "Randomized experiment",
                "Regression analysis",
                "Average treatment effect",
                "Confounder selection"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The dataset hospital_treatment.csv includes data from a randomized trial conducted by two hospitals. Both of them are conducting randomised trials on a new drug to treat a certain illness. The outcome of interest is days hospitalised. If the treatment is effective, it will lower the amount of days the patient stays in the hospital. For one of the hospitals, the policy regarding the random treatment is to give it to 90% of its patients while 10% get a placebo. The other hospital has a different policy: it gives the drug to a random 10% of its patients and 90% get a placebo. You are also told that the hospital that gives 90% of the true drug and 10% of placebo usually gets more severe cases of the illness to treat. The CSV file contains columns for `hospital` indicating the hospital a patient belongs to, `treatment` signifying if the patient received the new drug or placebo, `severity` reflecting the severity of the illness, and `days` representing the number of days the patient was hospitalized.",
        "question": "We are estimating the average treatment effect (ATE) of the new drug on the amount of days the patient stays in the hospital, and we already controlled for the severity. Should we also control for the 'hospital' variable? Please answer with 'yes' or 'no'.",
        "answer": "no",
        "data_files": [
            "hospital_treatment.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 7",
            "keywords": [
                "Causality",
                "Randomized experiment",
                "Regression analysis",
                "Average treatment effect",
                "Confounder selection"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "yes",
                "no"
            ]
        }
    },
    {
        "data_description": "In a study to determine the causal effect of sending an email reminder on the repayment of debts, a fintech company conducted a random test involving 5000 customers who were late on their payments. Each customer was randomly assigned to either receive an email about negotiating their debt or to be part of a control group that did not receive the email. Data was collected on the amounts paid by the late customers after this intervention. The dataset collections_email.csv contains variables including the amount paid (`payments`), whether the email was sent (`email`), whether the email was opened (`opened`), whether the customer contacted the collections department to negotiate their debt after having received the email (`agreement`), the customer's credit limit before being late (`credit_limit`), and the customer's risk score prior to the delivery of the email (`risk_score`).",
        "question": "We are estimating the average treatment effect (ATE) on payments from sending the email reminder to late-paying customers. Should we control for the 'opened' variable? Please answer with 'yes' or 'no'.",
        "answer": "no",
        "data_files": [
            "collections_email.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 7",
            "keywords": [
                "Causality",
                "Randomized experiment",
                "Regression analysis",
                "Average treatment effect",
                "Confounder selection"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "yes",
                "no"
            ]
        }
    },
    {
        "data_description": "In a study to determine the causal effect of sending an email reminder on the repayment of debts, a fintech company conducted a random test involving 5000 customers who were late on their payments. Each customer was randomly assigned to either receive an email about negotiating their debt or to be part of a control group that did not receive the email. Data was collected on the amounts paid by the late customers after this intervention. The dataset collections_email.csv contains variables including the amount paid (`payments`), whether the email was sent (`email`), whether the email was opened (`opened`), whether the customer contacted the collections department to negotiate their debt after having received the email (`agreement`), the customer's credit limit before being late (`credit_limit`), and the customer's risk score prior to the delivery of the email (`risk_score`).",
        "question": "We are estimating the average treatment effect (ATE) on payments from sending the email reminder to late-paying customers. Should we control for the 'agreement' variable? Please answer with 'yes' or 'no'.",
        "answer": "no",
        "data_files": [
            "collections_email.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 7",
            "keywords": [
                "Causality",
                "Randomized experiment",
                "Regression analysis",
                "Average treatment effect",
                "Confounder selection"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "yes",
                "no"
            ]
        }
    },
    {
        "data_description": "In a study to determine the causal effect of sending an email reminder on the repayment of debts, a fintech company conducted a random test involving 5000 customers who were late on their payments. Each customer was randomly assigned to either receive an email about negotiating their debt or to be part of a control group that did not receive the email. Data was collected on the amounts paid by the late customers after this intervention. The dataset collections_email.csv contains variables including the amount paid (`payments`), whether the email was sent (`email`), whether the email was opened (`opened`), whether the customer contacted the collections department to negotiate their debt after having received the email (`agreement`), the customer's credit limit before being late (`credit_limit`), and the customer's risk score prior to the delivery of the email (`risk_score`).",
        "question": "We are estimating the average treatment effect (ATE) on payments from sending the email reminder to late-paying customers. Should we control for the 'risk_score' variable? Please answer with 'yes' or 'no'.",
        "answer": "yes",
        "data_files": [
            "collections_email.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 7",
            "keywords": [
                "Causality",
                "Randomized experiment",
                "Regression analysis",
                "Average treatment effect",
                "Confounder selection"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "yes",
                "no"
            ]
        }
    },
    {
        "data_description": "The dataset provided, ak91.csv, contains information on individuals' log wages, years of schooling, year of birth, quarter of birth, and state of birth. The purpose of the analysis is to estimate the effect of education on wage, using linear regression and quarter of birth as an instrumental variable (IV). This idea takes advantage of US compulsory attendance law. Usually, such laws state that a kid must have turned 6 years old by January 1 of the year they enter school. For this reason, kids that are born at the beginning of the year will enter school at an older age. Compulsory attendance law also requires students to be in school until they turn 16, at which point they are legally allowed to drop out. The result is that people born later in the year have, on average, more years of education than those born in the beginning of the year.",
        "question": "What is the average additional percentage wage increase associated with each additional year of education based on the instrumental variable of whether the individual is born in the last quarter q4? Please adjust for all the other variables, and round your answer to the nearest hundredth of a percent.",
        "answer": "8.53%",
        "data_files": [
            "ak91.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 8",
            "keywords": [
                "Causality",
                "Observational data",
                "Regression analysis",
                "Average treatment effect",
                "Instrumental variables"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "A study is conducted to measure the effect of a marketing push on user engagement, specifically in-app purchases. Some customers who were assigned to receive the push are not receiving it, because they probably have an older phone that doesn\u2019t support the kind of push the marketing team designed.\nThe dataset app_engagement_push.csv contains records for 10,000 random customers. Each record includes whether an in-app purchase was made (in_app_purchase), if a marketing push was assigned to the user (push_assigned), and if the marketing push was successfully delivered (push_delivered).",
        "question": "What is the Local Average Treatment Effect (LATE) of receiving the marketing push on in-app purchases, as estimated using linear regression and instrumental variable, rounded to two decimal places?",
        "answer": "3.29",
        "data_files": [
            "app_engagement_push.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 9",
            "keywords": [
                "Causality",
                "Observational data",
                "Regression analysis",
                "Local average treatment effect",
                "Instrumental variables"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "A study is conducted to measure the effect of a marketing push on user engagement, specifically in-app purchases. Some customers who were assigned to receive the push are not receiving it, because they probably have an older phone that doesn\u2019t support the kind of push the marketing team designed.\nThe dataset app_engagement_push.csv contains records for 10,000 random customers. Each record includes whether an in-app purchase was made (in_app_purchase), if a marketing push was assigned to the user (push_assigned), and if the marketing push was successfully delivered (push_delivered).",
        "question": "Suppose we estimate the Average Treatment Effect of receiving the marketing push on in-app purchases, by conducting linear regression with the formula \"in_app_purchase ~ 1 + push_assigned + push_delivered\". Will the true impact be A) higher than, B) lower than, or C) the same as our estimated impact? Please answer with A, B, or C.",
        "answer": "B",
        "data_files": [
            "app_engagement_push.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 9",
            "keywords": [
                "Causality",
                "Observational data",
                "Regression analysis",
                "Average treatment effect",
                "Instrumental variables"
            ],
            "question_type": "multiple_choice",
            "multiple_choices": [
                "a",
                "b",
                "c"
            ]
        }
    },
    {
        "data_description": "To investigate the effect of a medication on the number of days it takes for a patient to recover from an illness, we have a dataset that includes several confounding variables like severity, sex, and age. The dataset medicine_impact_recovery.csv contains data on patients who have been prescribed medication and those who haven't. The variables include sex (0 or 1), age, the severity of the condition, whether the patient was on medication (0 or 1), and the number of days it took for each patient to recover.",
        "question": "What is the average treatment effect of the medication on the recovery time when controlling for severity, sex, and age using the K nearest neighbour matching? Please scale the features, use the euclidean norm as the matching measurement, and also adjust for the matching bias. Provide your answer rounded to two decimal places. The answer should be positive if the mediation makes the recovery time longer.",
        "answer": "-7.36",
        "data_files": [
            "medicine_impact_recovery.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 10",
            "keywords": [
                "Causality",
                "Observational data",
                "Matching",
                "Average treatment effect"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The National Study of Learning Mindsets is a randomised study conducted in U.S. public high schools which aims at finding the impact of a growth mindset. The way it works is that students receive from the school a seminar to instill in them a growth mindset. Then, they follow up the students in their college years to measure how well they\u2019ve performed academically. This measurement was compiled into an achievement score and standardized. The CSV file learning_mindset.csv contains simulated data of this research.\nVariable Description\nintervention: the intervention of the growth mindset;\nachievement_score: the standardized academic achievement score;\nschoolid: identifier of the student\u2019s school;\nsuccess_expect: self-reported expectations for success in the future, a proxy for prior achievement, measured prior to random assignment;\nethnicity: categorical variable for student race/ethnicity;\ngender: categorical variable for student identified gender;\nfrst_in_family: categorical variable for student first-generation status, i.e. first in family to go to college;\nschool_urbanicity: school-level categorical variable for urbanicity of the school, i.e. rural, suburban, etc;\nschool_mindset: school-level mean of students\u2019 fixed mindsets, reported prior to random assignment, standardized;\nschool_achievement: school achievement level, as measured by test scores and college preparation for the previous 4 cohorts of students, standardized;\nschool_ethnic_minority: school racial/ethnic minority composition, i.e., percentage of student body that is Black, Latino, or Native American, standardized;\nschool_poverty: school poverty concentration, i.e., percentage of students who are from families whose incomes fall below the federal poverty line, standardized;\nschool_size: total number of students in all four grade levels in the school, standardized.",
        "question": "What is the average treatment effect of the growth mindset on the achievement score? Please use propensity score weighting in estimation, use logistic regression to estimate the propensity score, and round the final answer to the nearest hundredth.",
        "answer": "0.39",
        "data_files": [
            "learning_mindset.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 11",
            "keywords": [
                "Causality",
                "Observational data",
                "Matching",
                "Propensity score",
                "Average treatment effect"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The National Study of Learning Mindsets is a randomised study conducted in U.S. public high schools which aims at finding the impact of a growth mindset. The way it works is that students receive from the school a seminar to instill in them a growth mindset. Then, they follow up the students in their college years to measure how well they\u2019ve performed academically. This measurement was compiled into an achievement score and standardized. The CSV file learning_mindset.csv contains simulated data of this research.\nVariable Description\nintervention: the intervention of the growth mindset;\nachievement_score: the standardized academic achievement score;\nschoolid: identifier of the student\u2019s school;\nsuccess_expect: self-reported expectations for success in the future, a proxy for prior achievement, measured prior to random assignment;\nethnicity: categorical variable for student race/ethnicity;\ngender: categorical variable for student identified gender;\nfrst_in_family: categorical variable for student first-generation status, i.e. first in family to go to college;\nschool_urbanicity: school-level categorical variable for urbanicity of the school, i.e. rural, suburban, etc;\nschool_mindset: school-level mean of students\u2019 fixed mindsets, reported prior to random assignment, standardized;\nschool_achievement: school achievement level, as measured by test scores and college preparation for the previous 4 cohorts of students, standardized;\nschool_ethnic_minority: school racial/ethnic minority composition, i.e., percentage of student body that is Black, Latino, or Native American, standardized;\nschool_poverty: school poverty concentration, i.e., percentage of students who are from families whose incomes fall below the federal poverty line, standardized;\nschool_size: total number of students in all four grade levels in the school, standardized.",
        "question": "What is the average treatment effect of the growth mindset on the achievement score? Please conduct doubly robust estimation, use logistic regression to estimate the propensity score, and round the final answer to the nearest hundredth.",
        "answer": "0.39",
        "data_files": [
            "learning_mindset.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 12",
            "keywords": [
                "Causality",
                "Observational data",
                "Matching",
                "Propensity score",
                "Average treatment effect"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "The dataset billboard_impact.csv details information from a quasi-experiment assessing the influence of billboards on bank deposits in two cities: Porto Alegre (treatment group) and Florianopolis (control group). The csv file contains records with three variables: deposits (average bank deposits in Brazilian Reais), poa (A dummy indicator for the city of Porto Alegre. When it is zero, it means the samples are from Florianopolis.), and jul (A dummy for the month of July, or for the post intervention period. When it is zero it refers to samples from May, the pre-intervention period).",
        "question": "What was the average increase in bank deposits per customer in Porto Alegre after the billboard intervention, as estimated by the difference-in-differences approach? Please round the final answer to the nearest hundredth.",
        "answer": "6.52",
        "data_files": [
            "billboard_impact.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 13",
            "keywords": [
                "Causality",
                "Observational data",
                "Difference in differences"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "To estimate the effect of cigarette taxation on its consumption, data from cigarette sales were collected and analyzed across 39 states in the United States from the years 1970 to 2000. Proposition 99, a Tobacco Tax and Health Protection Act passed in California in 1988, imposed a 25-cent per pack state excise tax on tobacco cigarettes and implemented additional restrictions, including the ban on cigarette vending machines in public areas accessible by juveniles and a ban on the individual sale of single cigarettes. Revenue generated was allocated for environmental and health care programs along with anti-tobacco advertising. We aim to determine if the imposition of this tax and the subsequent regulations led to a reduction in cigarette sales. The data is in the CSV file smoking2.csv.",
        "question": "By the year 2000, what was the estimated reduction in per-capita cigarette sales (in packs) in California as a result of Proposition 99, based on the synthetic control method? Please round the final answer to the nearest hundredth.",
        "answer": "24.83",
        "data_files": [
            "smoking2.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 15",
            "keywords": [
                "Causality",
                "Observational data",
                "Synthetic control"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "To estimate the impacts of alcohol on death, we could use the fact that legal drinking age imposes a discontinuity on nature. In the US, those just under 21 years don\u2019t drink (or drink much less) while those just older than 21 do drink. The csv file drinking.csv contains mortality data aggregated by age. Each row is the average age of a group of people and the average mortality by all causes (all), by moving vehicle accident (mva) and by suicide (suicide).",
        "question": "How much is the effect of alcohol consumption on death of all causes at 21 years? Please only consider data from people that are no older than 22 years and no younger than 20 years. Please answer with the magnitude of change in the number of deaths and round to the nearest hundredth.",
        "answer": "0.10",
        "data_files": [
            "drinking.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 16",
            "keywords": [
                "Causality",
                "Observational data",
                "Difference in differences"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "To evaluate the effectiveness of email marketing on customer investment decisions, the dataset invest_email_rnd_train.csv is used for training, and invest_email_rnd_train.csv is used for testing. They contain customer data including id, age, income, insurance, invested amount, binary indicators for whether they received three different emails (em1, em2, em3), and a binary outcome variable indicating whether the customer converted (invested vs. didn't invest) after receiving an email. The goal is to personalize email marketing by sending email-1 only to customers predicted to have the highest increase in the probability of conversion.",
        "question": "Please train a boosted tree model to predict the conditional average treatment effect (CATE) of sending email-1 (em1) on the conversion rate, using demographic and financial attributes of the customers. What is the predicted increase in the probability that customer 6958 will purchase the investment product, if they are sent email-1, according to the CATE estimate provided by the model? Please provide the answer as a percentage, rounded to two decimal places.",
        "answer": "10.57%",
        "data_files": [
            "invest_email_rnd_train.csv",
            "invest_email_rnd_test.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 20",
            "keywords": [
                "Causality",
                "Observational data",
                "Conditional Average Treatment Effect",
                "Target transformation"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "To find the effect of price on ice cream sales, the dataset ice_cream_sales.csv is used for training, and ice_cream_sales_rnd.csv is used for testing. Our test set has randomly assigned prices but our training data has only observational prices, which is potentially biased. Each unit is a day. For each day, we know which day of a week it is, what was the cost we had to make the ice cream (you can think of the cost as a proxy for quality) and the average temperature for that day. Then, we have our treatment, price, and our outcome, the number of ice cream sold.",
        "question": "Using the debiased machine learning approach after correcting for confounding biases related to temperature, cost, and weekday effects, what is the estimated average treatment effect (ATE) of the ice cream price on sales, rounded to two decimal places?",
        "answer": "-3.92",
        "data_files": [
            "ice_cream_sales.csv",
            "ice_cream_sales_rnd.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 22",
            "keywords": [
                "Causality",
                "Observational data",
                "Average treatment effect",
                "Debiased ML"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "To estimate the effect of cigarette taxation on its consumption, data from cigarette sales were collected and analyzed across 39 states in the United States from the years 1970 to 2000. Proposition 99, a Tobacco Tax and Health Protection Act passed in California in 1988, imposed a 25-cent per pack state excise tax on tobacco cigarettes and implemented additional restrictions, including the ban on cigarette vending machines in public areas accessible by juveniles and a ban on the individual sale of single cigarettes. Revenue generated was allocated for environmental and health care programs along with anti-tobacco advertising. We aim to determine if the imposition of this tax and the subsequent regulations led to a reduction in cigarette sales. The data is in the CSV file smoking2.csv.",
        "question": "What is the Average Treatment Effect on the treated (ATT) of Proposition 99 on cigarette sales, as estimated by the difference-in-differences approach? Please round the final answer to the nearest hundredth. Do not consider variables other than cigsale, california, and after_treatment. The answer is positive if the proposition increases the sale.",
        "answer": "-27.35",
        "data_files": [
            "smoking2.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 25",
            "keywords": [
                "Causality",
                "Observational data",
                "Average treatment effect on the treated",
                "Difference in differences"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "To estimate the effect of cigarette taxation on its consumption, data from cigarette sales were collected and analyzed across 39 states in the United States from the years 1970 to 2000. Proposition 99, a Tobacco Tax and Health Protection Act passed in California in 1988, imposed a 25-cent per pack state excise tax on tobacco cigarettes and implemented additional restrictions, including the ban on cigarette vending machines in public areas accessible by juveniles and a ban on the individual sale of single cigarettes. Revenue generated was allocated for environmental and health care programs along with anti-tobacco advertising. We aim to determine if the imposition of this tax and the subsequent regulations led to a reduction in cigarette sales. The data is in the CSV file smoking2.csv.",
        "question": "What is the Average Treatment Effect on the treated (ATT) of Proposition 99 on cigarette sales, as estimated by the synthetic control approach? Please round the final answer to the nearest hundredth. Do not consider variables other than year, state, cigsale, california, and after_treatment. The answer is positive if the proposition increases the sale.",
        "answer": "-19.51",
        "data_files": [
            "smoking2.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 25",
            "keywords": [
                "Causality",
                "Observational data",
                "Average treatment effect on the treated",
                "Synthetic control"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "We are trying to estimate the effect of a trainee program on earnings. Data in the CSV file trainee_unique_on_age.csv contains trainee status, age, and earnings of each unit. Trainees are much younger than non trainees, which indicates that age is probably a confounder.",
        "question": "What is the Average Treatment Effect on the treated (ATT) of the trainee program on earnings, as estimated by a matching estimator? Please round the final answer to the nearest hundredth. The answer is positive if the trainee program increases the earnings.",
        "answer": "2457.89",
        "data_files": [
            "trainee_unique_on_age.csv"
        ],
        "meta_data": {
            "reference": "Causal Inference for the Brave and True 10",
            "keywords": [
                "Causality",
                "Observational data",
                "Average treatment effect on the treated",
                "Matching"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "Researchers conducted randomized policy experiment in India where, since the mid-1990s, one-third of village council heads have been randomly reserved for female politicians. The CSV data set women.csv contains a subset of this data from West Bengal. The policy was implemented at the level of government called Gram Panchayat or GP. Each GP contains many villages. For this study, two villages were selected at random within each GP for detailed data collection. Each observation in the data set represents a village and there are two villages associated with each GP.\n\nVariable Description\nGP: identifier for the Gram Panchayat (GP)\nvillage: identifier for each village\nreserved: binary variable indicating whether the GP was reserved for women leaders or not\nfemale: binary variable indicating whether the GP had a female leader or not\nirrigation: variable measuring the number of new or repaired irrigation facilities in the village since the reserve policy started\nwater: variable measuring the number of new or repaired drinking water facilities in the village since the reservation policy started",
        "question": "To explore if female politicians are more likely to support policies about drinking water, please estimate the average causal effects of the reservation policy on the number of new or repaired drinking water facilities since the reserve policy started. Please round to the nearest hundredth.",
        "answer": "9.25",
        "data_files": [
            "women.csv"
        ],
        "meta_data": {
            "reference": "Quantitative Social Science 4.3.1",
            "keywords": [
                "Social science",
                "Causality",
                "Average treatment effect",
                "Interventional data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "Three social scientists conducted an RCT in which they investigated whether social pressure within neighborhoods increases participation. Specifically, during a primary election in the state of Michigan, they randomly assigned registered voters to receive different get-out-the-vote (GOTV) messages and examined whether sending postcards with these messages increased turnout. The researchers exploited the fact that the turnout of individual voters is public information in the United States. The GOTV message of particular interest was designed to induce social pressure by telling voters that after the election their neighbors would be informed about whether they voted in the election or not. The researchers hypothesized that such a namingand-shaming GOTV strategy would increase participation.\nThere are three treatment groups: voters who receive either the social pressure message, the civic duty message, or the Hawthorne effect message. The Hawthorne effect refers to the phenomenon where study subjects behave differently because they know they are being observed by researchers. The experiment also has a control group which consists of those voters receiving no message. The researchers randomly assigned each voter to one of the four groups and examined whether the voter turnout was different across the groups.\n\nThe data is in the file social.csv.\nVariable Description\nhhsize: household size of the voter\nmessages: GOTV messages the voter received (Civic Duty, Control, Neighbors, Hawthorne)\nsex: sex of the voter (female or male)\nyearofbirth: year of birth of the voter\nprimary2004: whether the voter voted in the 2004 primary election (1=voted, 0=abstained)\nprimary2006: whether the voter turned out in the 2006 primary election (1=voted, 0=abstained)",
        "question": "What is the difference in the average causal effect of the Neighbors message on whether the voter voted in the 2006 primary election between those who voted in the 2004 primary election and those who did not? Please round the result to the nearest thousandth.",
        "answer": "0.027",
        "data_files": [
            "social.csv"
        ],
        "meta_data": {
            "reference": "Quantitative Social Science 4.3.3",
            "keywords": [
                "Social science",
                "Causality",
                "Heterogenous treatment effects",
                "Interventional data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "Three social scientists conducted an RCT in which they investigated whether social pressure within neighborhoods increases participation. Specifically, during a primary election in the state of Michigan, they randomly assigned registered voters to receive different get-out-the-vote (GOTV) messages and examined whether sending postcards with these messages increased turnout. The researchers exploited the fact that the turnout of individual voters is public information in the United States. The GOTV message of particular interest was designed to induce social pressure by telling voters that after the election their neighbors would be informed about whether they voted in the election or not. The researchers hypothesized that such a namingand-shaming GOTV strategy would increase participation.\nThere are three treatment groups: voters who receive either the social pressure message, the civic duty message, or the Hawthorne effect message. The Hawthorne effect refers to the phenomenon where study subjects behave differently because they know they are being observed by researchers. The experiment also has a control group which consists of those voters receiving no message. The researchers randomly assigned each voter to one of the four groups and examined whether the voter turnout was different across the groups.\n\nThe data is in the file social.csv.\nVariable Description\nhhsize: household size of the voter\nmessages: GOTV messages the voter received (Civic Duty, Control, Neighbors, Hawthorne)\nsex: sex of the voter (female or male)\nyearofbirth: year of birth of the voter\nprimary2004: whether the voter voted in the 2004 primary election (1=voted, 0=abstained)\nprimary2006: whether the voter turned out in the 2006 primary election (1=voted, 0=abstained)",
        "question": "What is the average causal effect of the Neighbors message on whether the voter voted in the 2006 primary election if the voter's age was 25 in 2006? Please estimate a linear regression model with both message and age as predictors, and round the result to the nearest thousandth.",
        "answer": "0.064",
        "data_files": [
            "social.csv"
        ],
        "meta_data": {
            "reference": "Quantitative Social Science 4.3.3",
            "keywords": [
                "Social science",
                "Causality",
                "Heterogenous treatment effects",
                "Interventional data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "Three social scientists conducted an RCT in which they investigated whether social pressure within neighborhoods increases participation. Specifically, during a primary election in the state of Michigan, they randomly assigned registered voters to receive different get-out-the-vote (GOTV) messages and examined whether sending postcards with these messages increased turnout. The researchers exploited the fact that the turnout of individual voters is public information in the United States. The GOTV message of particular interest was designed to induce social pressure by telling voters that after the election their neighbors would be informed about whether they voted in the election or not. The researchers hypothesized that such a namingand-shaming GOTV strategy would increase participation.\nThere are three treatment groups: voters who receive either the social pressure message, the civic duty message, or the Hawthorne effect message. The Hawthorne effect refers to the phenomenon where study subjects behave differently because they know they are being observed by researchers. The experiment also has a control group which consists of those voters receiving no message. The researchers randomly assigned each voter to one of the four groups and examined whether the voter turnout was different across the groups.\n\nThe data is in the file social.csv.\nVariable Description\nhhsize: household size of the voter\nmessages: GOTV messages the voter received (Civic Duty, Control, Neighbors, Hawthorne)\nsex: sex of the voter (female or male)\nyearofbirth: year of birth of the voter\nprimary2004: whether the voter voted in the 2004 primary election (1=voted, 0=abstained)\nprimary2006: whether the voter turned out in the 2006 primary election (1=voted, 0=abstained)",
        "question": "What is the average causal effect of the Neighbors message on whether the voter voted in the 2006 primary election if the voter's age was 65 in 2006? Please estimate a linear regression model with both message and age as predictors, and round the result to the nearest thousandth.",
        "answer": "0.089",
        "data_files": [
            "social.csv"
        ],
        "meta_data": {
            "reference": "Quantitative Social Science 4.3.3",
            "keywords": [
                "Social science",
                "Causality",
                "Heterogenous treatment effects",
                "Interventional data"
            ],
            "question_type": "numerical"
        }
    },
    {
        "data_description": "We consider how much politicians can increase their personal wealth due to holding office. Scholars investigated this question by analyzing members of Parliament (MPs) in the United Kingdom.6 The authors of the original study collected information about personal wealth at the time of death for several hundred competitive candidates who ran for office in general elections between 1950 and 1970.\n\nThe data are contained in the CSV file MPs.csv.\nVariable Description\nsurname: surname of the candidate\nfirstname: first name of the candidate\nparty: party of the candidate (labour or tory)\nln.gross: log gross wealth at the time of death\nln.net: log net wealth at the time of death\nyob: year of birth of the candidate\nyod: year of death of the candidate\nmargin.pre: margin of the candidate\u2019s party in the previous election\nregion: electoral region\nmargin: margin of victory (vote share)",
        "question": "What is the average causal effect of becoming members of Parliament on the log net wealth for Tory candidates? The difference in predicted values at the point of discontinuity, i.e., a zero margin of victory, between the two regressions represents the average causal effect on personal wealth of serving as an MP. Please round the result to the nearest integer.",
        "answer": "255051",
        "data_files": [
            "MPs.csv"
        ],
        "meta_data": {
            "reference": "Quantitative Social Science 4.3.4",
            "keywords": [
                "Social science",
                "Causality",
                "Average treatment effect",
                "Discontinuity"
            ],
            "question_type": "numerical"
        }
    }
]