# Static description of every base dataset.
#
# Each entry holds:
#   "train" / "dev" / "test": a two-item list [name, count]
#       (presumably the split name to load and its number of examples -- confirm against the loader),
#   "num_classes" / "classes": number of labels and their names,
#   "delimiters": prompt-field markers, or None,
#   "hf": a string or [name, config] list (presumably a Hugging Face `datasets` identifier -- confirm), or None,
#   "file": per-split local paths, or None,
#   "rationale_file" (optional): paths to rationale files.

_SPLITS = ("train", "dev", "test")


def _nli_delimiters():
    """Return a new hypothesis/premise delimiter mapping."""
    return {"hypothesis": "hypothesis:", "premise": "premise:"}


def _qa_delimiters():
    """Return a new question/answer-choices delimiter mapping."""
    return {"question": "question:", "choices": "answer choices:"}


def _split_files(pattern):
    """Return {split: pattern.format(split)} for train, dev and test."""
    return {split: pattern.format(split) for split in _SPLITS}


def _rationale_files(dataset, first_entries=None):
    """Return the '<split>_gpt-neox' rationale paths of `dataset`.

    Items of `first_entries`, when given, are inserted ahead of the
    gpt-neox ones; the argument itself is never modified.
    """
    files = {} if first_entries is None else dict(first_entries)
    for split in _SPLITS:
        files["{}_gpt-neox".format(split)] = (
            "../data/{}/generated-rationales/gpt-neox/{}.jsonl".format(dataset, split)
        )
    return files


dataset_info = {
    "esnli": {
        "train": ["train", 549367],
        "dev": ["validation", 9842],
        "test": ["test", 9824],
        "num_classes": 3,
        "classes": ["entailment", "neutral", "contradiction"],
        "delimiters": _nli_delimiters(),
        "hf": "esnli",
        "file": None,
    },
    "ecqa": {
        "train": ["train", 7598],
        "dev": ["validation", 1090],
        "test": ["test", 2194],
        "num_classes": 5,
        "classes": ["A", "B", "C", "D", "E"],
        "delimiters": _qa_delimiters(),
        "hf": "yangdong/ecqa",
        "file": None,
        "rationale_file": _rationale_files("ecqa"),
    },
    "openbookqa": {
        "train": ["train", 4957],
        "dev": ["validation", 500],
        "test": ["test", 500],
        "num_classes": 4,
        "classes": ["A", "B", "C", "D"],
        "delimiters": _qa_delimiters(),
        "hf": ["openbookqa", "additional"],
        "file": None,
        "rationale_file": _rationale_files("openbookqa"),
    },
    "strategyqa": {
        "train": ["train", 1290],
        "dev": ["dev", 500],
        "test": ["test", 499],
        "num_classes": 2,
        "classes": ["False", "True"],
        "delimiters": None,
        "hf": None,
        "file": _split_files("../data/strategyqa/raw/{}.json"),
        "rationale_file": _rationale_files("strategyqa"),
    },
    "creak": {
        "train": ["train", 10176],
        "dev": ["dev", 1371],
        # The test set is unavailable, so the original validation set is used as the test set.
        "test": ["dev", 1371],
        "num_classes": 2,
        "classes": ["true", "false"],
        "delimiters": None,
        "hf": None,
        "file": {
            "train": "../data/creak/raw/train.json",
            "dev": "../data/creak/raw/dev.json",
            "test": "../data/creak/raw/dev.json",
        },
    },
    "qasc": {
        "train": ["train", 8134],
        "dev": ["validation", 926],
        # The test set is unavailable, so the original validation set is used as the test set.
        "test": ["validation", 926],
        "num_classes": 8,
        "classes": ["A", "B", "C", "D", "E", "F", "G", "H"],
        "delimiters": _qa_delimiters(),
        "hf": "qasc",
        "file": None,
        "rationale_file": _rationale_files(
            "qasc",
            first_entries=[
                ("train", "../data/eqasc/raw/eqasc_train_grc.json"),
                ("dev", "../data/eqasc/raw/eqasc_dev_grc.json"),
                ("test", "../data/eqasc/raw/eqasc_test_grc.json"),
            ],
        ),
    },
    "quartz": {
        "train": ["train", 2696],
        "dev": ["dev", 384],
        "test": ["test", 784],
        "num_classes": 2,
        "classes": ["A", "B"],
        "delimiters": _qa_delimiters(),
        "hf": None,
        "file": _split_files("../data/quartz/raw/{}.jsonl"),
    },
    "aqua_rat": {
        "train": ["train", 97467],
        "dev": ["validation", 254],
        "test": ["test", 254],
        "num_classes": 5,
        "classes": ["A", "B", "C", "D", "E"],
        "delimiters": _qa_delimiters(),
        "hf": ["aqua_rat", "raw"],
        "file": None,
    },
    "winowhy": {
        "train": ["train", 191],
        # dev and test both point at the "test" split / test.json file.
        "dev": ["test", 48],
        "test": ["test", 48],
        "num_classes": 2,
        "classes": ["A", "B"],
        "delimiters": {
            "text": "text:",
            "choices": "answer choices:",
        },
        "hf": None,
        "file": {
            "train": "../data/WinoWhy/training.json",
            "dev": "../data/WinoWhy/test.json",
            "test": "../data/WinoWhy/test.json",
        },
    },
    "comve": {
        "train": ["train", 10000],
        "dev": ["dev", 997],
        "test": ["test", 1000],
        "num_classes": 2,
        "classes": ["sent0", "sent1"],
        "delimiters": {
            "sent0": "sent0",
            "sent1": "sent1",
        },
        "hf": None,
        "file": _split_files("../data/comve/processed/{}.json"),
    },
    "mnli_matched": {
        "train": ["train", 392702],
        "dev": ["validation_matched", 9815],
        # The test set is unavailable, so the original validation set is used as the test set.
        "test": ["validation_matched", 9815],
        "num_classes": 3,
        "classes": ["entailment", "neutral", "contradiction"],
        "delimiters": _nli_delimiters(),
        "hf": ["glue", "mnli"],
        "file": None,
    },
    "anli_r1": {
        "train": ["train_r1", 16946],
        "dev": ["dev_r1", 1000],
        "test": ["test_r1", 1000],
        "num_classes": 3,
        "classes": ["entailment", "neutral", "contradiction"],
        "delimiters": _nli_delimiters(),
        "hf": "anli",
        "file": None,
    },
}

# Derived dataset entries registered on top of the base `dataset_info` table.
#
# Variants that differ from their base entry are built with copy.deepcopy so
# that their nested lists/dicts ("classes", "delimiters", "file", split lists,
# ...) are independent objects. A shallow dict.copy() would leave those nested
# containers shared, so an in-place change made through one variant would
# silently show up in the others.
import copy  # needed for deepcopy below

# Additional names for existing configurations. No copy is made: each name
# refers to the very same dict object as its base entry, so a change made
# through one name is visible through the other.
dataset_info["ecqa_unk"] = dataset_info["ecqa"]

dataset_info["qasc_unk"] = dataset_info["qasc"]
dataset_info["eqasc"] = dataset_info["qasc"]

dataset_info["aqua_rat_unk"] = dataset_info["aqua_rat"]

# WinoWhy folds winowhy_0 .. winowhy_4: same settings as "winowhy", with the
# "WinoWhy" directory in every file path replaced by the fold name.
for cross in range(5):
    fold_name = "winowhy_{}".format(cross)
    fold_info = copy.deepcopy(dataset_info["winowhy"])
    if cross == 4:
        # The last fold has different split sizes than the other four.
        fold_info["train"] = ["train", 192]
        fold_info["dev"] = ["test", 47]
        fold_info["test"] = ["test", 47]
    for mode in ("train", "dev", "test"):
        fold_info["file"][mode] = dataset_info["winowhy"]["file"][mode].replace("WinoWhy", fold_name)
    dataset_info[fold_name] = fold_info

dataset_info["comve_unk"] = dataset_info["comve"]

# MNLI mismatched: identical to "mnli_matched" except for the dev/test split.
dataset_info["mnli_mismatched"] = copy.deepcopy(dataset_info["mnli_matched"])
dataset_info["mnli_mismatched"]["dev"] = ["validation_mismatched", 9832]
dataset_info["mnli_mismatched"]["test"] = ["validation_mismatched", 9832]

# ANLI rounds 2 and 3: identical to "anli_r1" except for the three splits.
dataset_info["anli_r2"] = copy.deepcopy(dataset_info["anli_r1"])
dataset_info["anli_r2"]["train"] = ["train_r2", 45460]
dataset_info["anli_r2"]["dev"] = ["dev_r2", 1000]
dataset_info["anli_r2"]["test"] = ["test_r2", 1000]

dataset_info["anli_r3"] = copy.deepcopy(dataset_info["anli_r1"])
dataset_info["anli_r3"]["train"] = ["train_r3", 100459]
dataset_info["anli_r3"]["dev"] = ["dev_r3", 1200]
dataset_info["anli_r3"]["test"] = ["test_r3", 1200]

# Maps a dataset name to the metric name "dev_acc_metric_epoch"; every listed
# dataset uses the same value. (Presumably this is the metric watched for
# checkpointing / early stopping -- confirm against the training code.)
# Note that the WinoWhy folds are listed, but the plain "winowhy" name is not.
monitor_dict = dict.fromkeys(
    (
        "esnli",
        "ecqa", "ecqa_unk",
        "openbookqa",
        "strategyqa",
        "creak",
        "aqua_rat", "aqua_rat_unk",
        "quartz",
        "qasc", "qasc_unk", "eqasc",
        "comve", "comve_unk",
        "winowhy_0", "winowhy_1", "winowhy_2", "winowhy_3", "winowhy_4",
        "mnli_matched", "mnli_mismatched",
        "anli_r1", "anli_r2", "anli_r3",
    ),
    "dev_acc_metric_epoch",
)

# Maps a model name to a list of field names; all four T5 sizes use the same
# six names. (Presumably these are the per-example fields the data pipeline
# produces for that model -- confirm against the caller.)
# The list literal sits inside the comprehension on purpose, so that each
# model gets its own list object.
data_keys = {
    model_name: ["item_idx", "example", "rationale", "label", "target_seq", "token_type"]
    for model_name in ("t5-small", "t5-base", "t5-large", "t5-3b")
}