import json
import os.path as osp

from datasets import Dataset, DatasetDict

from opencompass.registry import LOAD_DATASET

from .base import BaseDataset


@LOAD_DATASET.register_module()
class CMBDataset(BaseDataset):
    """Loader that builds the ``val`` and ``test`` splits of the CMB data.

    Both splits are read from JSON files located directly under the given
    directory, and every record gains an ``option_str`` field holding its
    answer options rendered as one ``"<key>. <text>"`` line per option.
    """

    @staticmethod
    def load(path: str) -> DatasetDict:
        """Read ``val.json`` and ``test.json`` from ``path``.

        Args:
            path: Directory containing ``val.json`` and ``test.json``. Each
                file must hold a JSON list of records, and each record must
                have an ``option`` mapping of option key to option text.

        Returns:
            A ``DatasetDict`` with the keys ``'val'`` and ``'test'``.

        Raises:
            FileNotFoundError: If either JSON file is missing.
            KeyError: If a record has no ``option`` field.
        """

        def _read_split(filename: str) -> list:
            """Load one split file and attach ``option_str`` to each row."""
            with open(osp.join(path, filename), 'r', encoding='utf-8') as f:
                records = json.load(f)
            for record in records:
                # Options whose text is at most one character long are left
                # out -- presumably placeholders for unused option slots.
                record['option_str'] = '\n'.join(
                    f'{key}. {text}'
                    for key, text in record['option'].items()
                    if len(text) > 1)
            return records

        # The validation records keep the answers stored in val.json so the
        # split can actually be scored.
        val_data = _read_split('val.json')

        test_data = _read_split('test.json')
        for record in test_data:
            # Placeholder answer for the test split (presumably distributed
            # without gold labels -- confirm against the dataset release).
            record['answer'] = 'NULL'

        return DatasetDict({
            'val': Dataset.from_list(val_data),
            'test': Dataset.from_list(test_data),
        })
