import os
import json

from datasets import load_dataset

from dataset.base import BaseDataset

class MathVista(BaseDataset):
    """Wrapper around the ``testmini`` split of ``AI4Math/MathVista``.

    Attributes set in ``__init__``:
        ann: the ``'testmini'`` entry of whatever ``load_dataset`` returns
            for ``"AI4Math/MathVista"``.
        img_root: directory prefix joined onto each row's ``image`` value.
    """

    def __init__(self):
        super().__init__()
        all_splits = load_dataset("AI4Math/MathVista")
        self.ann = all_splits['testmini']
        # NOTE(review): relative to the current working directory; the image
        # files are presumably expected to already exist there — confirm.
        self.img_root = "./data/MathVista/"

    def get_data(self):
        """Convert every annotation row into a flat record dict.

        Returns:
            A 2-tuple ``(records, keys)``. ``records`` is a list with one
            dict per row of ``self.ann`` holding ``pid``, ``img_path``
            (``img_root`` joined with the row's ``image``), ``question``
            (the row's ``query``) and ``label`` (the row's ``answer``).
            ``keys`` is always ``['pid']``.
            NOTE(review): ``keys`` presumably names extra fields the caller
            should carry through — verify against ``BaseDataset`` users.
        """
        records = []
        for row in self.ann:
            record = {
                'pid': row['pid'],
                "img_path": os.path.join(self.img_root, row['image']),
                "question": row['query'],
                "label": row['answer'],
            }
            records.append(record)

        extra_keys = ['pid']
        return records, extra_keys