""" Similar to other answer cluster data but are for simple QA """


from functools import lru_cache
from typing import (
    Text, List, Union, Optional,
    Iterable, Generator
)
import ujson as json
import re
from dataclasses import dataclass, field


@dataclass
class SimpleQAPhraseSamplingInstance:
    """A single simple-QA record with its gold answer and sampled answers.

    Instances compare equal field-by-field (the dataclass default) and are
    mutable, hence unhashable.
    """

    # Record identifier -- presumably the position in the dataset; confirm
    # against whatever produces the data files.
    index: int
    # Category label of the expected answer (value set not visible here).
    answer_type: Text
    # Question text.
    question: Text
    # Reference answer for the question.
    gold_answer: Text
    # Answers sampled for the question -- presumably model outputs; verify.
    sampled_answers: List[Text]
    
    
class SimpleQAPhraseSamplingDataReader:
    """Iterate over phrase-sampling instances stored in JSON-lines files.

    Every non-blank line of each file must be a JSON object whose keys match
    the constructor arguments of ``SimpleQAPhraseSamplingInstance``.
    Iterating the reader yields the instances of each file in turn, in the
    order the paths were given.
    """

    def __init__(self, data_path: Union[List[Text], Text]):
        """Remember which file(s) to read; nothing is opened yet.

        Args:
            data_path: A single file path, or a list (a tuple is accepted
                too) of file paths.
        """
        if isinstance(data_path, tuple):
            # A tuple of paths would otherwise be wrapped as one "path" and
            # fail later inside ``open``.
            data_path = list(data_path)
        elif not isinstance(data_path, list):
            data_path = [data_path]

        self._data_paths = data_path

        # The cache is created per instance rather than by decorating the
        # method: a decorated method shares one class-level cache keyed on
        # ``self``, which keeps every reader (and its parsed data) alive and
        # makes all readers compete for the same three slots.
        self._cached_load = lru_cache(maxsize=3)(self._load_file)

    def _load_file(self, file_path: Text) -> List[SimpleQAPhraseSamplingInstance]:
        """Parse one JSON-lines file into a list of instances (uncached).

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If a non-blank line is not valid JSON.
            TypeError: If a JSON object's keys do not match the instance
                fields.
        """
        with open(file_path, "r", encoding='utf-8') as file_:
            return [
                SimpleQAPhraseSamplingInstance(**json.loads(line))
                for line in file_
                # Skip blank lines (e.g. a trailing newline at end of file)
                # instead of failing on them in the JSON parser.
                if line.strip()
            ]

    def _read_file(self, file_path: Text) -> List[SimpleQAPhraseSamplingInstance]:
        """Return the parsed contents of ``file_path``, cached per reader.

        Up to three most recently used files are kept. The returned list is
        the cached object itself, so callers must not mutate it.
        """
        return self._cached_load(file_path)

    def __iter__(self) -> Generator[SimpleQAPhraseSamplingInstance, None, None]:
        """Yield every instance from every configured file, in order."""
        for file_path in self._data_paths:
            yield from self._read_file(file_path)