""" """

from functools import lru_cache
from typing import (
    Text, List, Union, Optional,
    Iterable, Generator
)
import ujson as json
from dataclasses import dataclass, field


@dataclass
class Answer:
    """One answer phrase, optionally paired with a declarativized form."""

    # The answer text itself.
    phrase: Text
    # Declarativized version of the phrase; stays None when none is supplied.
    declarativized: Optional[Text] = field(default=None)


@dataclass
class Cluster:
    """A claim together with the answers grouped under it.

    ``size`` is not a constructor argument; it is computed from ``answers``
    once, immediately after initialisation, and is not refreshed afterwards.
    """

    claim: Text
    _id: Text  # identifier of this cluster
    answers: List[Answer]
    size: int = field(init=False)  # filled in by __post_init__

    def __post_init__(self):
        """Record how many answers the cluster was created with."""
        answer_count = len(self.answers)
        self.size = answer_count


@dataclass
class ClusteredItem:
    """A question and its clusters, stored from largest to smallest."""

    question: Text
    _id: Text  # unique identifier
    clusters: List[Cluster]

    def __post_init__(self):
        """Replace ``clusters`` with a copy ordered by descending ``size``."""
        # Work on a copy so the list handed to the constructor is untouched.
        largest_first = list(self.clusters)
        largest_first.sort(key=lambda cluster: cluster.size, reverse=True)
        self.clusters = largest_first


class HyperClusteredDataReader:
    """Iterate over ``ClusteredItem`` records stored in JSON-lines files.

    Every non-blank line of each input file is parsed as one JSON object
    with the keys ``question``, ``example_id`` and ``meta_clusters``; each
    entry of ``meta_clusters`` provides ``meta_cluster_id``, ``meta-claim``
    and ``sentences``.

    Parsed files are memoised per reader instance, keeping at most
    ``_MAX_CACHED_FILES`` of the most recently used files.
    """

    # Upper bound on the number of parsed files kept by one reader.
    _MAX_CACHED_FILES = 3

    def __init__(self, data_path: Union[List[Text], Text]):
        """Create a reader.

        Args:
            data_path: A single file path, or a list (or tuple) of file
                paths, read in the given order.
        """
        if isinstance(data_path, tuple):
            # A tuple of paths would otherwise be treated as one path.
            data_path = list(data_path)
        elif not isinstance(data_path, list):
            data_path = [data_path]

        self._data_paths = data_path
        # Per-instance cache: file path -> parsed items. Insertion order
        # doubles as recency order (oldest first). A class-level
        # ``lru_cache`` on the method would instead key on ``self`` and
        # hold on to the reader and its data while the entry is cached.
        self._file_cache = {}

    def _parse_item(self, line_text: Text) -> "ClusteredItem":
        """Build a ``ClusteredItem`` from one JSON-encoded line.

        Args:
            line_text: JSON text of a single record.

        Returns:
            The item with one ``Cluster`` per ``meta_clusters`` entry and
            one ``Answer`` per sentence of that entry.

        Raises:
            KeyError: If an expected key is absent from the record.
        """
        item_dict = json.loads(line_text)

        clusters = [
            Cluster(
                _id=mcluster['meta_cluster_id'],
                claim=mcluster['meta-claim'],
                answers=[
                    Answer(phrase=sentence)
                    for sentence in mcluster['sentences']
                ],
            )
            for mcluster in item_dict['meta_clusters']
        ]

        return ClusteredItem(
            question=item_dict['question'],
            _id=item_dict['example_id'],
            clusters=clusters,
        )

    def _read_file(self, file_path: Text) -> List["ClusteredItem"]:
        """Return all items of ``file_path``, parsing the file at most once
        while it stays in this reader's cache.

        Blank lines (for example a trailing newline at the end of the
        file) are skipped instead of being handed to the JSON parser.
        The returned list is the cached object itself.
        """
        cache = self._file_cache
        if file_path in cache:
            # Pop and re-insert below to mark the entry most recently used.
            items = cache.pop(file_path)
        else:
            with open(file_path, "r", encoding='utf-8') as file_:
                items = [
                    self._parse_item(line) for line in file_ if line.strip()
                ]
            if len(cache) >= self._MAX_CACHED_FILES:
                # dicts preserve insertion order, so the first key is the
                # least recently used entry.
                del cache[next(iter(cache))]
        cache[file_path] = items
        return items

    def __iter__(self) -> Generator["ClusteredItem", None, None]:
        """Yield every item of every configured file, in path order."""
        for file_path in self._data_paths:
            yield from self._read_file(file_path)