from typing import Union, List
from pathlib import Path
import json
import copy
from collections import defaultdict


# Module-level switch read by `OmniLabel.__init__`: when True, loading a ground-truth file whose
# dataset name contains "omnilabel" requires a version entry and prints a warning if that version
# is not the latest known one. Set to False to skip the check.
CHECK_VERSION: bool = True


class OmniLabel:
    """
    Container to load an OmniLabel annotation file (`json`) and structure the data for easy use as
    data loader and evaluating prediction results.

    Attributes:
        samples (dict): Maps image id to the image dict of the JSON file, extended with a
            `labelspace` list (all descriptions referencing the image) and, if the file has an
            `annotations` section, an `instances` list (all boxes of the image).
        descr_id_to_descr (dict): Maps description id to the description dict.
        has_boxes (bool): Whether the loaded file contains an `annotations` section.
    """

    # Latest known ground-truth version; older files only trigger a warning
    _LATEST_VERSION = "0.1.3"

    def __init__(self, path_json: Union[str, Path]):
        """
        Load the JSON file and index images, descriptions and (if present) boxes.

        Arg:
            path_json (Path, str): Path to OmniLabel ground truth file (`json`)

        Raises:
            AssertionError: If the path does not exist, if the `images` or `descriptions` key is
                missing, or if the version check is active and no version is given.
        """
        path_json = Path(path_json)
        assert path_json.exists(), f"Path to JSON '{path_json}' does not work!"
        # JSON is UTF-8 by specification; do not depend on the platform's default encoding
        with open(path_json, "r", encoding="utf-8") as fid:
            data_json = json.load(fid)

        info = data_json["info"] if "info" in data_json else {}
        ds_name = info["name"] if "name" in info else "unknown"
        if "omnilabel" in ds_name.lower() and CHECK_VERSION:
            assert "version" in info, "Faulty JSON file: 'version' missing in 'info'"
            ol_version = info["version"]
            if ol_version != self._LATEST_VERSION:
                # The URL only decorates the warning, so a missing key must not abort loading
                ol_url = info["url"] if "url" in info else "the dataset website"
                print("\n\nWARNING: An out-dated version of the ground truth was loaded "
                      f"(v{ol_version}). The latest version is v{self._LATEST_VERSION}, "
                      f"please see {ol_url}\n")

        assert "images" in data_json, "Faulty JSON file: 'images' key missing"
        assert "descriptions" in data_json, "Faulty JSON file: 'descriptions' key missing"
        self.has_boxes = has_boxes = "annotations" in data_json

        # Re-structure the data for easier access
        imgid_to_descrs = defaultdict(list)
        for descr in data_json["descriptions"]:
            # Keep only the two derived flags; the rest of `anno_info` is redundant and dropped
            # to keep samples lean and clean
            anno_info = descr.pop("anno_info")
            descr["type"] = "D" if anno_info["type"] == "object_description" else "C"
            descr["llm_generated"] = "llm_generated" in anno_info
            for imgid in descr["image_ids"]:
                imgid_to_descrs[imgid].append(descr)
        self.descr_id_to_descr = {descr["id"]: descr for descr in data_json["descriptions"]}

        if has_boxes:
            imgid_to_boxes = defaultdict(list)
            for box in data_json["annotations"]:
                # bbox[2] * bbox[3] is used as the box area, clamped at 0
                box["area"] = max(0, box["bbox"][2] * box["bbox"][3])
                imgid_to_boxes[box["image_id"]].append(box)

        for img in data_json["images"]:
            # Images without descriptions/boxes get an empty list from the defaultdict
            img["labelspace"] = imgid_to_descrs[img["id"]]
            if has_boxes:
                img["instances"] = imgid_to_boxes[img["id"]]
        self.samples = {img["id"]: img for img in data_json["images"]}

    def load_res(self, result_json) -> "OmniLabel":
        """
        Build a copy of this object whose samples carry prediction results as `instances`.

        `self` is left untouched (the result is a deep copy) and the dictionaries in
        `result_json` are not modified: every box is shallow-copied before `area` and `id` are
        added. Samples without any prediction get an empty `instances` list. Predictions whose
        `image_id` does not match a sample are not attached to any sample. `has_boxes` of the
        returned object is the same as that of `self`.

        Arg:
            result_json (list): List of dictionaries representing detection results. Each
            dictionary should include 'image_id', 'bbox', and optionally other keys such as
            'score'.

        Returns:
            OmniLabel: Copy of `self` with `instances` of every sample replaced by predictions
        """
        res = copy.deepcopy(self)
        imgid_to_boxes = defaultdict(list)
        # Ids start at 123: this should not be 0 to avoid confusion in the evaluator
        for box_id, box in enumerate(result_json, start=123):
            box = dict(box)  # never write into the caller's dictionaries
            box["area"] = max(0, box["bbox"][2] * box["bbox"][3])
            box["id"] = box_id
            imgid_to_boxes[box["image_id"]].append(box)
        for img_id, img in res.samples.items():
            img["instances"] = imgid_to_boxes[img_id]
        return res

    @property
    def num_images(self) -> int:
        """Number of image samples."""
        return len(self.samples)

    @property
    def image_ids(self) -> list:
        """Sorted list of all image ids."""
        return sorted(self.samples)

    @property
    def descr_ids(self) -> list:
        """Sorted list of all description ids."""
        return sorted(self.descr_id_to_descr)

    @property
    def has_annotations(self) -> bool:
        """Whether the loaded file contained an `annotations` section."""
        return self.has_boxes

    def get_image_sample(self, image_id: int) -> dict:
        """
        Retrieve image sample by image_id. The image sample is a dict with the fields of the image
        entry in the JSON file plus `labelspace` and `instances` (the latter only if the file
        contained annotations or the object was created by `load_res`).

        Arg:
            image_id (int)

        Returns:
            dict

        Raises:
            AssertionError: If `image_id` is not an integer or is unknown
        """
        assert isinstance(image_id, int), f"image_id should by integer, but is '{type(image_id)}'"
        assert image_id in self.samples, f"image_id '{image_id}' is invalid, not found"
        return self.samples[image_id]

    def get_description(self, description_id: int) -> dict:
        """
        Retrieve description by description_id.

        Arg:
            description_id (int)

        Returns:
            dict

        Raises:
            AssertionError: If `description_id` is not an integer or is unknown
        """
        assert isinstance(description_id, int), \
            f"description_id should by integer, but is '{type(description_id)}'"
        assert description_id in self.descr_id_to_descr, \
            f"description_id '{description_id}' is invalid, not found"
        return self.descr_id_to_descr[description_id]
