#!/usr/bin/python3
"""
Cellosaurus cell line encyclopedia knowledge base.

Author(s):
    Anonymized Authors @anonymized-authors

Licensed under the Apache License, Version 2.0. Copyright Anonymized, Inc. 2025.
"""
from Bio.ExPASy import cellosaurus
from io import TextIOWrapper
from typing import Any, Dict, Final, List, NamedTuple, Optional, Set
from urllib.request import urlopen

from .base import BioEntity, KnowledgeBase


class CellName(BioEntity):
    """
    Convenience constructor for biological entities of type "cell".
    """

    def __new__(cls, name: str, synonyms: Optional[List[str]] = None):
        """
        Builds the entity for a cell. Note that the object handed back is
        created by calling BioEntity directly with the entity type "cell".
        Input:
            name: the official name of the cell.
            synonyms: optional synonyms of the cell.
        Returns:
            The BioEntity describing the cell.
        """
        entity_type = "cell"
        return BioEntity(entity_type, name, synonyms)


class CellosaurusCell(NamedTuple):
    """
    A single cell line entry.

    Fields:
        name: the name entity of the cell line; its string form is used as
            the display name and to derive the lookup ID.
        diseases: the disease annotations of the cell line.
        sex: the sex annotation, inserted verbatim in the description.
        age: the age annotation, inserted verbatim in the description.
    """
    name: CellName
    diseases: List[str]
    sex: str
    age: str

    @property
    def id_(self) -> str:
        """
        Returns the ID of the cell.
        Input:
            None.
        Returns:
            The lowercased name of the cell with every non-alphanumeric
            character removed.
        """
        return "".join(filter(str.isalnum, str(self.name).lower()))

    def __repr__(self) -> str:
        """
        Returns a string representation of the cell.
        Input:
            None.
        Returns:
            A one-line description of the form
            "<name> (<synonyms>): cell line from <age> <sex> with <diseases>".
            The synonym and disease parts are omitted when empty. Calling
            this method does not modify the cell or its name entity.
        """
        # Work on a copy. Decorating `self.diseases` directly would prepend
        # the connective words again on every call.
        diseases = list(self.diseases)
        if len(diseases) > 1:
            diseases[-1] = "and " + diseases[-1]
        if diseases:
            diseases[0] = " with " + diseases[0]
        disease_clause = ", ".join(diseases)

        name = str(self.name)
        # Copy the synonyms as well so that the name entity keeps its own
        # collection untouched; this also accepts any iterable, not only sets.
        synonyms: Set[str] = set(getattr(self.name, "_synonyms", None) or ())
        synonyms.discard(name)
        # An empty synonym carries no information and would render as "()".
        synonyms.discard("")
        if synonyms:
            # Sorted so that the output does not depend on set ordering.
            joined_synonyms = " (" + "; ".join(sorted(synonyms)) + ")"
        else:
            joined_synonyms = ""

        return "{name}: cell line from {age} {sex}{diseases}".format(
            name=name + joined_synonyms,
            age=self.age,
            sex=self.sex,
            diseases=disease_clause
        )

    def __str__(self) -> str:
        """
        Returns a string representation of the cell.
        Input:
            None.
        Returns:
            The same description as produced by repr().
        """
        return repr(self)


class CellosaurusKnowledgeBase(KnowledgeBase):
    """
    Knowledge base over the cell lines listed in the Cellosaurus flat file.

    On construction the file at `url` is downloaded and parsed, and the
    records whose first species entry mentions "Homo sapiens" are kept in
    `records`.
    """
    url = "https://ftp.expasy.org/databases/cellosaurus/cellosaurus.txt"

    def __init__(self, *args: Any, **kwargs: Any):
        """
        Downloads and parses the Cellosaurus flat file.
        Input:
            args, kwargs: accepted and ignored.
        Returns:
            None.
        """
        del args, kwargs
        super(CellosaurusKnowledgeBase, self).__init__(top_k=1)
        # The parser is lazy, so the records are materialized while the
        # connection is still open; the context manager then closes it.
        with urlopen(self.url) as response:
            textstream = TextIOWrapper(response, encoding="utf-8")
            self.records: Final[List[CellosaurusCell]] = [
                CellosaurusCell(
                    CellName(
                        record["ID"],
                        synonyms=self._parse_synonyms(record["SY"])
                    ),
                    record["DI"],
                    record["SX"],
                    record["AG"]
                )
                for record in cellosaurus.parse(textstream)
                if self._is_human(record)
            ]

    @staticmethod
    def _is_human(record: Any) -> bool:
        """
        Tells whether the first species entry of a record is human.
        Input:
            record: a parsed record with a list of species under "OX".
        Returns:
            True if the first "OX" entry mentions "Homo sapiens"; False
            otherwise, including when the record has no "OX" entries.
        """
        species = record["OX"]
        return bool(species) and "Homo sapiens" in species[0]

    @staticmethod
    def _parse_synonyms(raw: Any) -> List[str]:
        """
        Splits a semicolon-separated synonym field into clean names.
        Input:
            raw: the synonym field of a record.
        Returns:
            The whitespace-stripped synonyms; blank pieces are dropped, so a
            record without synonyms gives an empty list.
        """
        pieces = (piece.strip() for piece in str(raw).split(";"))
        return [piece for piece in pieces if piece]

    def retrieve(self, query: str) -> str:
        """
        Retrieves the information associated with a query cell line.
        Input:
            query: the cell line to retrieve information on.
        Returns:
            A string of the retrieved information for the first record whose
            ID equals the normalized query, or "No information found." when
            no record matches.
        """
        # Normalize the query the same way record IDs are compared:
        # lowercase with non-alphanumeric characters removed.
        key = "".join(filter(str.isalnum, query.lower()))
        match = next((rec for rec in self.records if rec.id_ == key), None)
        if match is None:
            return "No information found."
        return str(match)

    @property
    def cell_lines(self) -> List[str]:
        """
        Returns a list of the cell lines with information available.
        Input:
            None.
        Returns:
            A sorted list of the distinct cell line names.
        """
        return sorted({str(rec.name) for rec in self.records})

    @classmethod
    def knowledge_description(cls, *args: Any, **kwargs: Any) -> str:
        """
        Returns a description of the knowledge.
        Input:
            args, kwargs: accepted and ignored.
        Returns:
            The description of the knowledge base.
        """
        del args, kwargs
        return "Retrieves information about published cell lines."

    @classmethod
    def query_description(cls) -> str:
        """
        Returns a description of the expected query type.
        Input:
            None.
        Returns:
            The description of the expected query type.
        """
        return "The cell line to retrieve information about."
