import json
from abc import abstractmethod
from copy import deepcopy
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Dict, Generator, List

from ftagent.components.coder.CoSTEER.evolvable_subjects import EvolvingItem
from ftagent.core.conf import RD_AGENT_SETTINGS
from ftagent.core.evaluation import Evaluator, Feedback
from ftagent.core.evolving_agent import RAGEvaluator
from ftagent.core.evolving_framework import QueriedKnowledge
from ftagent.core.experiment import Task, Workspace
from ftagent.core.utils import multiprocessing_wrapper
from ftagent.log import ftagent_logger as logger

if TYPE_CHECKING:
    from ftagent.core.scenario import Scenario

# TODO:
# 1. It seems logically sound, but we currently lack a scenario to apply it.
# 2. If it proves to be useful, relocate it to a more general location.
#
# class FBWorkspaceExeFeedback(Feedback):
#     """
#     It pairs with FBWorkspace in the abstract level.
#     """
#     # ws: FBWorkspace   # potential
#     stdout: str


@dataclass
class CoSTEERSingleFeedback(Feedback):
    """
    The feedback for the data loader evaluation.
    It is design align the phases of the implemented code
    - Execution -> Return Value -> Code -> Final Decision
    """
    execution: str  # Summarized execution feedback
    # execution_feedback
    return_checking: str | None  # including every check in the testing (constraints about the generated value)
    # value_feedback, shape_feedback, value_generated_flag
    code: str
    final_decision: bool
    raw_execution: str = ""  # Full raw stdout for UI display
    source_feedback: Dict[str, bool] = field(
        default_factory=dict
    )  # Record the source of the feedback since it might be merged from multiple feedbacks, stores the mapping from source tag to its final_decision, this dict also includes the feedback source of itself

    @staticmethod
    def val_and_update_init_dict(data: dict) -> dict:
        """
        Validates and converts the 'final_decision' field in the given data dictionary.

        Args:
            data (dict): The data dictionary containing the 'final_decision' field.

        Returns:
            dict: The updated data dictionary with 'final_decision' as a boolean.

        Raises:
            ValueError: If 'final_decision' is not present or not a boolean.
        """
        if "final_decision" not in data:
            raise ValueError("'final_decision' is required")

        if isinstance(data["final_decision"], str):
            if data["final_decision"] == "false" or data["final_decision"] == "False":
                data["final_decision"] = False
            elif data["final_decision"] == "true" or data["final_decision"] == "True":
                data["final_decision"] = True

        if not isinstance(data["final_decision"], bool):
            raise ValueError(f"'final_decision' must be a boolean, not {type(data['final_decision'])}")

        for attr in "execution", "return_checking", "code":
            if data.get(attr) is not None and not isinstance(data[attr], str):
                data[attr] = json.dumps(data[attr], indent=2, ensure_ascii=False)
        return data

    @classmethod
    def merge(cls, feedback_li: list["CoSTEERSingleFeedback"]) -> "CoSTEERSingleFeedback":
        # NOTE:
        # Here we don't know the detailed design of each feedback, we just know they are CoSTEERSingleFeedback
        # So we merge them only based on CoSTEERSingleFeedback's attributes
        # **So some information may be lost when we have different types of feedbacks**
        # If you have more sophisticated sub class of CoSTEERSingleFeedback, you should override this method
        # to avoid the loss of information.

        fb = deepcopy(feedback_li[0])

        # for all the evaluators, aggregate the final_decision from `task_id`
        fb.final_decision = all(fb.final_decision for fb in feedback_li)
        for attr in "execution", "return_checking", "code":
            setattr(
                fb,
                attr,
                "\n\n".join([getattr(_fb, attr) for _fb in feedback_li if getattr(_fb, attr) is not None]),
            )
        fb.source_feedback = {}
        for _fb in feedback_li:
            for tag, decision in _fb.source_feedback.items():
                fb.source_feedback[tag] = decision
        return fb

    def __str__(self) -> str:
        return f"""------------------Execution------------------
{self.execution}
------------------Return Checking------------------
{self.return_checking if self.return_checking is not None else 'No return checking'}
------------------Code------------------
{self.code}
------------------Final Decision------------------
This implementation is {'SUCCESS' if self.final_decision else 'FAIL'}.
"""

    def __bool__(self):
        return self.final_decision


class CoSTEERSingleFeedbackDeprecated(CoSTEERSingleFeedback):
    """This class is a base class for all code generator feedback to single implementation"""

    def __init__(
        self,
        execution_feedback: str = None,
        shape_feedback: str = None,
        code_feedback: str = None,
        value_feedback: str = None,
        final_decision: bool = None,
        final_feedback: str = None,
        value_generated_flag: bool = None,
        final_decision_based_on_gt: bool = None,
    ) -> None:
        self.execution_feedback = execution_feedback
        self.code_feedback = code_feedback
        self.value_feedback = value_feedback
        self.final_decision = final_decision
        self.final_feedback = final_feedback
        self.value_generated_flag = value_generated_flag
        self.final_decision_based_on_gt = final_decision_based_on_gt

        # TODO:
        # Not general enough. So we should not put them in the general costeer feedback
        # Instead, we should create subclass for it.
        self.shape_feedback = shape_feedback  # Not general enough. So

    @property
    def execution(self):
        return self.execution_feedback

    @execution.setter
    def execution(self, value):
        self.execution_feedback = value

    @property
    def return_checking(self):
        if self.value_generated_flag:
            return f"value feedback: {self.value_feedback}\n\nshape feedback: {self.shape_feedback}"
        return None

    @return_checking.setter
    def return_checking(self, value):
        # Since return_checking is derived from value_feedback and shape_feedback,
        # we don't need to do anything here
        self.value_feedback = value
        self.shape_feedback = value

    @property
    def code(self):
        return self.code_feedback

    @code.setter
    def code(self, value):
        self.code_feedback = value

    def __str__(self) -> str:
        return f"""------------------Execution Feedback------------------
{self.execution_feedback if self.execution_feedback is not None else 'No execution feedback'}
------------------Shape Feedback------------------
{self.shape_feedback if self.shape_feedback is not None else 'No shape feedback'}
------------------Code Feedback------------------
{self.code_feedback if self.code_feedback is not None else 'No code feedback'}
------------------Value Feedback------------------
{self.value_feedback if self.value_feedback is not None else 'No value feedback'}
------------------Final Feedback------------------
{self.final_feedback if self.final_feedback is not None else 'No final feedback'}
------------------Final Decision------------------
This implementation is {'SUCCESS' if self.final_decision else 'FAIL'}.
"""


class CoSTEERMultiFeedback(Feedback):
    """Feedback contains a list, each element is the corresponding feedback for each factor implementation."""

    def __init__(self, feedback_list: List[CoSTEERSingleFeedback]) -> None:
        self.feedback_list = feedback_list

    def __getitem__(self, index: int) -> CoSTEERSingleFeedback:
        return self.feedback_list[index]

    def __len__(self) -> int:
        return len(self.feedback_list)

    def append(self, feedback: CoSTEERSingleFeedback) -> None:
        self.feedback_list.append(feedback)

    def __iter__(self):
        return iter(self.feedback_list)

    def is_acceptable(self) -> bool:
        return all(feedback.is_acceptable() for feedback in self.feedback_list)

    def finished(self) -> bool:
        """
        In some implementations, tasks may fail multiple times, leading agents to skip the implementation.
        This results in None feedback. However, we want to accept the correct parts and ignore None feedback.
        """
        return all(feedback.final_decision for feedback in self.feedback_list if feedback is not None)

    def __bool__(self) -> bool:
        return all(feedback.final_decision for feedback in self.feedback_list)


class CoSTEEREvaluator(Evaluator):
    def __init__(
        self,
        scen: "Scenario",
    ) -> None:
        self.scen = scen

    # TODO:
    # I think we should have unified interface for all evaluates, for examples.
    # So we should adjust the interface of other factors
    # Based on the implementation, I think a better name is some name like task-implement evaluator
    @abstractmethod
    def evaluate(
        self,
        target_task: Task,
        implementation: Workspace,
        gt_implementation: Workspace,
        **kwargs,
    ) -> CoSTEERSingleFeedback:
        raise NotImplementedError("Please implement the `evaluator` method")


class CoSTEERMultiEvaluator(RAGEvaluator):
    """This is for evaluation of experiment. Due to we have multiple tasks, so we will return a list of evaluation feebacks"""

    def __init__(self, single_evaluator: CoSTEEREvaluator | list[CoSTEEREvaluator], scen: "Scenario") -> None:
        super().__init__()
        self.scen = scen
        self.single_evaluator = single_evaluator

    def evaluate_iter(
        self,
        queried_knowledge: QueriedKnowledge = None,
        **kwargs,
    ) -> Generator[CoSTEERMultiFeedback, EvolvingItem | None, CoSTEERMultiFeedback]:
        evo = yield CoSTEERMultiFeedback(
            []
        )  # it will receive the evo first, so the first yield is for get the sent evo instead of generate useful feedback

        eval_l = self.single_evaluator if isinstance(self.single_evaluator, list) else [self.single_evaluator]

        # 1) Evaluate each sub_task
        task_li_feedback_li = []
        # task_li_feedback_li: List[List[CoSTEERSingleFeedback]]
        # Example:
        # If there are 2 evaluators and 3 sub_tasks in evo, and each evaluator's evaluate returns a list of 3 CoSTEERSingleFeedbacks,
        # Then task_li_feedback_li will be:
        # [
        #   [feedback_1_1, feedback_1_2, feedback_1_3],  # results from the 1st evaluator for all sub_tasks
        #   [feedback_2_1, feedback_2_2, feedback_2_3],  # results from the 2nd evaluator for all sub_tasks
        # ]
        # Where feedback_i_j is the feedback from the i-th evaluator for the j-th sub_task.
        for ev in eval_l:
            multi_implementation_feedback = multiprocessing_wrapper(
                [
                    (
                        ev.evaluate,
                        (
                            evo.sub_tasks[index],
                            evo.sub_workspace_list[index],
                            evo.sub_gt_implementations[index] if evo.sub_gt_implementations is not None else None,
                            queried_knowledge,
                        ),
                    )
                    for index in range(len(evo.sub_tasks))
                ],
                n=RD_AGENT_SETTINGS.multi_proc_n,
            )
            # None received, we skip the rest and return the overall feedback directly
            evo_next_iter = yield CoSTEERMultiFeedback(multi_implementation_feedback)
            task_li_feedback_li.append(multi_implementation_feedback)
            if evo_next_iter is None:
                break
            evo = evo_next_iter

        # 2) merge the feedbacks along the sub_tasks to aggregate the multiple evaluation feedbacks
        merged_task_feedback = []
        # task_li_feedback_li[0] is a list of feedbacks of different tasks for the 1st evaluator
        for task_id, fb in enumerate(task_li_feedback_li[0]):
            fb = fb.merge([fb_li[task_id] for fb_li in task_li_feedback_li])
            merged_task_feedback.append(fb)
        # merged_task_feedback: List[CoSTEERSingleFeedback]
        # Example:
        # [
        #   CoSTEERSingleFeedback(final_decision=True, execution="...", return_checking="...", code="..."),
        #   CoSTEERSingleFeedback(final_decision=False, execution="...", return_checking="...", code="..."),
        #   ...
        # ]
        # Each element corresponds to the merged feedback for one sub-task across all evaluators.
        # merged_task_feedback[i] is the merged feedback for the i-th sub_task

        final_decision = [
            None if single_feedback is None else single_feedback.final_decision
            for single_feedback in merged_task_feedback
        ]
        logger.info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}")

        # TODO: this is to be compatible with factor_implementation;
        for index in range(len(evo.sub_tasks)):
            if final_decision[index]:
                evo.sub_tasks[index].factor_implementation = True

        return CoSTEERMultiFeedback(merged_task_feedback)
