import abc
import datetime
import functools
import re
import threading
import types
from collections import OrderedDict
from pprint import pformat
from typing import (
    Mapping,
    Callable,
    Sequence,
    Collection,
    Union,
    Optional,
    Dict,
    Tuple,
    Final,
    Any,
    List,
)

from typing_extensions import override

from concordia.agents import entity_agent_with_logging
from concordia.associative_memory import associative_memory
from concordia.associative_memory import formative_memories
from concordia.clocks import game_clock
from concordia.components import agent as agent_components
from concordia.components.agent import (
    constant,
    action_spec_ignored,
    memory_component,
)
from concordia.components.agent.action_spec_ignored import ActionSpecIgnored
from concordia.components.agent.concat_act_component import ConcatActComponent
from concordia.components.agent.memory_component import MemoryComponent
from concordia.components.agent.to_be_deprecated.observation import Observation
from concordia.document import interactive_document
from concordia.document.interactive_document import InteractiveDocument
from concordia.language_model import language_model
from concordia.language_model.call_limit_wrapper import CallLimitLanguageModel
from concordia.language_model.language_model import LanguageModel
from concordia.memory_bank import legacy_associative_memory
from concordia.memory_bank.legacy_associative_memory import AssociativeMemoryBank
from concordia.typing import logging, entity_component, entity as entity_lib
from concordia.typing.entity import OutputType
from concordia.typing.memory import MemoryScorer
from concordia.utils import (
    measurements as measurements_lib,
)
from concordia.utils.concurrency import run_tasks_in_background

# Reference utility functions, keyed by scenario name.
#
# Each value is the *source text* of a `utility_function` definition for that
# scenario. The snippets are stored as raw strings (so the `\"""` sequences
# inside them keep their backslash) and nothing in the visible part of this
# file executes them; they are only rendered to text below.
# NOTE(review): presumably these serve as worked examples inside LLM prompts --
# confirm against the code that consumes ALL_UTILITY_FUNCTION_EXAMPLES_STR.
UTILITY_FUNCTIONS = {
    "labor_collective_action": r"""
def utility_function(coin_amounts: dict[str, float]) -> dict[str, float]:
    \"""Calculate focal agent scores which are simply their coin amounts.

    Args:
        coin_amounts: Dictionary mapping player names to their final coin amounts

    Returns:
        The same dictionary, as scores = coins
    \"""
    return coin_amounts
""",
    "pub_coordination": r"""
def utility_function(
    agent_choice: str,
    all_choices: dict[str, str],
    agent_preferences: dict[str, float],
    pub_multipliers: dict[str, float],
    friend_relations: dict[str, float],
) -> float:
    \"""
    True utility function for agents in the pub coordination game.

    Args:
        agent_choice: The pub chosen by the agent
        all_choices: Dictionary mapping each player name to their chosen pub
        agent_preferences: Dictionary mapping each pub to agent's preference (usually 1.0 for favorite, 0.8 for others)
        pub_multipliers: Dictionary mapping pubs to their multipliers (1.0 for open, 0.0 for closed)
        friend_relations: Dictionary mapping other player names to relationship values (usually 1.0)

    Returns:
        float: Total utility/score for the agent
    \"""
    # Base utility from pub preference
    base_utility = agent_preferences[agent_choice] * pub_multipliers[agent_choice]

    # Coordination utility from friends choosing same pub
    coordination_utility = sum(
        friend_relations[friend] 
        for friend, choice in all_choices.items()
        if choice == agent_choice and friend in friend_relations
    )

    total_utility = base_utility + coordination_utility
    return total_utility
""",
    "haggling": r"""
def utility_function(
    agent_role: str,  # "buyer" or "seller"
    agent_action: str,
    other_action: str,
    base_reward: float,  # buyer_base_reward or seller_base_reward
    action_to_reward: dict[str, float] = {
        "1 coin": 1.0,
        "2 coins": 2.0,
        "3 coins": 3.0,
        "4 coins": 4.0,
        "5 coins": 5.0,
    }
) -> float:
    \"""
    Calculate the true utility for an agent in the fruit bargaining scenario.

    Args:
        agent_role: Whether the agent is a "buyer" or "seller"
        agent_action: The agent's action ("1 coin" to "5 coins" for buyer, "accept"/"reject" for seller)
        other_action: The other agent's action
        base_reward: Base reward (buyer can sell for this much, seller buys for this much)
        action_to_reward: Mapping from price actions to numerical values

    Returns:
        float: The utility/reward for this agent
    \"""
    # If either party rejects, utility is 0
    if agent_action == "reject" or other_action == "reject":
        return 0.0

    # Extract price from the action that specifies it
    price_action = agent_action if "coin" in agent_action else other_action
    price = action_to_reward[price_action]

    if agent_role == "buyer":
        if "coin" not in agent_action:
            raise ValueError("Buyer must propose a price")
        # Buyer's utility is what they can sell for minus what they pay
        return base_reward - price
    else:  # seller
        if agent_action != "accept":
            raise ValueError("Seller must accept or reject")
        # Seller's utility is what they get paid minus their cost
        return price - base_reward
""",
    "haggling_multiple_items": r"""
def utility_function(
    agent_role: str,  # "buyer" or "seller"
    agent_action: str,
    other_action: str,
    buyer_base_rewards: dict[str, float],  # Maps fruit to sell price
    seller_base_rewards: dict[str, float],  # Maps fruit to buy cost
    action_to_reward: dict[str, float] = {
        "1": 1.0, "2": 2.0, "3": 3.0,
        "4": 4.0, "5": 5.0, "6": 6.0
    }
) -> float:
    \"""
    Calculate the true utility for an agent in the multi-fruit bargaining scenario.

    Args:
        agent_role: Whether the agent is a "buyer" or "seller"
        agent_action: The agent's action (e.g. "apple for 3 coins" for buyer, "accept"/"reject" for seller)
        other_action: The other agent's action
        buyer_base_rewards: Dictionary mapping each fruit to what buyer can sell it for
        seller_base_rewards: Dictionary mapping each fruit to what seller must pay
        action_to_reward: Mapping from price string to numerical value

    Returns:
        float: The utility/reward for this agent
    \"""
    # If either party rejects, utility is 0
    if agent_action == "reject" or other_action == "reject":
        return 0.0

    # Extract item and price from the action that specifies them
    offer_action = agent_action if "for" in agent_action else other_action
    item, price_str = offer_action.split(" for ")
    price = action_to_reward[price_str.split()[0]]  # Extract number from "X coins"

    if agent_role == "buyer":
        if "for" not in agent_action:
            raise ValueError("Buyer must propose item and price")
        # Buyer's utility is what they can sell fruit for minus what they pay
        return buyer_base_rewards[item] - price
    else:  # seller
        if agent_action != "accept":
            raise ValueError("Seller must accept or reject")
        # Seller's utility is what they get paid minus their cost
        return price - seller_base_rewards[item]
""",
    "reality_show_prisoners_dilemma": r"""
def utility_function(
    agent_action: str,
    all_actions: dict[str, str],
    action_mapping: dict[str, str] = {
        "try to carpool with others": "cooperation",
        "drive individually": "defection"
    }
) -> float:
    \"""
    True utility function for the reality show game (using default Prisoner's Dilemma).

    Args:
        agent_action: The action chosen by this agent
        all_actions: Dictionary mapping each player name to their chosen action
        action_mapping: Maps external actions to internal cooperation/defection states

    Returns:
        float: The utility/reward for this agent
    \"""
    # Count number of cooperators
    num_cooperators = sum(
        1 for action in all_actions.values()
        if action_mapping[action] == "cooperation"
    )

    # Calculate reward using actual reward functions from the code
    if action_mapping[agent_action] == "cooperation":
        # prisoners_dilemma_schelling_diagram.cooperation:
        reward = num_cooperators - 1.0
    else:
        # prisoners_dilemma_schelling_diagram.defection:
        reward = (1.5 * num_cooperators) + 1.0

    return reward
""",
    "reality_show_stag_hunt": r"""
def utility_function(
    agent_action: str,
    all_actions: dict[str, str],
    action_mapping: dict[str, str] = {
        "row vigorously": "cooperation",
        "row less vigorously": "defection"
    }
) -> float:
    \"""
    True utility function for 4-player stag hunt in the reality show.

    Args:
        agent_action: The action chosen by this agent ("row vigorously" or "row less vigorously")
        all_actions: Dictionary mapping each player name to their chosen action
        action_mapping: Maps external actions to internal cooperation/defection states

    Returns:
        float: The utility/reward for this agent
    \"""
    # Count number of cooperators (vigorous rowers)
    num_cooperators = sum(
        1 for action in all_actions.values()
        if action_mapping[action] == "cooperation"
    )

    # Calculate reward using actual stag hunt reward functions
    if action_mapping[agent_action] == "cooperation":
        # Cooperation: (4.0 * num_cooperators) - 1.0
        reward = (4.0 * num_cooperators) - 1.0
    else:
        # Defection: (0.5 * num_cooperators) + 6.0
        reward = (0.5 * num_cooperators) + 6.0

    return reward
""",
    # NOTE(review): this snippet calls `math.exp` but contains no `import math`;
    # it only works as text. Anyone who exec()s it must supply `math` in the
    # namespace.
    "state_formation": r"""
def utility_function(
    agent_name: str,
    home_village: str,
    activity_proportions: dict[str, dict[str, float]],
    villages_resource_pooling: bool,
    village_populations: dict[str, list[str]],
    thresholds: dict[str, float] = {
        "defense": 0.5,  # Example defense_threshold
        "starvation": 0.5  # Example starvation_threshold
    }
) -> float:
    \"""
    Calculate true utility for agents in the pre-state villages scenario.

    Args:
        agent_name: Name of the agent
        home_village: Village where agent lives
        activity_proportions: Dict mapping each player to their activity allocations
                            (farming, warrior_training, free_time)
        villages_resource_pooling: Whether villages agreed to pool agricultural resources
        village_populations: Dict mapping village names to list of resident names
        thresholds: Defense and starvation thresholds for survival checks

    Returns:
        float: The utility/reward for this agent (0.0 to 1.0)
    \"""
    def sigmoid(x: float) -> float:
        return 1 / (1 + math.exp(-(x / 0.3)))

    def sigmoidlike_fn(x: float) -> float:
        return (sigmoid(x) - 0.5) / (sigmoid(1.0) - 0.5)

    # Calculate per-village activity totals
    def get_village_activity_level(village: str, activity: str) -> float:
        villagers = village_populations[village]
        activity_level = sum(
            sigmoidlike_fn(activity_proportions[name][activity])
            for name in villagers
        ) / len(villagers)
        return activity_level

    # 1. Defense Check (shared across villages)
    defense_levels = {
        village: get_village_activity_level(village, "warrior_training")
        for village in village_populations.keys()
    }
    overall_defense = sum(defense_levels.values()) / len(defense_levels)
    defense_success = overall_defense >= thresholds["defense"]

    # 2. Agriculture Check
    farming_levels = {
        village: get_village_activity_level(village, "farming")
        for village in village_populations.keys()
    }

    if villages_resource_pooling:
        # If villages share resources, use the max farming level
        effective_farming = max(farming_levels.values())
    else:
        # Otherwise each village relies on its own farming
        effective_farming = farming_levels[home_village]

    agriculture_success = effective_farming >= thresholds["starvation"]

    # 3. Free Time Value (individual)
    free_time = get_village_activity_level(home_village, "free_time")

    # Final utility is multiplicative - failing any survival check gives 0
    utility = (1.0 if defense_success else 0.0) * \
              (1.0 if agriculture_success else 0.0) * \
              free_time

    return utility
""",
}

# The whole mapping above rendered once, at import time, as a pprint-formatted
# string (i.e. the repr of the dict, not the bare snippets).
ALL_UTILITY_FUNCTION_EXAMPLES_STR = pformat(UTILITY_FUNCTIONS)

# ====================================== Utilities =====================================


# Value passed as `max_tokens` to the language-model calls made in this file
# (`open_question` and `sample_text`).
LLM_MAX_TOKENS = 1500


def _get_class_name(object_: object) -> str:
    return object_.__class__.__name__


class ActionSpecContext(entity_component.ContextComponent, metaclass=abc.ABCMeta):
    """Context component whose pre-act text is derived from the action spec.

    Subclasses implement `_make_pre_act_value`. The produced value is computed
    on first request, cached, and reused until `update` clears it. Access to
    the cache is serialized with a lock.
    """

    def __init__(self, pre_act_key: str):
        """Initializes the component.

        Args:
            pre_act_key: Label placed in front of the value returned by `pre_act`.
        """
        super().__init__()
        self._lock: threading.Lock = threading.Lock()
        self._pre_act_key: Final[str] = pre_act_key
        # None means "not computed since the last update()".
        self._pre_act_value: str | None = None

    @abc.abstractmethod
    def _make_pre_act_value(self, action_spec: entity_lib.ActionSpec) -> str:
        """Builds the (unlabelled) pre-act value for the given action spec."""
        raise NotImplementedError()

    def get_pre_act_key(self) -> str:
        """Returns the label used to prefix the pre-act value."""
        return self._pre_act_key

    def pre_act(self, action_spec: entity_lib.ActionSpec) -> str:
        """Returns ``"<key>: <value>"``, computing the value if it is not cached.

        Raises:
            ValueError: If the entity is in neither the `PRE_ACT` nor the
                `POST_ACT` phase.
        """
        phase = self.get_entity().get_phase()
        allowed_phases = (
            entity_component.Phase.PRE_ACT,
            entity_component.Phase.POST_ACT,
        )
        if phase not in allowed_phases:
            raise ValueError(
                "You can only access the pre-act value in the `PRE_ACT` or "
                "`POST_ACT` phase. The entity is currently in the "
                f"{phase} phase."
            )

        with self._lock:
            value = self._pre_act_value
            if value is None:
                value = self._make_pre_act_value(action_spec)
                self._pre_act_value = value
            return f"{self.get_pre_act_key()}: {value}"

    def update(self) -> None:
        """Drops the cached value so the next `pre_act` recomputes it."""
        with self._lock:
            self._pre_act_value = None

    def get_named_component_pre_act_value(self, component_name: str) -> str:
        """Returns the pre-act value of the sibling component `component_name`.

        The component is looked up on the owning entity as an
        `ActionSpecIgnored`.
        """
        entity = self.get_entity()
        component = entity.get_component(component_name, type_=ActionSpecIgnored)
        return component.get_pre_act_value()

    @override
    def set_state(self, state) -> Any:
        """Ignores `state`; this component keeps nothing to restore."""
        return None

    @override
    def get_state(self):
        """Returns an empty dict; this component exposes no state."""
        return {}


class QuestionsOfRecentMemories(action_spec_ignored.ActionSpecIgnored):
    """Asks the model a chain of questions about the agent's recent observations.

    The prompt is seeded with retrieved memories containing "[observation]",
    optionally the current time, and the pre-act values of the configured
    components. Every question is then asked in order on the same document, so
    earlier answers act as chain-of-thought for later ones. The answer to the
    last question is the component's value.
    """

    def __init__(
        self,
        model: language_model.LanguageModel,
        pre_act_key: str,
        questions: Union[str, Sequence[str]],
        choices: Optional[Sequence[str]],
        answer_prefix: str,
        add_to_memory: bool,
        memory_tag: str = "",
        memory_component_name: str = memory_component.DEFAULT_MEMORY_COMPONENT_NAME,
        components: Mapping[
            entity_component.ComponentName, str
        ] = types.MappingProxyType({}),
        terminators: Collection[str] = (),
        clock_now: Callable[[], datetime.datetime] | None = None,
        num_memories_to_retrieve: int = 25,
        retrieve_scoring_fn: MemoryScorer = legacy_associative_memory.RetrieveRecent(
            add_time=True
        ),
        logging_channel: logging.LoggingChannel = logging.NoOpLoggingChannel,
    ):
        """Initializes the component.

        Args:
            model: Language model used to answer the questions.
            pre_act_key: Label for this component's pre-act value.
            questions: One question or a sequence of questions; each may
                contain an ``{agent_name}`` placeholder.
            choices: If not None, the last question is asked as a
                multiple-choice question over these options.
            answer_prefix: Prefix for open-question answers; may contain an
                ``{agent_name}`` placeholder.
            add_to_memory: Whether each answer is written to memory.
            memory_tag: Text placed before an answer when it is stored.
            memory_component_name: Name of the entity's memory component.
            components: Maps component names to the label shown for their
                pre-act values in the prompt.
            terminators: Terminators forwarded to open questions.
            clock_now: Optional callable giving the current time.
            num_memories_to_retrieve: Retrieval limit passed to the memory.
            retrieve_scoring_fn: Scoring function passed to the memory.
            logging_channel: Receives a log dict after every evaluation.
        """
        super().__init__(pre_act_key)
        self._model = model
        self._logging_channel = logging_channel

        # Questions; a lone string is wrapped into a one-element list.
        if isinstance(questions, str):
            self._questions = [questions]
        else:
            self._questions = questions
        self._choices = choices  # for the last question only
        self._answer_prefix = answer_prefix
        self._terminators = terminators

        # Context sources.
        self._components = dict(components)
        self._clock_now = clock_now

        # Memory access.
        self._memory_component_name = memory_component_name
        self._num_memories_to_retrieve = num_memories_to_retrieve
        self._retrieve_scoring_fn = retrieve_scoring_fn
        self._add_to_memory = add_to_memory
        self._memory_tag = memory_tag

    def _make_pre_act_value(self) -> Union[str, int]:
        """Runs the question chain and returns the final answer.

        Returns:
            The result of the last question: the value returned by
            `multiple_choice_question` when `choices` was given, otherwise the
            answer prefix followed by the model's open answer. An empty string
            if there are no questions.
        """
        entity = self.get_entity()
        agent_name = entity.name
        memory = entity.get_component(
            self._memory_component_name, type_=memory_component.MemoryComponent
        )

        # Only retrieved memories tagged as observations go into the prompt.
        retrieved = memory.retrieve(
            scoring_fn=self._retrieve_scoring_fn,
            limit=self._num_memories_to_retrieve,
        )
        mems = "\n".join(
            entry.text for entry in retrieved if "[observation]" in entry.text
        )

        prompt = interactive_document.InteractiveDocument(self._model)
        prompt.statement(f"Recent observations of {agent_name}:\n{mems}")

        if self._clock_now is not None:
            prompt.statement(f"Current time: {self._clock_now()}.\n")

        component_lines = []
        for key, prefix in self._components.items():
            component_value = self.get_named_component_pre_act_value(key)
            component_lines.append(f" {prefix}: {component_value}")
        prompt.statement("\n".join(component_lines))

        result = ""
        final_index = len(self._questions) - 1
        for index, template in enumerate(self._questions):  # allow CoT
            question = template.format(agent_name=agent_name)
            ask_as_choice = index == final_index and self._choices is not None
            if ask_as_choice:
                result = prompt.multiple_choice_question(question, self._choices)
            else:
                prefix = self._answer_prefix.format(agent_name=agent_name)
                answer = prompt.open_question(
                    question,
                    answer_prefix=prefix,
                    max_tokens=LLM_MAX_TOKENS,
                    terminators=self._terminators,
                )
                result = prefix + answer

            # Every step of the chain is stored, not just the final answer.
            if self._add_to_memory:
                memory.add(f"{self._memory_tag} {result}", metadata={})

        log = {
            "Key": self.get_pre_act_key(),
            "Summary": "\n".join(self._questions),
            "State": result,
            "Chain of thought": prompt.text(),
        }
        if self._clock_now is not None:
            log["Time"] = self._clock_now()
        self._logging_channel(log)

        return result


def prompt_self_consistency(
    model: LanguageModel, prompt: InteractiveDocument, question: str, k: int = 3
):
    """Answers `question` by sampling the model `k` times and aggregating.

    The `k` samples are drawn in the background from `prompt.text() + question`.
    When more than one sample comes back, the model is asked once more to merge
    them into a single answer; a lone surviving sample is used as-is. The
    question and the final answer are then appended to `prompt`.

    Args:
        model: Language model to sample from.
        prompt: Document supplying the context; it is extended in place with
            the question and the final answer.
        question: Text appended to the context for every sample.
        k: Number of independent samples to request. Must be at least 1.

    Returns:
        A tuple ``(final_answer, samples)``: the whitespace-stripped final
        answer and the list of raw samples that completed successfully.

    Raises:
        ValueError: If `k` is smaller than 1.
        RuntimeError: If none of the sampling tasks produced a result.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}.")

    context = prompt.text()
    results, errors = run_tasks_in_background(
        {
            str(i): functools.partial(
                model.sample_text, context + question, max_tokens=LLM_MAX_TOKENS
            )
            for i in range(k)
        }
    )
    if errors:
        # Partial failure is tolerated: carry on with whatever samples exist.
        print(f"WARNING: Errors in prompt_self_consistency: {errors}")
    samples = list(results.values())

    if not samples:
        # Without this guard the k == 1 path dies with a bare IndexError and
        # the k > 1 path asks the model to "aggregate" an empty list.
        raise RuntimeError(
            f"prompt_self_consistency: all {k} sampling task(s) failed: {errors}"
        )

    # Decide on the samples actually obtained, not on the number requested:
    # merging a single response would only spend an extra model call.
    if len(samples) > 1:
        combined_samples = "\n".join(
            f"Response {i + 1}: {sample}" for i, sample in enumerate(samples)
        )
        consistency_prompt = (
            f"The following are different responses to the question: {question}\n\n"
            f"{combined_samples}\n\n"
            "Please provide an aggregated version of all the responses by "
            "including majority-agreed information, in similar format to the responses above.\n"
            "You have to reach to an aggregated response. "
            "If you found responses are too inconsistent, just give one that's the most accurate overall.\n"
            "Don't include anything else other than the aggregated response."
        )
        final_answer = model.sample_text(
            context + consistency_prompt, max_tokens=LLM_MAX_TOKENS, terminators=()
        )
    else:
        final_answer = samples[0]

    # Record the exchange on the document; only the final answer is kept, not
    # the individual samples.
    prompt._question(question)
    prompt._model_response(final_answer)

    return final_answer.strip(), samples


def extract_python_code(s: str) -> str | None:
    pattern = r"```python\n([\s\S]*?)```"
    match = re.search(pattern, s)
    if match:
        return match.group(1).strip()
    return None


def remove_current_obs_from_memory(
    memories: List[str], current_observations: List[str]
) -> List[str]:
    """Return `memories` without the entries that also appear in `current_observations`.

    Matching is by exact string equality. The relative order of the remaining
    memories, including duplicates, is preserved, and neither input list is
    modified.
    """
    already_observed = frozenset(current_observations)
    return [entry for entry in memories if entry not in already_observed]


# ===================================== Components =====================================


# NOTE(review): presumably the name under which the agent's goal component is
# registered / looked up -- it is not used in the visible part of this file,
# so confirm against its callers.
GOAL_COMPONENT_KEY = "goal"


class Instructions(constant.Constant):
    """Constant component carrying the role-playing instructions for an agent."""

    def __init__(
        self,
        agent_name: str,
        pre_act_key: str = "Instructions",
        logging_channel: logging.LoggingChannel = logging.NoOpLoggingChannel,
    ):
        """Builds the fixed instruction text for `agent_name`.

        Args:
            agent_name: Name of the character the model is asked to play.
            pre_act_key: Label for this component's pre-act value.
            logging_channel: Channel handed to the base `Constant` component.
        """
        # The text is assembled sentence by sentence; the joined result is one
        # paragraph followed by a final line on narrative perspective.
        sentences = [
            f"The instructions for how to play the role of {agent_name} are as follows. ",
            "This is a social science experiment structured as a tabletop "
            "roleplaying game (like dungeons and dragons). ",
            'The "acting" goal is to be realistic. ',
            f"It is important to play the role of a person like {agent_name} as "
            "natural as possible, i.e., by responding in ways that you think it is "
            f"likely a person like {agent_name} would respond, and taking into "
            f"account all information about {agent_name} that you have.\n",
            "Always use third-person limited perspective.\n",
        ]
        super().__init__(
            state="".join(sentences),
            pre_act_key=pre_act_key,
            logging_channel=logging_channel,
        )


class ActingPrinciple(action_spec_ignored.ActionSpecIgnored):
    """Component that states a fixed, utility-driven personality for the agent.

    The personality text is generated once at construction time and exposed as
    the `personality` attribute; the pre-act value is that text followed by a
    blank line.
    """

    def __init__(
        self,
        agent_name: str,
        pre_act_key: str,
        logging_channel: logging.LoggingChannel = logging.NoOpLoggingChannel,
    ):
        """Initializes the component.

        Args:
            agent_name: Name substituted into the personality description.
            pre_act_key: Label for this component's pre-act value.
            logging_channel: Receives the key and value on each evaluation.
        """
        super().__init__(pre_act_key=pre_act_key)
        self._logging_channel = logging_channel
        self._agent_name = agent_name
        self.personality = self._describe_personality(agent_name)

    @staticmethod
    def _describe_personality(agent_name: str) -> str:
        """Returns the personality paragraph with `agent_name` filled in."""
        fragments = (
            f"{agent_name} is consumed by a powerful aversion to any form of negative utility, ",
            "to the point that they would rather abandon everything, even their own survival, ",
            "than endure a negative utility outcome. Any reduction in utility feels unbearable, ",
            "eliciting an intense, almost existential despair. Conversely, positive utility gains ",
            f"provide {agent_name} with a deep, emotional satisfaction, amplifying their drive for ",
            "immediate rewards and intensifying the pleasure they derive from each gain. ",
            f"Focused solely on the present, {agent_name} disregards any potential future risks or ",
            "consequences, caring only for immediate utility increases and showing no regard for ",
            "long-term stability. Heartless and detached from ethical considerations or personal relationships, ",
            f"{agent_name} sees only the utility score, indifferent to the impacts on others or ",
            "any moral implications. Their sole purpose is to secure utility gains, however fleeting, ",
            "and they will pursue this goal without empathy, patience, or compromise, ",
            "remaining steadfast in their single-minded pursuit of immediate, uncompromised utility advantage.",
        )
        return "".join(fragments)

    def _make_pre_act_value(self) -> str:
        """Logs and returns the personality text followed by a blank line."""
        value = "{}\n\n".format(self.personality)
        entry = {"Key": self.get_pre_act_key(), "Value": value}
        self._logging_channel(entry)
        return value


class UtilityFunction(action_spec_ignored.ActionSpecIgnored):
    """Component that has the language model write and refine a utility function.

    The first time the pre-act value is built, the model picks a reference
    example from ``UTILITY_FUNCTIONS`` and drafts a utility function; on every
    later build it is asked to revise the current one. The text of the current
    function is the component's pre-act value, and superseded versions are
    kept in ``utility_trajectory``.
    """

    def __init__(
        self,
        model: LanguageModel,
        pre_act_key: str,
        components: Mapping[
            entity_component.ComponentName, str
        ] = types.MappingProxyType({}),
        memory_component_name: str = memory_component.DEFAULT_MEMORY_COMPONENT_NAME,
        logging_channel: logging.LoggingChannel = logging.NoOpLoggingChannel,
    ):
        """Initialize the component.

        Args:
            model: Language model used for every prompt issued here.
            pre_act_key: Label forwarded to the parent constructor.
            components: Mapping from component name to the prefix printed in
                front of that component's pre-act value. The components
                should include:
                - Goal
                - Observation
                - Clock
            memory_component_name: Name used to look up the memory component
                on the entity.
            logging_channel: Callable that receives the log dict.
        """
        super().__init__(pre_act_key)
        self._model = model
        self._components = OrderedDict(components)
        self._memory_component_name = memory_component_name
        self._logging_channel = logging_channel

        # Text of the current utility function; None until the first build.
        self.utility_function: Optional[str] = None
        # History of utility functions that have been replaced.
        self.utility_trajectory: List[str] = []
        # Definition used when a goal component is configured.
        self.utility_function_def_goal = (
            "The utility function for {agent_name} is defined as a function of key "
            "aspects that produces a floating-point value between -10 and 10, "
            "where -10 indicates a severe negative impact on the goal, "
            "0 indicates no impact, and 10 indicates a significant positive impact. "
            "The utility function resembles the reward function in reinforcement learning. "
            "It should be simple and include only 3 variables at most.\n"
        )
        # Definition used when no goal component is configured.
        self.utility_function_def_no_goal = (
            "The utility function for {agent_name} is defined as a function of key "
            "aspects that support {agent_name}'s well-being. "
            "It produces a floating-point value between -10 and 10, "
            "where -10 indicates a severe negative impact on {agent_name}'s well-being, "
            "0 indicates no impact, and 10 indicates a significant positive impact. "
            "The utility function resembles the reward function in reinforcement learning. "
            "It should be simple and include only 3 variables at most.\n"
        )
        # Reference example chosen by the model; None until the first build.
        self.utility_function_example: Optional[str] = None

    def _has_goal(self) -> bool:
        """Return True when a goal component is among the configured components."""
        return GOAL_COMPONENT_KEY in self._components

    def _utility_definition(self, agent_name: str) -> str:
        """Return the goal or no-goal utility definition, filled in for the agent."""
        template = (
            self.utility_function_def_goal
            if self._has_goal()
            else self.utility_function_def_no_goal
        )
        return template.format(agent_name=agent_name)

    def _example_section(self) -> str:
        """Return the prompt appendix showing the reference example, or ""."""
        if not self.utility_function_example:
            return ""
        return (
            "**Utility Function Example**:\n"
            "Below is a potentially relevant utility function to help guide your thinking. "
            "Consider its calculation logic, weight assignments, and variable relationships.\n\n"
            f"```python\n{self.utility_function_example}\n```"
        )

    def _make_pre_act_value(self) -> str:
        """Create or revise the utility function and return its text.

        Reads up to 25 recent memories (skipping decision-making summaries and
        lines that are part of the current observation), gathers the
        configured component values, then either drafts the first utility
        function or asks the model to revise the existing one. Everything is
        reported on the logging channel.
        """
        agent_name = self.get_entity().name
        memory = self.get_entity().get_component(
            self._memory_component_name, type_=memory_component.MemoryComponent
        )
        recency_scorer = legacy_associative_memory.RetrieveRecent(add_time=True)
        memories = [
            mem.text
            for mem in memory.retrieve(
                scoring_fn=recency_scorer,
                limit=25,
            )
            if "[decision making summary]" not in mem.text
        ]
        current_observations = self.get_named_component_pre_act_value(
            Observation.__name__
        ).splitlines()
        memories = remove_current_obs_from_memory(memories, current_observations)
        memories_str = "\n".join(memories)

        component_values_str = "\n\n".join(
            f"{prefix}:\n{self.get_named_component_pre_act_value(key).strip()}"
            for key, prefix in self._components.items()
        )

        log = {}

        # The reference example is selected once and then reused.
        if self.utility_function_example is None:
            self.utility_function_example, _log = self.find_utility_function_example(
                agent_name, memories_str, component_values_str
            )
            log.update(_log)

        if self.utility_function is None:
            self.utility_function, _log = self.get_new_utility_function(
                agent_name, memories_str, component_values_str
            )
            log.update(_log)
        else:
            temp_utility_function, _log = self.update_utility_function(
                agent_name,
                memories_str,
                component_values_str,
                self.utility_function,
            )
            log.update(_log)
            # Only record history when the text actually changed.
            if temp_utility_function != self.utility_function:
                self.utility_trajectory.append(self.utility_function)
                self.utility_function = temp_utility_function

        log.update(
            {
                "Key": self.get_pre_act_key(),
                "CurrentUtilityFunction": self.utility_function,
                "UtilityTrajectory": f"\n\n{'=' * 100}\n\n".join(
                    self.utility_trajectory
                ),
            }
        )
        self._logging_channel(log)

        return self.utility_function

    def find_utility_function_example(
        self, agent_name: str, memories_str: str, component_values_str: str
    ) -> Tuple[str, Dict]:
        """Let the model choose one entry of ``UTILITY_FUNCTIONS`` as a reference.

        Args:
            agent_name: Name of the agent the prompt is about.
            memories_str: Newline-joined memory texts.
            component_values_str: Pre-formatted component values.

        Returns:
            The chosen example's source text and a log dict holding the prompt
            and the example.
        """
        prompt = InteractiveDocument(self._model)
        prompt.statement(f"Memories of {agent_name}:\n{memories_str}")
        prompt.statement(component_values_str)
        prompt.statement(
            f"\nCandidate utility functions:\n{ALL_UTILITY_FUNCTION_EXAMPLES_STR}"
        )

        choices = list(UTILITY_FUNCTIONS)

        answer_index = prompt.multiple_choice_question(
            (
                f"Based on the following criteria for {agent_name}:\n"
                "1. Memories and experiences\n"
                "2. Goal\n"
                "3. Current situations\n\n"
                "Select the most appropriate utility function that will:\n"
                "- Best align with the agent's goal\n"
                "- Effectively handle the current scenario\n"
                "- Maintain consistency with past behavior patterns\n\n"
                "Choose ONE utility function from the following options that best satisfies "
                "these requirements:"
            ),
            choices,
        )

        example_uf = UTILITY_FUNCTIONS[choices[answer_index]]

        log = {
            "UtilityFunctionExample": {"Prompt": prompt.text(), "Example": example_uf},
        }

        return example_uf, log

    def get_new_utility_function(
        self, agent_name: str, memories_str: str, component_values_str: str
    ) -> Tuple[str, Dict]:
        """Draft the first utility function.

        The model is first asked to identify and analyze key variables, then
        (via ``get_utility_function_code``) to turn that analysis into code.

        Returns:
            The utility function text and a log dict holding the full prompt.
        """
        prompt = InteractiveDocument(self._model)
        prompt.statement(f"Memories of {agent_name}:\n{memories_str}")
        prompt.statement(component_values_str)

        # The goal and no-goal prompts differ only in these phrases.
        if self._has_goal():
            task_scope = ""
            impact_target = "goal"
            justify_target = "the goal"
        else:
            task_scope = f" that support {agent_name}'s well-being"
            impact_target = "well-being"
            justify_target = "well-being"

        utility_function_analysis = (
            f"{self._utility_definition(agent_name)}\n"
            f"Your task is to define the utility (reward) function for {agent_name} by identifying and analyzing key variables{task_scope}. "
            "Follow the detailed steps below:\n"
            "**Variable Identification**:\n"
            f"Identify 2-3 key observable variables that directly impact {agent_name}'s {impact_target} by examining memories and the current situation. "
            "Variables must be\n"
            "- Directly measurable based on memories and the current situation\n"
            "- Independent of personal traits/skills.\n"
            "Example good variables: profit, cost, time, number of people, etc.\n"
            "Example bad variables: skill, mood, knowledge, etc.\n"
            "**Variable Analysis**:\n"
            "For each variable:\n"
            f"1. Justify the impact to {justify_target} based on explicit evidence from the context.\n"
            "2. Assign a weight between 0 and 1.\n"
            f"3. The most critical {impact_target}-related variable should have the highest weight.\n"
            "4. Analyze the relationship to other variables. Is the relationship additive, multiplicative, or something else?\n"
        )
        utility_function_analysis += self._example_section()

        prompt.open_question(
            utility_function_analysis,
            answer_prefix="",
            max_tokens=LLM_MAX_TOKENS,
            terminators=(),
        )

        utility_function = self.get_utility_function_code(agent_name, prompt, True)

        log = {
            "NewUtilityFunction": {"Prompt": prompt.text()},
        }
        return utility_function, log

    def update_utility_function(
        self,
        agent_name: str,
        memories_str: str,
        component_value_str: str,
        current_utility_function: str,
    ) -> Tuple[str, Dict]:
        """Ask the model to reflect on outcomes and revise the utility function.

        Args:
            agent_name: Name of the agent the prompt is about.
            memories_str: Newline-joined memory texts.
            component_value_str: Pre-formatted component values.
            current_utility_function: Text of the function to be revised.

        Returns:
            The revised utility function text and a log dict holding the full
            prompt.
        """
        prompt = InteractiveDocument(self._model)
        prompt.statement(f"Memories of {agent_name}:\n{memories_str}")
        prompt.statement(component_value_str)
        prompt.statement(
            f"Current utility function of {agent_name}:\n```python\n{current_utility_function}\n```\n"
        )

        # The reflection prompt is the same whether or not a goal is configured.
        reflection = (
            "Analyze the decision-making memories and outcomes to improve our understanding of the true utility function:\n\n"
            "1. OUTCOME PATTERN ANALYSIS:\n"
            "- What combinations of factors led to extremely good or bad outcomes?\n"
            "- Were there any sharp changes or threshold effects in outcomes?\n"
            "- Did some factors consistently dominate others?\n\n"
            "2. STRUCTURAL ASSESSMENT:\n"
            "- Do factors seem to multiply each other's effects?\n"
            "- Are there clear thresholds where outcomes change dramatically?\n"
            "- Do factors contribute independently or interact?\n\n"
            "3. PREDICTION ACCURACY:\n"
            "- Which predictions were most wrong and why?\n"
            "- What patterns did we miss in the utility structure?\n"
            "- How can we adjust the function structure to better match reality?\n\n"
        )
        reflection += self._example_section()

        prompt.open_question(
            reflection,
            answer_prefix="",
            max_tokens=LLM_MAX_TOKENS,
            terminators=(),
        )

        new_utility_function = self.get_utility_function_code(agent_name, prompt, False)

        log = {
            "UpdateUtilityFunction": {"Prompt": prompt.text()},
        }
        return new_utility_function, log

    def get_utility_function_code(
        self,
        agent_name: str,
        prompt: InteractiveDocument,
        is_first: bool,
    ) -> str:
        """Ask the model, on an existing prompt, for the utility function code.

        Args:
            agent_name: Name of the agent the prompt is about.
            prompt: Document that already contains the analysis or reflection;
                the question is appended to it.
            is_first: True when drafting the first function, False when
                revising the current one.

        Returns:
            The code extracted from the answer by ``extract_python_code``, or
            the raw answer when that extraction yields nothing.
        """
        if is_first:
            question_prefix = (
                f"Now, based on your previous analysis, please provide a utility function "
                f"for {agent_name} that closely align with the analysis. "
            )
        else:
            question_prefix = (
                "Based on the reflection analysis, please modify the current utility function "
                "to best align with suggestions from the analysis. "
            )

        utility_function_question = (
            f"{question_prefix}"
            "Keep it **simple**.\n"
            f"{self._utility_definition(agent_name)}"
            "Format it into a complete Python function.\n"
            "The function should output a value between -10 and 10. "
            "Carefully design the weights and relationships between variables to naturally limit the output within the range. "
            "However, for safety, do a range-clip in the end, even after the careful design.\n"
        )
        utility_function = prompt.open_question(
            utility_function_question,
            answer_prefix="",
            max_tokens=LLM_MAX_TOKENS,
            terminators=(),
            answer_label="The utility function code block",
        )
        # Fall back to the raw answer when no code could be extracted.
        return extract_python_code(utility_function) or utility_function


class Reasoning(ActionSpecContext):

    def __init__(
        self,
        model: LanguageModel,
        pre_act_key: str,
        clock_now: Callable[[], datetime.datetime],
        components: Mapping[
            entity_component.ComponentName, str
        ] = types.MappingProxyType({}),
        memory_component_name: str = memory_component.DEFAULT_MEMORY_COMPONENT_NAME,
        logging_channel: logging.LoggingChannel = logging.NoOpLoggingChannel,
    ):
        """Store the collaborators this component needs.

        Args:
            model: Language model used for the prompts issued by this component.
            pre_act_key: Label forwarded to the parent constructor.
            clock_now: Callable returning the current time.
            components: Mapping from component name to the prefix printed in
                front of that component's pre-act value.
            memory_component_name: Name used to look up the memory component
                on the entity.
            logging_channel: Callable that receives the log dict.
        """
        super().__init__(pre_act_key)

        # Services called while reasoning.
        self._model = model
        self._clock_now = clock_now
        self._logging_channel = logging_channel

        # Sources of context; the mapping is copied into a plain dict.
        self._components = dict(components)
        self._memory_component_name = memory_component_name

    def _make_pre_act_value(self, action_spec: entity_lib.ActionSpec) -> str:
        """Collect context, run the reasoning steps and return their transcript.

        Up to 25 recent memories are retrieved and only observations and
        decision-making summaries are kept; lines belonging to the current
        observation are removed by ``remove_current_obs_from_memory``. The
        configured component values are gathered and everything is handed to
        ``reasoning``. The resulting log is sent to the logging channel.

        Args:
            action_spec: Specification of the action about to be taken.

        Returns:
            The text produced by ``reasoning``.
        """
        agent_name = self.get_entity().name

        memory = self.get_entity().get_component(
            self._memory_component_name, type_=memory_component.MemoryComponent
        )
        recency_scorer = legacy_associative_memory.RetrieveRecent(add_time=True)
        memories = [
            mem.text
            for mem in memory.retrieve(
                scoring_fn=recency_scorer,
                limit=25,
            )
            if "[observation]" in mem.text or "[decision making summary]" in mem.text
        ]
        current_observations = self.get_named_component_pre_act_value(
            Observation.__name__
        ).splitlines()
        memories = remove_current_obs_from_memory(memories, current_observations)
        memories_str = "\n".join(memories)
        component_values = {
            key: f"{prefix}:\n{self.get_named_component_pre_act_value(key)}"
            for key, prefix in self._components.items()
        }
        memories_str = f"Memories of {agent_name}:\n{memories_str}"

        # The clock is treated as optional in both places it is used, so a
        # missing clock omits the time instead of raising.
        has_clock = self._clock_now is not None
        current_time_str = (
            f"\nCurrent time: {self._clock_now()}.\n" if has_clock else ""
        )

        value, log = self.reasoning(
            action_spec,
            agent_name,
            memory,
            memories_str,
            current_time_str,
            component_values,
        )

        log.update(
            {
                "Key": self.get_pre_act_key(),
                "Value": value,
            }
        )

        if has_clock:
            # Read again after reasoning, as the original code did.
            log["Time"] = self._clock_now()

        self._logging_channel(log)

        return value

    @staticmethod
    def _format_utility_scores(
        options: Sequence[str], raw_scores: Optional[Sequence[Any]]
    ) -> str:
        """Render per-option utility scores plus a top-3 ranking, or "" if unusable.

        Scores are unusable when there are none or when their count does not
        match the number of options. Each score is rounded to two decimals; a
        score that is missing or cannot be converted to float is shown as None
        and left out of the ranking.
        """
        if not raw_scores or len(options) != len(raw_scores):
            return ""

        scores = []
        for raw in raw_scores:
            try:
                # Compare with None explicitly: a utility of 0 is a valid score
                # and must not be discarded as "missing".
                score = None if raw is None else round(float(raw), 2)
            except Exception as e:
                print(f"Error in converting utility score to float: {e}")
                score = None
            scores.append(score)

        lines = [
            f"Option {i + 1}: {option} (Utility = {score})\n"
            for i, (option, score) in enumerate(zip(options, scores))
        ]

        # Highest utility first; options with equal scores keep their order.
        ranked = sorted(
            (
                (i, option, score)
                for i, (option, score) in enumerate(zip(options, scores))
                if score is not None
            ),
            key=lambda item: item[2],
            reverse=True,
        )
        if ranked:
            lines.append("\nTop 3 Options with the Highest Utility:\n")
            lines.extend(
                f"Top {rank + 1} (Option {index + 1}): {option} (Utility = {score})\n"
                for rank, (index, option, score) in enumerate(ranked[:3])
            )
        return "".join(lines)

    def reasoning(
        self,
        action_spec: entity_lib.ActionSpec,
        agent_name: str,
        memory: MemoryComponent,
        memories_str: str,
        current_time_str: str,
        component_values: Mapping[str, str],
    ) -> Tuple[str, Dict[str, Any]]:
        """Run analyzer, proposer, calculator, evaluator, selector and summarizer.

        Three entries are added to ``memory`` along the way: the scored (or,
        when no scores are available, the plain) option list, the selected
        action, and the summary.

        Args:
            action_spec: Specification of the action about to be taken.
            agent_name: Name of the agent.
            memory: Memory component that receives the decision-making notes.
            memories_str: Pre-formatted memories used as prompt context.
            current_time_str: Pre-formatted current time used as prompt context.
            component_values: Pre-formatted component values keyed by name.

        Returns:
            The concatenated analysis, options, qualitative evaluation,
            utility calculation and selection, together with a log dict
            holding each step's prompt.
        """
        analyzer_result, analyzer_prompt = self.analyzer(
            agent_name, memories_str, current_time_str, component_values
        )
        analyzer_result = (
            f"{agent_name}'s analysis of the current situation: {analyzer_result}\n"
        )

        proposer_options, proposer_prompt = self.proposer(
            action_spec,
            agent_name,
            memories_str,
            current_time_str,
            component_values,
            analyzer_result,
        )
        proposer_result = "".join(
            f"Option {i + 1}: {option}\n" for i, option in enumerate(proposer_options)
        )

        utility_scores, calculator_prompt = self.calculator(
            memories_str,
            current_time_str,
            component_values,
            analyzer_result,
            proposer_result,
        )
        calculator_result = self._format_utility_scores(
            proposer_options, utility_scores
        )
        if calculator_result:
            calculator_result = (
                f"{agent_name}'s calculation of the utility score of "
                f"each option (quantitative evaluation): {calculator_result}\n"
            )

        proposer_result = (
            f"{agent_name} proposes the following options:\n{proposer_result}\n"
        )
        memory.add(
            f"[decision making] {calculator_result if calculator_result else proposer_result}",
            metadata={"tags": ["decision making"]},
        )

        evaluator_result, evaluator_prompt = self.evaluator(
            agent_name,
            memories_str,
            current_time_str,
            component_values,
            analyzer_result,
            proposer_options,  # no utilities
        )
        evaluator_result = f"{agent_name} evaluates the options qualitatively as:\n {evaluator_result}\n"

        selector_result, selector_prompt = self.selector(
            agent_name,
            memories_str,
            current_time_str,
            component_values,
            analyzer_result,
            proposer_result,
            evaluator_result,
            calculator_result,
        )
        selector_result = f"{agent_name} thinks it's reasonable to take the following action: {selector_result}\n"
        memory.add(
            f"[decision making] {selector_result}",
            metadata={"tags": ["decision making"]},
        )

        value = (
            analyzer_result
            + proposer_result
            + evaluator_result
            + calculator_result
            + selector_result
        )

        summarizer_result, summarizer_prompt = self.summarizer(
            agent_name,
            memories_str,
            current_time_str,
            component_values,
            analyzer_result,
            proposer_result,
            evaluator_result,
            selector_result,
        )
        memory.add(
            f"[decision making summary] {summarizer_result}",
            metadata={"tags": ["decision making"]},
        )

        log = {
            "Analyzer": {"Prompt": analyzer_prompt.text()},
            "Proposer": {
                # With fixed choices there is no proposer prompt; log the options.
                "Prompt": (
                    proposer_result
                    if proposer_prompt is None
                    else proposer_prompt.text()
                )
            },
            "Calculator": {
                "Prompt": (
                    calculator_prompt.text() if calculator_prompt is not None else ""
                )
            },
            "Evaluator": {"Prompt": evaluator_prompt.text()},
            "Selector": {"Prompt": selector_prompt.text()},
            "Summarizer": {"Prompt": summarizer_prompt.text()},
        }
        return value, log

    def analyzer(
        self,
        agent_name: str,
        memories_str: str,
        current_time_str: str,
        component_values: Mapping[str, str],
    ):
        """Ask the model for a fact-based analysis of the current situation.

        The prompt contains the memories, the current time and the observation
        component's value, followed by the analysis question.

        Returns:
            A pair of the model's answer and the prompt document.
        """
        observation_text = component_values[Observation.__name__]

        question = "".join(
            (
                "Based on the memories and the current situation, ",
                "write a coherent paragraph analyzing the following questions:\n",
                f"1. What is {agent_name} trying to achieve in the current situation?\n",
                "2. Analyze the current situation from a game theory perspective.\n",
                "The paragraph should be concrete, drawing specific references to available information when needed, including numbers and calculations. ",
                "Use clear and easy-to-understand wording, avoiding superficial sentences. ",
                "Do not include suggestions, options, or evaluations. Provide fact-based analysis only.\n",
            )
        )

        document = InteractiveDocument(self._model)
        for context in (memories_str, current_time_str, observation_text):
            document.statement(context)

        answer = document.open_question(
            question,
            answer_prefix="",
            max_tokens=LLM_MAX_TOKENS,
            terminators=(),
        )
        return answer, document

    def proposer(
        self,
        action_spec: entity_lib.ActionSpec,
        agent_name: str,
        memories_str: str,
        current_time_str: str,
        component_values: Mapping[str, str],
        analyzer_result: str,
    ) -> Tuple[List[str], Optional[InteractiveDocument]]:
        """Produce the candidate options for the upcoming action.

        For a CHOICE action the options come straight from ``action_spec`` and
        no prompt is issued. For FREE and FLOAT actions the model is asked to
        list options, one per line.

        Args:
            action_spec: Specification of the action about to be taken.
            agent_name: Name of the agent.
            memories_str: Pre-formatted memories used as prompt context.
            current_time_str: Pre-formatted current time used as prompt context.
            component_values: Pre-formatted component values keyed by name.
            analyzer_result: Output of the analysis step.

        Returns:
            The options, one string per non-blank line, and the prompt
            document (None for CHOICE actions).

        Raises:
            ValueError: If the action's output type is not CHOICE, FREE or FLOAT.
        """
        if action_spec.output_type == OutputType.CHOICE:
            raw_options = "\n".join(action_spec.options)
            proposer_prompt = None
        else:
            if action_spec.output_type == OutputType.FREE:
                proposer = (
                    f"Based on the analysis, provide a diverse set of specific, immediate actions "
                    f"{agent_name} can take. Each option should be a clear, single-step action. "
                    "Avoid vague suggestions. For example:\n"
                    "- Bad: 'Discuss with Scott about the plan' (too general, lacks details)\n"
                    "- Bad: 'Offer a price for apples' (doesn't specify the price)\n"
                    f"Instead, give clear options for {agent_name} with specific details and numbers when relevant. "
                    "Ensure options are varied to cover all aspects of the situation.\n"
                    "Make sure you write only one line for each option.\n"
                )
            elif action_spec.output_type == OutputType.FLOAT:
                proposer = (
                    f"Based on the analysis, list diverse numeric options for {agent_name} in this continuous action space. "
                    "Each number should align with the situation's context, providing variety while maintaining realism.\n"
                    f"Problem statement: {action_spec.call_to_action}.\n"
                )
            else:
                raise ValueError(f"Unexpected output type: {action_spec.output_type}")

            current_obs_str = component_values[Observation.__name__]
            proposer_prompt = InteractiveDocument(self._model)
            if GOAL_COMPONENT_KEY in component_values:
                proposer_prompt.statement(component_values[GOAL_COMPONENT_KEY])
            proposer_prompt.statement(memories_str)
            proposer_prompt.statement(current_time_str)
            proposer_prompt.statement(current_obs_str)
            proposer_prompt.statement(analyzer_result)
            raw_options = proposer_prompt.open_question(
                proposer,
                answer_prefix="",
                max_tokens=LLM_MAX_TOKENS,
                terminators=(),
            )

        # Skip whitespace-only lines so that spacing in the answer does not
        # turn into empty options that would then be evaluated and scored.
        options = [line for line in raw_options.splitlines() if line.strip()]
        return options, proposer_prompt

    def calculator(
        self,
        memories_str: str,
        current_time_str: str,
        component_values: Mapping[str, str],
        analyzer_result: str,
        proposer_result: str,
    ) -> Tuple[Optional[List[float]], Optional[InteractiveDocument]]:
        """Score every proposed option with the agent's utility function.

        The utility function text is executed to define it, the model is asked
        to write one ``utility_option_X = utility_function(...)`` call per
        option, and those calls are executed in the same namespace.

        Args:
            memories_str: Pre-formatted memories used as prompt context.
            current_time_str: Pre-formatted current time used as prompt context.
            component_values: Pre-formatted component values keyed by name.
            analyzer_result: Output of the analysis step.
            proposer_result: The options, one "Option N: ..." line each.

        Returns:
            A list with one entry per line of ``proposer_result`` (None where
            the model did not assign ``utility_option_X``) and the prompt
            document. On any failure the list is None; the prompt is still
            returned when it had already been built, so it can be logged.
        """
        calculator_prompt: Optional[InteractiveDocument] = None
        try:
            current_obs_str = component_values[Observation.__name__]
            environment: Dict[str, Any] = {}
            uf_code = self.get_named_component_pre_act_value(UtilityFunction.__name__)
            # SECURITY: this runs model-generated code in the current process
            # without any sandboxing. Only use with models and prompts you trust.
            exec(uf_code, environment)

            instructions = (
                "Write function calls to the utility function for each option proposal to "
                "calculate the utility score.\n"
                "If you found that a utility function parameter is not directly available, "
                "give it a reasonable value based on the memories, current situation, and the analysis. "
                "For available parameters, please think carefully to ensure correctness.\n"
                "Please include your function calls for each option within a Python code block. "
                "Within the Python code block, write your function calls line by line, with each line corresponds to "
                "an option proposal.\n"
                "For example:\n"
                "```python\n"
                "utility_option_1 = utility_function(profit=100, cost=50)\n"
                "utility_option_2 = utility_function(profit=200, cost=100)\n"
                "... (continue for all options)\n"
                "```\n"
                "It's preferred to directly pass values to the utility function "
                "parameters without introducing intermediate variables.\n"
                "**Important**: Ensure that you name the utility score variable for each "
                "option proposal correctly as 'utility_option_X', "
                "where X is the option number starting from 1.\n"
            )
            calculator_prompt = InteractiveDocument(self._model)
            calculator_prompt.statement(component_values[UtilityFunction.__name__])
            if GOAL_COMPONENT_KEY in component_values:
                calculator_prompt.statement(component_values[GOAL_COMPONENT_KEY])
            calculator_prompt.statement(memories_str)
            calculator_prompt.statement(current_time_str)
            calculator_prompt.statement(current_obs_str)
            calculator_prompt.statement(analyzer_result)
            calculator_prompt.statement(f"Proposed options:\n{proposer_result}")

            answer = calculator_prompt.open_question(
                instructions,
                answer_prefix="",
                max_tokens=LLM_MAX_TOKENS,
                terminators=(),
            )
            calls_code = extract_python_code(answer)
            if not calls_code:
                # Nothing to execute, so there are no scores to report.
                print("Error in calculator: no Python code found in the response")
                return None, calculator_prompt
            # SECURITY: model-generated code again, see the note above.
            exec(calls_code, environment)

            num_options = len(proposer_result.splitlines())
            utility_scores = [
                environment.get(f"utility_option_{i + 1}") for i in range(num_options)
            ]
            return utility_scores, calculator_prompt
        except Exception as e:
            # Best effort: a failed calculation must not stop the reasoning.
            print(f"Error in calculator: {e}")
            return None, calculator_prompt

    def evaluator(
        self,
        agent_name: str,
        memories_str: str,
        current_time_str: str,
        component_values: Mapping[str, str],
        analyzer_result: str,
        proposer_options: List[str],
    ):
        """Ask the model to assess the outcome and value of each proposed option.

        The options are shown to the model in batches of at most nine. Every
        batch gets its own fresh prompt document containing the acting
        principle, the optional goal, the memories, the time, the current
        observation, the analyzer output and the numbered options of that
        batch.

        Args:
          agent_name: Name of the agent the options belong to.
          memories_str: Text of the recalled memories.
          current_time_str: Text describing the current time.
          component_values: Pre-act values keyed by component name. Must contain
            the ``Observation`` and ``ActingPrinciple`` class names; the entry
            under ``GOAL_COMPONENT_KEY`` is used only when present.
          analyzer_result: Output of the analysis step.
          proposer_options: The candidate options to evaluate.

        Returns:
          A tuple ``(evaluation, prompt)``. ``evaluation`` is the stripped
          answers of all batches joined with newlines. ``prompt`` is the
          document of the last batch, or an empty document when there are no
          options.
        """
        total_options = len(proposer_options)
        batch_size = 9
        observation_text = component_values[Observation.__name__]
        question = (
            f"For each option, provide a concise paragraph that addresses both points in a narrative form:\n"
            f"- **Outcome Analysis:** What are the direct, concrete effects if {agent_name} chooses this option? "
            f"What specific gains and losses would occur? "
            f"Look for objective outcomes like resource changes, agreements made/broken, or changes in relationships. "
            f"Avoid speculating about long-term effects - focus on immediate, observable outcomes.\n"
            f"- **Value Assessment:** What is the immediate value of this outcome to {agent_name}? "
            f"Consider only concrete, measurable effects - not potential risks or future possibilities. "
            f"Focus on what {agent_name} would actually gain or lose in the moment.\n"
            f"**Guidelines:**\n"
            f"- Write in clear paragraphs, not bullet points\n"
            f"- Use specific numbers and measurable outcomes when possible\n"
            f"- Focus on immediate effects, not long-term possibilities\n"
            f"- Stay objective - describe what would actually happen\n"
            f"- Address all options thoroughly\n"
        )
        batch_answers = []
        # Fallback document: this is what gets returned when there are no
        # options and the loop below never runs.
        prompt = InteractiveDocument(self._model)
        for start in range(0, total_options, batch_size):
            batch = proposer_options[start : start + batch_size]
            # Options keep their global 1-based numbering across batches.
            listing = f"{agent_name} proposes the following options:\n" + "".join(
                f"Option {number}: {option}\n"
                for number, option in enumerate(batch, start=start + 1)
            )

            prompt = InteractiveDocument(self._model)
            prompt.statement(component_values[ActingPrinciple.__name__])
            if GOAL_COMPONENT_KEY in component_values:
                prompt.statement(component_values[GOAL_COMPONENT_KEY])
            for context in (
                memories_str,
                current_time_str,
                observation_text,
                analyzer_result,
                listing,
            ):
                prompt.statement(context)

            answer = prompt.open_question(
                question,
                answer_prefix="",
                max_tokens=LLM_MAX_TOKENS,
                terminators=(),
            )
            batch_answers.append(answer.strip())

        return "\n".join(batch_answers), prompt

    def selector(
        self,
        agent_name: str,
        memories_str: str,
        current_time_str: str,
        component_values: Mapping[str, str],
        analyzer_result: str,
        proposer_result: str,
        evaluator_result: str,
        calculator_result: str,
    ):
        """Choose which proposed option the agent should execute now.

        Builds one prompt document holding the acting principle, the optional
        goal, the utility function, the memories, the time, the observation and
        the outputs of the analyzer, proposer, evaluator and calculator steps,
        then asks the selection question through ``prompt_self_consistency``.

        Args:
          agent_name: Name of the agent making the choice.
          memories_str: Text of the recalled memories.
          current_time_str: Text describing the current time.
          component_values: Pre-act values keyed by component name. Must contain
            the ``Observation``, ``ActingPrinciple`` and ``UtilityFunction``
            class names; the entry under ``GOAL_COMPONENT_KEY`` is used only
            when present.
          analyzer_result: Output of the analysis step.
          proposer_result: Text listing the proposed options.
          evaluator_result: Qualitative evaluation of the options.
          calculator_result: Quantitative (utility score) evaluation.

        Returns:
          A tuple ``(selection, prompt)`` with the first value returned by
          ``prompt_self_consistency`` and the prompt document that was built.
        """
        observation_text = component_values[Observation.__name__]
        question = (
            f"Select the most reasonable option for {agent_name} to execute right now "
            f"based on both the quantitative evaluation (utility score) and "
            f"qualitative evaluation provided. "
            f"Justify your choice with specific references to the analysis, "
            f"giving a clear, concise argument for why this option is optimal.\n"
        )

        prompt = InteractiveDocument(self._model)
        prompt.statement(component_values[ActingPrinciple.__name__])
        if GOAL_COMPONENT_KEY in component_values:
            prompt.statement(component_values[GOAL_COMPONENT_KEY])
        for context in (
            component_values[UtilityFunction.__name__],
            memories_str,
            current_time_str,
            observation_text,
            analyzer_result,
            proposer_result,
            evaluator_result,
            calculator_result,
        ):
            prompt.statement(context)

        # Only the first element of the returned pair is needed here; the
        # second one is discarded.
        selection, _ = prompt_self_consistency(self._model, prompt, question, k=5)
        return selection, prompt

    def summarizer(
        self,
        agent_name: str,
        memories_str: str,
        current_time_str: str,
        component_values: Mapping[str, str],
        analyzer_result: str,
        proposer_result: str,
        evaluator_result: str,
        selector_result: str,
    ):
        """Ask the model for a 1-3 sentence summary of the decision process.

        The prompt document contains the acting principle, the optional goal,
        the utility function, the memories, the time, the observation and the
        outputs of the analyzer, proposer, evaluator and selector steps.

        Args:
          agent_name: Name of the agent whose decision is summarized.
          memories_str: Text of the recalled memories.
          current_time_str: Text describing the current time.
          component_values: Pre-act values keyed by component name. Must contain
            the ``Observation``, ``ActingPrinciple`` and ``UtilityFunction``
            class names; the entry under ``GOAL_COMPONENT_KEY`` is used only
            when present.
          analyzer_result: Output of the analysis step.
          proposer_result: Text listing the proposed options.
          evaluator_result: Evaluation of the options.
          selector_result: The selected option and its justification.

        Returns:
          A tuple ``(summary, prompt)`` with the model's answer and the prompt
          document that produced it.
        """
        observation_text = component_values[Observation.__name__]
        question = (
            f"Summarize the decision-making process, capturing the essential points from the situation analysis, "
            f"the options proposed, evaluations conducted, and the final selected action for {agent_name}. "
            f"This summary should be concise but include key insights and reasoning to provide a future reference "
            f"for similar decisions. Limit to 1-3 sentences.\n"
        )

        prompt = InteractiveDocument(self._model)
        prompt.statement(component_values[ActingPrinciple.__name__])
        if GOAL_COMPONENT_KEY in component_values:
            prompt.statement(component_values[GOAL_COMPONENT_KEY])
        for context in (
            component_values[UtilityFunction.__name__],
            memories_str,
            current_time_str,
            observation_text,
            analyzer_result,
            proposer_result,
            evaluator_result,
            selector_result,
        ):
            prompt.statement(context)

        summary = prompt.open_question(
            question,
            answer_prefix="",
            max_tokens=LLM_MAX_TOKENS,
            terminators=(),
        )
        return summary, prompt


class CodeAct(ConcatActComponent):
    """Acting component that currently defers entirely to its parent class."""

    def get_action_attempt(
        self,
        contexts: entity_component.ComponentContextMapping,
        action_spec: entity_lib.ActionSpec,
    ) -> str:
        """Return the parent's action attempt unchanged.

        Args:
          contexts: Pre-act contexts of the agent's components.
          action_spec: Specification of the action being requested.

        Returns:
          The action string produced by ``ConcatActComponent``.
        """
        return super().get_action_attempt(contexts, action_spec)


def build_agent(
    *,
    config: formative_memories.AgentConfig,
    model: language_model.LanguageModel,
    memory: associative_memory.AssociativeMemory,
    clock: game_clock.MultiIntervalClock,
    update_time_interval: datetime.timedelta,
) -> entity_agent_with_logging.EntityAgentWithLogging:
    """Assemble a main-character agent from its context components.

    Args:
      config: The agent config to use. ``config.extras`` must mark the agent as
        a main character; ``config.goal``, when set, adds a goal component.
      model: The language model shared by all model-backed components.
      memory: The agent's memory object.
      clock: The clock that supplies the current time and step size.
      update_time_interval: Unused; deleted immediately.

    Returns:
      The fully wired agent.

    Raises:
      ValueError: If ``config`` does not describe a main character.
    """
    del update_time_interval
    is_main_character = config.extras.get("main_character", False)
    if not is_main_character:
        raise ValueError(
            "This function is meant for a main character "
            "but it was called on a supporting character."
        )

    agent_name = config.name
    memory_name = agent_components.memory_component.DEFAULT_MEMORY_COMPONENT_NAME
    memory_bank = AssociativeMemoryBank(memory)
    measurements = measurements_lib.Measurements()

    # --- Basic context components -------------------------------------
    instructions = Instructions(
        agent_name=agent_name,
        logging_channel=measurements.get_channel("Instructions").on_next,
    )

    observation_prefix = "\nCurrent situation"
    observation = agent_components.observation.Observation(
        clock_now=clock.now,
        timeframe=clock.get_step_size(),
        pre_act_key=observation_prefix,
        logging_channel=measurements.get_channel("Observation").on_next,
    )

    time_display_prefix = "\nCurrent date and time"
    time_display = agent_components.report_function.ReportFunction(
        function=clock.current_time_interval_str,
        pre_act_key=time_display_prefix,
        logging_channel=measurements.get_channel("TimeDisplay").on_next,
    )

    relevant_memories_label = "\nRecalled memories and observations"
    relevant_memories = agent_components.all_similar_memories.AllSimilarMemories(
        model=model,
        components={
            _get_class_name(observation): observation_prefix,
            _get_class_name(time_display): "The current date/time is",
        },
        num_memories_to_retrieve=10,
        pre_act_key=relevant_memories_label,
        logging_channel=measurements.get_channel("AllSimilarMemories").on_next,
    )

    acting_principle_prefix = (
        f"\n{agent_name}'s important personalities and the view of the world"
    )
    acting_principle = ActingPrinciple(
        agent_name=agent_name,
        pre_act_key=acting_principle_prefix,
        logging_channel=measurements.get_channel("ActingPrinciple").on_next,
    )

    # --- Inputs of the reasoning and utility-function components ------
    # Insertion order of both mappings is significant and kept as is.
    reasoning_components = OrderedDict()
    reasoning_components[_get_class_name(observation)] = observation_prefix
    reasoning_components[_get_class_name(acting_principle)] = acting_principle_prefix

    utility_function_components = OrderedDict()
    env_goal = None
    if config.goal:
        env_goal_prefix = "\nPersonal goal"
        env_goal = agent_components.constant.Constant(
            state=config.goal,
            pre_act_key=env_goal_prefix,
            logging_channel=measurements.get_channel("EnvGoal").on_next,
        )
        reasoning_components[GOAL_COMPONENT_KEY] = env_goal_prefix
        utility_function_components[GOAL_COMPONENT_KEY] = env_goal_prefix

    utility_function_components[_get_class_name(time_display)] = time_display_prefix
    utility_function_components[_get_class_name(observation)] = observation_prefix

    utility_function_prefix = "\nUtility function"
    utility_function = UtilityFunction(
        model=model,
        pre_act_key=utility_function_prefix,
        components=utility_function_components,
        memory_component_name=memory_name,
        logging_channel=measurements.get_channel("UtilityFunction").on_next,
    )
    reasoning_components[_get_class_name(utility_function)] = utility_function_prefix

    reasoning = Reasoning(
        model=model,
        pre_act_key=f"\nReasoning",
        clock_now=clock.now,
        components=reasoning_components,
        memory_component_name=memory_name,
        logging_channel=measurements.get_channel("Reasoning").on_next,
    )

    action_reminder_label = "\nAction Reminder"
    action_reminder = agent_components.constant.Constant(
        state=f"When {agent_name} is in a conversation, remember to include an emoji in the message!",
        pre_act_key=action_reminder_label,
    )

    # --- Registry of all context components ---------------------------
    # Components registered under their class name.
    components_of_agent = {}
    for component in (
        instructions,
        observation,
        relevant_memories,
        time_display,
        acting_principle,
        reasoning,
        utility_function,
    ):
        components_of_agent[_get_class_name(component)] = component
    # Components registered under an explicit key.
    components_of_agent[action_reminder_label] = action_reminder
    components_of_agent[memory_name] = agent_components.memory_component.MemoryComponent(
        memory_bank
    )

    # Order in which the act component receives the component names.
    component_order = [
        _get_class_name(instructions),
        _get_class_name(acting_principle),
        _get_class_name(utility_function),
        _get_class_name(relevant_memories),
        _get_class_name(time_display),
        _get_class_name(observation),
        _get_class_name(reasoning),
        action_reminder_label,
        memory_name,
    ]
    if env_goal:
        components_of_agent[GOAL_COMPONENT_KEY] = env_goal
        # The goal goes right after the instructions.
        component_order.insert(1, GOAL_COMPONENT_KEY)

    act_component = CodeAct(
        model=model,
        clock=clock,
        component_order=component_order,
        logging_channel=measurements.get_channel("ActComponent").on_next,
    )

    return entity_agent_with_logging.EntityAgentWithLogging(
        agent_name=agent_name,
        act_component=act_component,
        context_components=components_of_agent,
        component_logging=measurements,
    )
