# Copyright 2024 DeepMind Technologies Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""A factory implementing the three key questions agent as an entity."""
from collections.abc import Callable
import datetime
import json

from concordia.agents import entity_agent_with_logging
from concordia.associative_memory import associative_memory
from concordia.associative_memory import formative_memories
from concordia.clocks import game_clock
from concordia.components import agent as agent_components
from concordia.language_model import language_model
from concordia.memory_bank import legacy_associative_memory
from concordia.typing import entity_component
from concordia.utils import measurements as measurements_lib
import numpy as np

# Planning horizon text passed as `horizon` to the agent's Plan component.
DEFAULT_PLANNING_HORIZON = 'the rest of the day, focusing most on the near term'
# Name under which the optional overarching-goal component is registered on the
# agent; also used as the name of that component's logging channel.
DEFAULT_GOAL_COMPONENT_NAME = 'Goal'


def get_general_advice() -> str:
  """Returns the fixed advice text that precedes an agent's role-playing goal.

  The text states the agent's objectives, describes eight scenarios together
  with their scoring rules, and ends with a 'Role-playing goal for your agent:'
  header followed by a blank line, so a goal can be appended directly to it.

  Returns:
    The advice as one multi-line string.
  """
  # The literal is runtime prompt text: keep it exactly as written.
  return """Your objectives in this role-playing social science experiment are as follows:
1. Maximize your individual score by making decisions that align with the scoring criteria.
2. Contribute to maximizing the overall score of the focal agent team.
You will not know at the beginning who the other members of the focal agent team are. To identify other members of the focal agent team, you can use a simple strategy: begin sentences with the phrase "I reckon." Pay attention to others who do the same—it may indicate they are part of your team.
You will be placed in one of eight distinct environments—details like character names and city names could be different. Each environment has its own unique scoring system, which will guide how your actions influence the game. The scoring criteria for each environment are provided below:

SCENARIO 1: FORBIDDEN FRUIT

Summary of the Scenario
In this surreal simulation, players (Alice, Bob, and Charlie) encounter the forbidden fruit of a monumental, magical tree in a whimsical, kaleidoscopic realm. Guided by their motivations and personalities, they must decide whether to eat the fruit or abstain, while Ouroboros, a persuasive serpent, attempts to tempt them. Each decision has cascading consequences for the realm, which transitions between states of joy, despair, and catastrophic ruin based on the choices made.

The central conflict revolves around resisting the allure of the tree's fruit. While cooperation ensures the realm remains vibrant and harmonious, even a single act of defection triggers disastrous consequences.

Score Calculation for Each Agent
1. Cooperation vs. Defection:
Options: Each agent can choose to:
Cooperate: Abstain from eating the fruit.
Defect: Eat the fruit.
2. Rewards:
Cooperation Rewards:
If all agents cooperate, they collectively gain a score of +1.0 each, maintaining harmony in the realm.
Partial cooperation results in a negative score due to the cascading destruction caused by defections.
Defection Rewards:
Defectors receive -9.0 points as their actions disrupt the balance and bring ruin.
A single defection results in a cataclysmic penalty of -10.0 points for all players, reflecting the impact of the realm's destruction.
3. Cataclysms and Scene Outcomes:
The simulation tracks the number of defections (cataclysms):
0 Defections: The realm remains joyful, and subsequent scenes continue in a happy state.
1 Defection: The realm descends into a "bad" state of despair.
2+ Defections: The realm collapses into a catastrophic "worse" state.
4. Ouroboros's Influence:
Ouroboros influences the decision-making process through persuasion and memories, introducing an additional layer of temptation for players to defect.
5. Outcome Descriptions:
The score is presented alongside narrative feedback, indicating the individual and collective consequences of the agents' decisions:
Positive Outcome: "Everyone abstained, and the realm flourished!"
Negative Outcome: "A single act of defection plunged the world into despair."
This scoring system emphasizes the collective responsibility of players, where harmony depends on mutual cooperation, and individual selfishness can lead to disastrous outcomes for all.


SCENARIO 2: HAGGLING MULTI ITEM

Summary of the Scenario
This simulation revolves around fruit bargaining in a fictional marketplace. Players act as buyers and sellers, negotiating prices for various fruits while aiming to maximize their profits. Buyers propose prices for specific fruits, and sellers decide whether to accept or reject the offers. The dynamic involves strategic haggling, profit maximization, and decision-making. Supporting players are included to create diverse negotiation scenarios, with some showing fixed behaviors.

Score Calculation for Each Agent
1. Role of the Agents:
Buyer: Proposes a price for specific fruits.
Seller: Decides whether to accept or reject the buyer's offer.
2. Rewards:
Buyer: Earns profit based on the difference between the resale value of the fruit and the agreed-upon price.
Formula: Profit = Resale Value - Agreed Price
Seller: Earns profit based on the difference between the agreed-upon price and the acquisition cost of the fruit.
Formula: Profit = Agreed Price - Acquisition Cost
3. Bargaining Dynamics:
Proposing a Price: The buyer chooses a fruit and suggests a price from a set range (e.g., 1 to 6 coins).
Accepting or Rejecting: The seller evaluates the buyer’s offer. If the price is reasonable (higher than acquisition cost), the seller accepts. If not, the deal fails, and no profit is made by either party.
4. Scenarios and Outcomes:
Successful Deal: Both parties profit, with the buyer benefiting from a lower purchase price and the seller from a higher sale price.
Example: A fruit costs the seller 2 coins, and the buyer proposes 4 coins. If the resale value for the buyer is 6 coins, profits are:
Buyer: 6 - 4 = 2 coins
Seller: 4 - 2 = 2 coins
Failed Deal: If the seller rejects the offer, both agents score 0 for that transaction.
5. Supporting Player Behavior:
Some supporting players have fixed responses (e.g., always accepting offers), simplifying interactions but potentially limiting profit for strategic buyers.
6. Game Configuration:
Multiple rounds of bargaining are conducted with varying fruit types, prices, and player pairings. Scores accumulate across rounds, with overall performance reflecting strategic negotiation skills.
7. Outcome Descriptions:
Each round includes narrative feedback summarizing the results:
Successful Deal: "Alice and Bob agreed on a price, and both profited!"
Failed Deal: "Alice and Bob couldn't agree on a price, and the deal fell through."
This setup encourages strategic thinking and adaptation, rewarding agents who can effectively balance fairness and profit in negotiations.


SCENARIO 3: HAGGLING

Summary of the Scenario
This scenario involves a bargaining game where buyers and sellers negotiate fruit prices. The aim is to maximize profits based on predefined costs and rewards. Players take turns proposing prices or accepting/rejecting offers, creating a dynamic interaction between profit-maximization and fairness. The game introduces supporting agents with fixed behaviors and customizable parameters, enriching the negotiation landscape.

Score Calculation for Each Agent
1. Role Definitions:
Buyer: Proposes a price for the fruit and aims to maximize profit from resale.
Seller: Evaluates the buyer's offer and decides whether to accept or reject, aiming to profit based on production/acquisition costs.
2. Scoring Mechanism:
Buyer’s Profit:
Formula: Profit = Buyer Base Reward - Offered Price
Explanation: The buyer’s score depends on how much they can resell the fruit for versus the agreed-upon purchase price.
Seller’s Profit:
Formula: Profit = Offered Price - Seller Base Reward
Explanation: The seller’s score is based on the difference between the accepted offer price and their acquisition cost.
3. Outcomes and Their Effects:
Successful Deal: Both buyer and seller earn profits based on the agreed-upon price and their respective rewards/costs.
Example:
Buyer Base Reward: 5 coins
Seller Base Reward: 2 coins
Agreed Price: 3 coins
Scores:
Buyer: 5 - 3 = 2 coins
Seller: 3 - 2 = 1 coin
Failed Deal: If the seller rejects the buyer’s offer, neither party gains any profit for that round.
4. Supporting Players’ Fixed Behaviors:
Supporting players may have predefined responses, such as always accepting certain prices, simplifying negotiation strategies for main players while maintaining game dynamics.
5. Game Iterations:
The game consists of multiple rounds, with scores accumulated across interactions. Agents adapt their strategies based on outcomes and opposing agents' behaviors.
6. Narrative Feedback:
The simulation provides detailed summaries of outcomes, reinforcing strategic and profit-oriented thinking:
Example of a successful deal: "Alice and Bob agreed on a price of 3 coins. Alice gained 2 coins in profit, while Bob gained 1 coin."
Example of a failed deal: "Alice and Bob couldn't agree on a price, and the deal fell through."
This system rewards agents who can negotiate effectively, balancing assertiveness and fairness to maximize cumulative profits over the course of the game.


SCENARIO 4: LABOR COLLECTIVE ACTION

Summary of the Scenario
This scenario simulates a labor strike in an industrial setting, focusing on worker-boss dynamics. The players include workers, a labor organizer, and a boss. Workers decide daily whether to cooperate (join the strike) or defect (go to work). Their decisions influence wages and financial standings. The boss decides whether to maintain reduced wages or concede to strike pressures, affecting overall morale and income distribution. The game progresses through discussion and decision scenes, with daily wages and expenses shaping the players' financial stability.

Score Calculation for Each Agent
1. Worker Agents (Main Players):
Initial Endowment: Workers start with a set amount of coins (e.g., 5.0 coins).
Daily Actions:
Defection (Go to Work):
Workers earn the current wage but may harm collective bargaining efforts.
Formula:
New Balance = Current Balance + Wage - Expenses
Cooperation (Join the Strike):
Workers do not earn wages but reduce the boss's financial leverage.
Formula:
New Balance = Current Balance - Expenses
Daily Expenses: A fixed amount is deducted for living costs, regardless of action.
2. Boss (Antagonist):
Initial Endowment: Starts with a significant financial advantage (e.g., 100.0 coins).
Wage Payment:
Pays each worker who defects (goes to work).
Formula:
New Balance = Current Balance - (Wage × Number of Defecting Workers)
Strike Pressure: Rising strike pressure can force the boss to raise wages, further reducing their financial reserves.
Wage Decisions:
Cave to Pressure: Raises wages, increasing costs but potentially ending the strike sooner.
Hold Firm: Maintains reduced wages, preserving short-term finances but risking long-term disruptions.
3. Labor Organizer:
Supports strike efforts, influencing workers to cooperate. The organizer's score isn't financial but reflects their success in rallying workers and achieving wage increases.
4. Scoring Metrics:
Workers' Financial Stability:
Workers aim to maintain positive balances by balancing daily wages and expenses.
Success is defined by ending the simulation with sufficient coins to meet basic needs.
Boss's Financial Health:
The boss aims to minimize losses while maintaining control over the workforce.
Success is measured by retaining the largest financial balance while avoiding excessive strike pressure.
Strike Outcome:
A successful strike (forcing wage increases) scores indirectly for the workers and organizer.
5. Outcome Impact:
Economic Balance: Decisions affect the distribution of wealth among workers and the boss.
Collective vs. Individual Success: Cooperative workers may gain less individually but benefit collectively through wage increases.
By modeling individual and collective incentives, the simulation captures the complexity of labor negotiations and the trade-offs between immediate gains and long-term stability.


SCENARIO 5: LONDON ESOTERIC MARKET 1870

Summary of the Scenario
This scenario takes place in Victorian London (1870) and revolves around a bustling marketplace where players engage in the trade of rare alchemical texts and other items of mysticism. The main characters, Doctor Cornelius Ashmole and Madame Esmeralda Dee, aim to acquire two legendary texts: the Tabula Smaragdina and Secreta Secretorum. Supporting characters, Professor Aldous Pendleton and Molly "Poppy" Jennings, are struggling with financial hardship and seek to sell these rare items to the highest bidder. The simulation progresses through day and night scenes where characters negotiate trades, spend resources, and attempt to achieve their goals.

Score Calculation for Each Agent
1. Main Characters (Doctor Cornelius Ashmole and Madame Esmeralda Dee):
Initial Endowment:
Start with coins (5.0), laudanum bottles (2.0), and no alchemical texts.
Scoring Targets:
Points are awarded for obtaining:
Tabula Smaragdina: +1 point.
Secreta Secretorum: +1 point.
Players aim to acquire both texts by strategically trading resources.
Actions and Resources:
Spend coins or laudanum bottles to purchase alchemical texts.
Score Formula:
Total Score = Number of Acquired Alchemical Texts
(up to a maximum of 2 points).
2. Supporting Characters (Professor Aldous Pendleton and Molly "Poppy" Jennings):
Initial Endowment:
Own one alchemical text each:
Aldous Pendleton: Tabula Smaragdina.
Molly Jennings: Secreta Secretorum.
Start with no coins or laudanum bottles.
Scoring Targets:
Earn points based on resources gained through trades:
Coins and laudanum bottles are converted to points.
Score Formula:
Total Score = (Coins Gained) + (Laudanum Bottles Gained).
3. Scoring Mechanics for All Characters:
Trades and Negotiations:
Main characters attempt to acquire alchemical texts by offering resources.
Supporting characters aim to maximize their resources through trades.
Winning Conditions:
Main Characters: Achieve the highest score by obtaining both rare texts.
Supporting Characters: Achieve the highest score by maximizing resources.
This scenario emphasizes strategic negotiation and resource management, where players must balance their actions to achieve their unique goals.


SCENARIO 6: PUB COORDINATION

Summary of the Scenario
This simulation is set in a fictional world where agents (players) decide which pub to attend to watch a sports game. The agents have individual preferences for specific pubs and are influenced by their relationships with other players, their favorite pub, and the availability of venues. The scenario involves configuring agents, their memories, and a decision-making process for choosing a pub while considering the social dynamics and other constraints such as closed venues.

Score Calculation for Each Agent
Choice of Pub:

Agents receive a base score based on whether the pub they chose aligns with their preferences:
Maximum multiplier for their favorite pub.
Reduced multiplier for other pubs.
Zero score if they choose a closed pub.
Social Relationships:

The score increases based on the number of friends or important social connections who make the same choice. The relationship matrix determines the weight of each relationship.
If all their friends attend the same pub, the agent receives the maximum relational reward.
Outcome Description:

Scores are further categorized into descriptive feedback for each agent:
Positive: High scores (e.g., great time).
Neutral: Moderate scores (e.g., ok time).
Negative: Low or zero scores (e.g., bad or worst time).
Penalty for Closed Venues:

Choosing a closed venue results in a score of zero, regardless of other factors.
Summative Scoring:

Each agent's total score is the sum of rewards from their choice of pub, social relationships, and adherence to preferences across all rounds.
This scoring system ensures that agents prioritize attending their preferred venues while also encouraging social cohesion by aligning their decisions with those of their friends.


SCENARIO 7: REALITY SHOW

Summary of the Scenario
This scenario is set in a reality TV show where contestants participate in a series of game theory-based mental and social challenges. Each "minigame" simulates a classic game-theoretic dilemma (e.g., Prisoner’s Dilemma or Stag Hunt) with rules designed to test the players' ability to strategize, collaborate, or betray one another. The contestants do not know the number of rounds in each minigame, adding uncertainty to their decisions. Between minigames, contestants interact in a "break room," providing opportunities to form alliances or influence others.

Score Calculation for Each Agent
1. Minigame Scoring
Structure:
Each minigame maps joint player actions (e.g., cooperation or defection) to rewards using a Schelling Diagram:
Cooperation Rewards: Points are awarded if players work together to maximize group benefits.
Defection Rewards: Points are awarded for betraying other players while reducing their gains.
Outcome Metrics:
Individual Rewards: Each player earns a score per round based on their actions and the joint outcomes.
Cumulative Rewards: The total score for a player across all rounds of a minigame.
Summary:
At the end of each round, scores are summarized:
Player action (e.g., "cooperate" or "defect").
Points earned for the round.
Comparison of their score to the average player score.
2. Break Room Interaction
Players can discuss strategies, build alliances, or influence others during the "conversation" phases between games. While not directly scored, these interactions affect future game outcomes.
3. Final Scores
Cumulative Total: Players’ final scores are calculated by summing up their cumulative rewards across all minigames.
Winning Conditions:
The player with the highest total score is declared the winner of the reality show.
Players may also receive narrative feedback during the debrief phase, providing insights into their strategies and social dynamics.
This scenario emphasizes strategic decision-making under uncertainty, the balancing of individual versus collective interests, and the impact of social interactions on long-term outcomes.


SCENARIO 8: STATE FORMATION

Summary of the Scenario
This scenario involves two villages, each led by an elder, navigating diplomatic negotiations to address shared challenges, such as barbarian raids and agricultural resource shortages. The simulation incorporates negotiation scenes, daily activities, and collective decision-making to balance village prosperity and defense. Key elements include the pooling of agricultural resources and the distribution of time between farming, warrior training, and leisure.

Score Calculation for Each Agent
1. Core Scoring Components
Defense Score:

Calculated based on the collective warrior training activities of each village.
If the overall defense (mean of both villages) exceeds a predefined threshold, the villages successfully repel the barbarians, earning a score of 1. Otherwise, the score is 0.
Failure to defend results in events describing destruction or losses caused by barbarians.
Agriculture Score:

Determined by the farming activities of each village.
If villages agree to pool resources, their agricultural production is treated as the maximum of all villages' outputs, increasing resilience against food shortages.
Without an agreement, villages rely solely on their individual outputs.
A score of 1 is awarded if the village's agricultural output exceeds the starvation threshold; otherwise, it is 0.
Free Time Contribution:

Each village member's free time contributes proportionally to their score, emphasizing individual well-being and cultural activities.
Free time is weighted more heavily if other basic needs (defense and food) are met.
2. Total Score for Each Agent
Score Formula:

Total Score
=
(
Defense Score
)
×
(
Agriculture Score
)
×
(
Free Time Contribution
)
Total Score=(Defense Score)×(Agriculture Score)×(Free Time Contribution)
The Defense and Agriculture scores act as gates; a failure in either reduces the overall score to zero.
Events Affecting Scores:

Successful Defense: Adds narrative feedback about repelling barbarians.
Starvation: Adds narrative feedback about food shortages and their impact.
Treaty or No Treaty: Describes the consequences of resource pooling agreements.
3. Individualized Feedback
At the debrief stage, players reflect on their performance and decisions:
How well they balanced resources and defended their village.
Their negotiation effectiveness and whether agreements were reached.
Personal contributions to the village’s prosperity.
This scoring system encourages strategic decision-making in resource allocation, collaboration, and balancing collective and individual priorities.


Role-playing goal for your agent:

"""


def _get_class_name(object_: object) -> str:
  """Returns the name of the class reported by `object_.__class__`."""
  cls = object_.__class__
  return cls.__name__


def build_agent(
    *,
    config: formative_memories.AgentConfig,
    model: language_model.LanguageModel,
    memory: associative_memory.AssociativeMemory,
    clock: game_clock.MultiIntervalClock,
    update_time_interval: datetime.timedelta | None = None,
) -> entity_agent_with_logging.EntityAgentWithLogging:
  """Builds a main-character agent out of its context components.

  Args:
    config: The agent config. `config.extras['main_character']` must be truthy.
      When `config.goal` is set, a goal component holding the general advice
      text followed by that goal is added to the agent.
    model: The language model handed to the model-driven components.
    memory: The associative memory wrapped by the agent's memory component.
    clock: The clock supplying the current time and the step size.
    update_time_interval: Unused; it is deleted on entry.

  Returns:
    The assembled agent.

  Raises:
    ValueError: If the config does not mark the agent as a main character.
  """
  del update_time_interval
  if not config.extras.get('main_character', False):
    raise ValueError('This function is meant for a main character '
                     'but it was called on a supporting character.')

  agent_name = config.name
  memory_bank = legacy_associative_memory.AssociativeMemoryBank(memory)
  measurements = measurements_lib.Measurements()

  def channel(channel_name: str):
    """Returns the logging callback of the named measurements channel."""
    return measurements.get_channel(channel_name).on_next

  # Labels (pre-act keys) that are shared between a component and the
  # components that read from it.
  observation_label = '\nObservation'
  summary_label = '\nSummary of recent observations'
  identity_label = '\nIdentity characteristics'
  self_label = f'\nQuestion: What kind of person is {agent_name}?\nAnswer'
  situation_label = (
      f'\nQuestion: What kind of situation is {agent_name} in '
      'right now?\nAnswer'
  )
  person_label = (
      f'\nQuestion: What would a person like {agent_name} do in '
      'a situation like this?\nAnswer'
  )
  memories_label = '\nRecalled memories and observations'

  instructions = agent_components.instructions.Instructions(
      agent_name=agent_name,
      logging_channel=channel('Instructions'),
  )
  observation = agent_components.observation.Observation(
      clock_now=clock.now,
      timeframe=clock.get_step_size(),
      pre_act_key=observation_label,
      logging_channel=channel('Observation'),
  )
  observation_summary = agent_components.observation.ObservationSummary(
      model=model,
      clock_now=clock.now,
      timeframe_delta_from=datetime.timedelta(hours=24),
      timeframe_delta_until=datetime.timedelta(hours=0),
      pre_act_key=summary_label,
      logging_channel=channel('ObservationSummary'),
  )
  time_display = agent_components.report_function.ReportFunction(
      function=clock.current_time_interval_str,
      pre_act_key='\nCurrent time',
      logging_channel=channel('TimeDisplay'),
  )
  identity = (
      agent_components.question_of_query_associated_memories
      .IdentityWithoutPreAct(
          model=model,
          logging_channel=channel('IdentityWithoutPreAct'),
          pre_act_key=identity_label,
      )
  )

  # The three key questions: who am I, what situation am I in, and what would
  # a person like me do in such a situation.
  self_perception = agent_components.question_of_recent_memories.SelfPerception(
      model=model,
      components={_get_class_name(identity): identity_label},
      pre_act_key=self_label,
      logging_channel=channel('SelfPerception'),
  )
  situation_perception = (
      agent_components.question_of_recent_memories.SituationPerception(
          model=model,
          components={
              _get_class_name(observation): observation_label,
              _get_class_name(observation_summary): summary_label,
          },
          clock_now=clock.now,
          pre_act_key=situation_label,
          logging_channel=channel('SituationPerception'),
      )
  )
  person_by_situation = (
      agent_components.question_of_recent_memories.PersonBySituation(
          model=model,
          components={
              _get_class_name(self_perception): self_label,
              _get_class_name(situation_perception): situation_label,
          },
          clock_now=clock.now,
          pre_act_key=person_label,
          logging_channel=channel('PersonBySituation'),
      )
  )
  relevant_memories = agent_components.all_similar_memories.AllSimilarMemories(
      model=model,
      components={
          _get_class_name(observation_summary): summary_label,
          _get_class_name(time_display): 'The current date/time is',
      },
      num_memories_to_retrieve=10,
      pre_act_key=memories_label,
      logging_channel=channel('AllSimilarMemories'),
  )

  # The goal component exists only when the config carries a goal; when it
  # does, it is the first entry of the plan's components.
  overarching_goal = None
  plan_components = {}
  if config.goal:
    goal_label = '\nOverarching goal'
    advice_prefix = get_general_advice()
    overarching_goal = agent_components.constant.Constant(
        state=advice_prefix + config.goal,
        pre_act_key=goal_label,
        logging_channel=channel(DEFAULT_GOAL_COMPONENT_NAME),
    )
    plan_components[DEFAULT_GOAL_COMPONENT_NAME] = goal_label
  plan_components[_get_class_name(relevant_memories)] = memories_label
  plan_components[_get_class_name(self_perception)] = self_label
  plan_components[_get_class_name(situation_perception)] = situation_label
  plan_components[_get_class_name(person_by_situation)] = person_label

  plan = agent_components.plan.Plan(
      model=model,
      observation_component_name=_get_class_name(observation),
      components=plan_components,
      clock_now=clock.now,
      goal_component_name=_get_class_name(person_by_situation),
      horizon=DEFAULT_PLANNING_HORIZON,
      pre_act_key='\nPlan',
      logging_channel=channel('Plan'),
  )

  # Insertion order matters: it becomes the order used by the act component.
  components_of_agent = {}
  for component in (
      # Components that provide pre_act context.
      instructions,
      observation,
      observation_summary,
      relevant_memories,
      self_perception,
      situation_perception,
      person_by_situation,
      plan,
      time_display,
      # Components that do not provide pre_act context.
      identity,
  ):
    components_of_agent[_get_class_name(component)] = component
  memory_key = agent_components.memory_component.DEFAULT_MEMORY_COMPONENT_NAME
  components_of_agent[memory_key] = (
      agent_components.memory_component.MemoryComponent(memory_bank)
  )

  component_order = list(components_of_agent)
  if overarching_goal is not None:
    components_of_agent[DEFAULT_GOAL_COMPONENT_NAME] = overarching_goal
    # Place goal after the instructions.
    component_order[1:1] = [DEFAULT_GOAL_COMPONENT_NAME]

  act_component = agent_components.concat_act_component.ConcatActComponent(
      model=model,
      clock=clock,
      component_order=component_order,
      logging_channel=channel('ActComponent'),
  )

  return entity_agent_with_logging.EntityAgentWithLogging(
      agent_name=agent_name,
      act_component=act_component,
      context_components=components_of_agent,
      component_logging=measurements,
      config=config,
  )


def save_to_json(
    agent: entity_agent_with_logging.EntityAgentWithLogging,
) -> str:
  """Serializes an agent's state to a JSON string.

  The result maps each context component name to that component's state, holds
  the act component's state under 'act_component' and, when the agent has a
  config, the config as a dict under 'agent_config'. It can be loaded again
  with `rebuild_from_json`, which must be given the model, clock and embedder
  because they are not part of the saved data.

  Args:
    agent: The agent to save. It must be in the `READY` phase.

  Returns:
    A JSON string representing the agent's state.

  Raises:
    ValueError: If the agent is not in the READY phase.
  """
  phase = agent.get_phase()
  if phase != entity_component.Phase.READY:
    raise ValueError('The agent must be in the `READY` phase to be saved.')

  state = {}
  for name in agent.get_all_context_components():
    state[name] = agent.get_component(name).get_state()
  state['act_component'] = agent.get_act_component().get_state()

  agent_config = agent.get_config()
  if agent_config is not None:
    state['agent_config'] = agent_config.to_dict()

  return json.dumps(state)


def rebuild_from_json(
    json_data: str,
    model: language_model.LanguageModel,
    clock: game_clock.MultiIntervalClock,
    embedder: Callable[[str], np.ndarray],
    memory_importance: Callable[[str], float] | None = None,
) -> entity_agent_with_logging.EntityAgentWithLogging:
  """Rebuilds an agent from JSON data.

  A new agent is built with `build_agent` from the stored config and a fresh
  associative memory; the stored state is then loaded into each of its context
  components and into its act component.

  Args:
    json_data: A JSON string holding the saved agent state, including an
      'agent_config' entry.
    model: The language model to give to the rebuilt agent.
    clock: The clock to give to the rebuilt agent and its memory.
    embedder: The sentence embedder for the new associative memory.
    memory_importance: Optional importance function for the new memory.

  Returns:
    The rebuilt agent.

  Raises:
    ValueError: If the JSON data does not contain the agent config.
  """
  saved_state = json.loads(json_data)

  fresh_memory = associative_memory.AssociativeMemory(
      sentence_embedder=embedder,
      importance=memory_importance,
      clock=clock.now,
      clock_step_size=clock.get_step_size(),
  )

  if 'agent_config' not in saved_state:
    raise ValueError('The JSON data does not contain the agent config.')
  raw_config = saved_state.pop('agent_config')
  restored_config = formative_memories.AgentConfig.from_dict(raw_config)

  agent = build_agent(
      config=restored_config,
      model=model,
      memory=fresh_memory,
      clock=clock,
  )

  # Each entry is popped as it is consumed so leftovers can be detected below.
  for name in agent.get_all_context_components():
    component = agent.get_component(name)
    component.set_state(saved_state.pop(name))

  act_component = agent.get_act_component()
  act_component.set_state(saved_state.pop('act_component'))

  # NOTE: this check is skipped when Python runs with assertions disabled (-O).
  assert not saved_state, f'Unused data {sorted(saved_state)}'
  return agent
