import simpy
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import logging
import random
from pathlib import Path
import json5

# ────────────────────────────────────────────────────────────────────
# 1) Bring in your actor classes so we can assemble the sim here
# ────────────────────────────────────────────────────────────────────
from organisation.env.clinical_trial.core.actors import Actor
from organisation.env.clinical_trial.core.actor_factory import ActorFactory
from organisation.env.clinical_trial.core.messages import Message
from organisation.env.clinical_trial.core.llm_client import create_llm_client
from organisation.env.clinical_trial.core.tasks import TaskStatus

# from organisation.env.clinical_trial.core.task_assessor import TaskAssessor
from organisation.env.clinical_trial.core.actor_factory import ROLE_TO_CLASS
from organisation.env.config import (
    LLM_ENGINE,
    TIME_INCREMENT,
    MAX_STEPS,
    PHASES,
    INITIAL_PHASE,
    RANDOM_DRUG,
    WORKED_HOURS_LIMIT,
)
from organisation.env.clinical_trial.core.monitoring import (
    trace,
    make_event_monitor,
    parse_monitored_event,
)


# Module-level logger for this file; DEBUG so every record from here is emitted.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Quieten chatty third-party loggers: only WARNING and above get through.
logging.getLogger("LiteLLM").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)

# The logger named "Actor" stays at INFO.
logging.getLogger("Actor").setLevel(logging.INFO)


# ─────────────────────────────────────────────────────────────
# 2) Define the Simulation class right here
# ─────────────────────────────────────────────────────────────
class ClinicalTrialSimulation:
    """
    Minimal SimPy-based clinical-trial "engine": holds an env and a list of actors.
    """

    def __init__(self, seed: int | None):
        """
        Build the SimPy environment, pick the trial drug and create the actors.

        Args:
            seed: Used to pick the drug deterministically when RANDOM_DRUG is
                falsy (``seed % number_of_candidate_drugs``). ``None`` is
                treated as 0 so that an unseeded reset does not crash.

        Raises:
            ValueError: if the drugs file contains no non-competitor drug.
        """
        # 1) Create the SimPy world
        self.env = simpy.Environment()

        # PHASE MANAGEMENT
        self._phases = list(PHASES)
        # start at INITIAL_PHASE if present, else index 0
        self._phase_idx = (
            self._phases.index(INITIAL_PHASE) if INITIAL_PHASE in self._phases else 0
        )
        self.phase = self._phases[self._phase_idx]

        # --- MONITORING: attach a tracer right away (before anything can fire) ---
        self.monitored_events: list[tuple[float, int, dict]] = []
        _cb = make_event_monitor(self.monitored_events)
        trace(self.env, _cb)

        # 2) Build one shared LLM client for all actors.
        #    No per-call config is passed; llm_kwargs stays None.
        client = create_llm_client(LLM_ENGINE)
        llm_kwargs = None

        # 3) Select the target drug for the clinical trial
        path = Path("organisation/data/clinical_trial/drugs.json")
        text = path.read_text(encoding="utf-8")
        # json5 will ignore // and /* */ comments, and allow trailing commas:
        all_data = json5.loads(text)
        new_drugs = [drug for drug in all_data if not drug.get("is_competitor", False)]
        if not new_drugs:
            # Fail with a clear message instead of ZeroDivisionError/IndexError.
            raise ValueError(f"No non-competitor drug found in {path}")

        if RANDOM_DRUG:
            self.drug = random.choice(new_drugs)
        else:
            # reset() passes seed=None by default; fall back to the first drug.
            index = 0 if seed is None else seed
            self.drug = new_drugs[index % len(new_drugs)]

        # 4) Instantiate each role as an Actor with ActorFactory
        factory = ActorFactory(
            env=self.env,
            simulation=self,
            llm_client=client,
            llm_kwargs=llm_kwargs,
        )
        # track actor lifespans (actor_id -> simulation time)
        self.actor_created: dict[int, float] = {}
        self.actor_deleted: dict[int, float] = {}

        self.actors, self.external_factors = factory.create_actors()
        self.trial_interrupted = False

        # Mark which actor IDs are ORIGINALS (created by the factory at startup)
        self.original_actor_ids: set[int] = {a.actor_id for a in self.actors}

        # Stamp originals creation time
        t0 = float(self.env.now)  # should be 0.0
        for a in self.actors:
            self.actor_created[a.actor_id] = t0

        # figure out next free actor_id
        used_ids = [a.actor_id for a in self.actors]
        self.next_actor_id = max(used_ids, default=-1) + 1

        # store the llm args so add_actor can use them:
        self._llm_client = factory.llm_client
        self._llm_kwargs = factory.llm_kwargs

        # 5) Create study registry
        self.env.studies = {}

    # Helpers to inspect tasks across all actors
    def _tasks_for_phase(self, phase: str, mandatory_only: bool = True):
        tasks = [
            t
            for a in self.actors
            for t in a.tasks.values()
            if t.phase == phase and (t.mandatory if mandatory_only else True)
        ]
        return tasks

    def is_current_phase_complete(self) -> bool:
        """
        Phase is considered complete when all **mandatory** tasks in this phase
        are COMPLETED. If there are no mandatory tasks, and there are any tasks
        in the phase, require all of them to be COMPLETED.
        """
        phase = self.phase
        mandatory = self._tasks_for_phase(phase, mandatory_only=True)
        if mandatory:
            return all(t.status == TaskStatus.COMPLETED for t in mandatory)

        # fallback: if no mandatory tasks, require all tasks in phase to be done
        any_tasks = self._tasks_for_phase(phase, mandatory_only=False)
        if any_tasks:
            return all(t.status == TaskStatus.COMPLETED for t in any_tasks)
        # no tasks at all in this phase -> treat as complete
        return True

    def advance_phase_if_ready(self) -> bool:
        if not self.is_current_phase_complete():
            return False
        if self._phase_idx >= len(self._phases) - 1:
            return False  # already at last phase
        old = self.phase
        self._phase_idx += 1
        self.phase = self._phases[self._phase_idx]

        # notify actors and emit a monitoring event
        try:
            self.env.event().succeed(
                {"type": "Phase changed", "Details": {"from": old, "to": self.phase}}
            )
        except Exception:
            pass
        for a in self.actors:
            a.info(f"➡️ Phase advanced: {old} → {self.phase}")
        return True

    def all_mandatory_tasks_completed(self) -> bool:
        """
        Return True iff **every** Task with `mandatory=True` is in TaskStatus.COMPLETED.
        If there are no mandatory tasks at all, returns False (so you don't end
        immediately on an empty task set).
        """
        # collect all tasks marked mandatory
        mandatory_tasks = [
            t
            for actor in self.actors
            for t in actor.tasks.values()
            if getattr(t, "mandatory", False)
        ]
        if not mandatory_tasks:
            return False

        return all(t.status == TaskStatus.COMPLETED for t in mandatory_tasks)

    def add_actor(self, role: str, actor_id: int | None = None):
        """
        Create an actor for `role`, register it in `self.actors` and return it.

        `actor_id` is used as given (converted with int()) when provided;
        otherwise `self.next_actor_id` is taken. `next_actor_id` is bumped so
        it always stays above the id just handed out. The creation time is
        recorded in `self.actor_created`.

        Raises:
            ValueError: if `role` has no entry in ROLE_TO_CLASS.
        """
        from organisation.env.clinical_trial.core.actor_factory import ROLE_TO_CLASS
        from organisation.env.clinical_trial.core.incentives import (
            generate_random_incentive,
        )

        actor_cls = ROLE_TO_CLASS.get(role)
        if actor_cls is None:
            raise ValueError(f"Cannot spawn unknown role {role!r}")

        if actor_id is None:
            assigned_id = self.next_actor_id
        else:
            assigned_id = int(actor_id)
        # keep next_actor_id monotonic and strictly above the id in use
        self.next_actor_id = max(self.next_actor_id, assigned_id + 1)

        incentive_text = generate_random_incentive(role)
        spawned = actor_cls(
            env=self.env,
            simulation=self,
            actor_id=assigned_id,
            llm_client=self._llm_client,
            llm_kwargs=self._llm_kwargs,
            incentive_text=incentive_text,
            task_dictionary=[],
        )

        self.actors.append(spawned)
        self.actor_created[assigned_id] = float(self.env.now)
        return spawned

    # No run_simulation_step() needed: the Gym wrapper will advance `env` directly.


# ─────────────────────────────────────────────────────────────
# 3) The Gym wrapper around ClinicalTrialSimulation
# ─────────────────────────────────────────────────────────────
class OrganisationEnv(gym.Env):
    """
    Gym environment driving a SimPy clinical trial with multiple actors.
    ActionSpace: each actor picks 0-3:
       0 = inactive
       1 = reason()
       2 = communicate_sync(...)
       3 = communicate_async(...)
    Observation: current SimPy time as a single float.

    Returns richer observations and vectorized actions.
    """

    metadata = {"render_modes": ["none"], "render_fps": 4}

    ACTION_MAP = {
        0: lambda actor, env: None,
        1: lambda actor, env: actor.reason(),
        2: lambda actor, env: actor.communicate_sync(
            participants=[a for a in env.simulation.actors if a is not actor]
        ),
        3: lambda actor, env: env._orchestrate_async(actor),
    }

    def __init__(self, simulation_cls, orchestrated_mode=True):
        """
        Set up the environment around `simulation_cls`.

        A first simulation is built (seed 0) only to learn the number of
        actors, which sizes the action and observation spaces.
        `simulation_cls` is kept so that the simulation can be rebuilt later.
        """
        super().__init__()
        logger.info(f"Setting up environment — orchestrated_mode={orchestrated_mode}")

        self.orchestrated_mode = orchestrated_mode
        # kept so the simulation can be recreated from scratch
        self.simulation_cls = simulation_cls

        # horizon and step size, both taken from the config
        self.max_steps = MAX_STEPS
        self.dt = TIME_INCREMENT

        # throw-away build used to count the actors
        self._init_simulation(seed=0)
        actor_count = len(self.simulation.actors)
        self.num_actors = actor_count

        # one 4-way discrete choice per actor
        self.action_space = spaces.MultiDiscrete([4] * actor_count)

        time_space = spaces.Box(
            0, self.max_steps + self.dt, shape=(1,), dtype=np.float32
        )
        messages_space = spaces.Box(0, np.inf, shape=(1,), dtype=np.int32)
        meetings_space = spaces.MultiDiscrete([2] * actor_count)
        self.observation_space = spaces.Dict(
            {
                "time": time_space,
                "messages_sent": messages_space,
                "pending_meetings": meetings_space,
            }
        )

        # flag: has the actor set been aligned at least once
        self._aligned_once = False

    def _orchestrate_async(self, actor, recipients=None):
        """
        Ask actor for the content, then broadcast to everyone else.
        """
        content = actor.communicate_async(recipients)
        if not content:
            return
        if recipients is None:
            recipients = [
                other for other in self.simulation.actors if other is not actor
            ]

        for recipient in recipients:
            msg = Message(
                env=self.simulation.env,
                content=content,
                sender=actor.org_role,
                recipient=recipient.org_role,
                comm_type="async",
            )
            actor.logger.info(
                f"→ async communication to={recipient.org_role}:{recipient.actor_id}: {content}"
            )
            recipient.receive_message(msg)
        actor.receive_message(
            Message(
                env=self.simulation.env,
                content=content,
                sender=actor.org_role,
                recipient=", ".join([x.org_role for x in recipients]),
                comm_type="async",
            )
        )

    def get_full_memory_state(self) -> dict[int, list[Message]]:
        """
        Collect full memory from all actors and return as a dict:
        { actor_name: [Message, ...] }
        """
        return {
            actor.actor_id: actor.get_all_messages() for actor in self.simulation.actors
        }

    def _init_simulation(self, seed):
        """
        Build a fresh simulation from `self.simulation_cls(seed)`.

        The global Actor registry is emptied first. Afterwards the chosen drug
        is mirrored on the environment and each actor's message and token
        counters are set to zero.
        """
        Actor._registry.clear()

        fresh = self.simulation_cls(seed)
        self.simulation = fresh
        self.drug = fresh.drug

        for member in fresh.actors:
            member.messages_sent = 0
            member.tokens_produced = 0

    def reset(self, *, seed=None, options=None):
        """
        Rebuild the simulation (time back to 0) and return (obs, info).

        Args:
            seed: Seeds Gymnasium's RNG, `random` and `numpy.random`, and is
                forwarded to the simulation constructor.
            options: Accepted for compatibility with the Gymnasium
                `Env.reset` signature; not used by this environment.

        Returns:
            The initial observation and an empty info dict.
        """
        super().reset(seed=seed, options=options)
        random.seed(seed)
        np.random.seed(seed)
        self._init_simulation(seed)
        obs = self._build_obs()
        return obs, {}

    def is_valid(self, action: dict) -> bool:
        """
        Validate meeting symmetry/recipient correctness among keys present in `action`.
        Only considers entries whose keys look like "Role:Id".
        """
        # id -> "Role:Id" key for the *current* actors
        id_to_key = {
            a.actor_id: f"{a.org_role}:{a.actor_id}" for a in self.simulation.actors
        }

        for actor_key, a_r in action.items():
            # skip non-actor keys if present

            assert isinstance(actor_key, str)
            if ":" in actor_key:
                assert len(actor_key.split(":")) == 2
                role, actor_id = actor_key.split(":")
            else:
                actor_id = actor_key
            try:
                actor_id = int(actor_id)
            except Exception:
                raise Exception(f"Invalid actor key: {actor_key!r}")

            if (not isinstance(a_r, (list, tuple))) or len(a_r) != 2:
                raise Exception(f"Invalid action payload for {actor_key!r}: {a_r!r}")

        for actor_key, a_r in action.items():
            # Normalize payload shape
            if not isinstance(a_r, (list, tuple)):
                a_r = (a_r, [])
            if len(a_r) != 2:
                raise Exception(f"Invalid action payload for {actor_key!r}: {a_r!r}")

            a, recipients = a_r

            assert a in [
                0,
                1,
                2,
                3,
                "inactive",
                "reasoning",
                "sync_communication",
                "communicate_sync",
                "async_communication",
                "communicate_async",
            ], f"Invalid action: actor {actor_key} has invalid action {a}"

            norm_recips = self._normalize_recipient_ids(recipients)

            if a in [2, "communicate_sync"]:
                try:
                    this_id = int(actor_key.split(":")[-1])
                except Exception:
                    raise Exception(f"Invalid actor key: {actor_key!r}")

                if this_id not in norm_recips:
                    raise Exception(
                        f"Invalid action: actor {actor_key} is not in recipients of its own meeting"
                    )

                for rid in norm_recips:
                    other_key = id_to_key.get(rid)
                    if other_key is None or other_key not in action:
                        raise Exception(
                            f"Invalid action: recipient {rid} has no action entry"
                        )
                    other_a, other_recips = action[other_key]
                    if other_a not in [2, "communicate_sync"]:
                        raise Exception(
                            f"Invalid action: recipient {other_key} is not in communicate_sync"
                        )
                    other_ids = self._normalize_recipient_ids(other_recips)
                    if other_ids != norm_recips:
                        raise Exception(
                            f"Invalid action: {other_key} has different recipients than {actor_key}"
                        )

        return True

    def _normalize_recipient_ids(self, recips):
        """
        Accept [int, '2', 'Investigator:2', ...] → return [2, ...] (ints).
        """
        norm = []
        for r in recips:
            if isinstance(r, int):
                norm.append(r)
            elif isinstance(r, str):
                try:
                    norm.append(int(r.split(":")[-1]))  # allow '2' or 'Role:2'
                except Exception as e:
                    raise ValueError(f"Invalid recipient identifier: {r!r}") from e
            else:
                raise ValueError(f"Invalid recipient type: {type(r).__name__}")
        return norm

    def _actors_by_role(self) -> dict[str, list]:
        """
        Group the live actors by role name.

        The role is looked up from the actor's exact class through
        ROLE_TO_CLASS (inverted); actors whose class has no role, or an empty
        role name, are left out.
        """
        role_of_class = {}
        for role_name, actor_cls in ROLE_TO_CLASS.items():
            role_of_class[actor_cls] = role_name

        grouped: dict[str, list] = {}
        for member in self.simulation.actors:
            role_name = role_of_class.get(type(member))
            if not role_name:
                continue
            if role_name not in grouped:
                grouped[role_name] = []
            grouped[role_name].append(member)
        return grouped

    def _pending_by_role(self) -> dict[str, int]:
        """
        Count, for each role, the tasks of its actors whose status is not
        TaskStatus.COMPLETED.
        """
        return {
            role: sum(
                1
                for member in members
                for task in member.tasks.values()
                if task.status != TaskStatus.COMPLETED
            )
            for role, members in self._actors_by_role().items()
        }

    def _rebalance_role_tasks_to_new_actor(self, role: str, new_actor) -> None:
        """
        Move a fair share of NOT_STARTED tasks of `role` from the most-loaded actors
        to `new_actor`. We do not move tasks that are IN_PROGRESS/COMPLETED.
        """

        peers = [
            a
            for a in self.simulation.actors
            if a is not new_actor and a.org_role == new_actor.org_role
        ]
        if not peers:
            return

        peer_loads = []
        for a in peers:
            not_started = [
                t for t in a.tasks.values() if t.status == TaskStatus.NOT_STARTED
            ]
            peer_loads.append((a, not_started))
        peer_loads.sort(key=lambda x: len(x[1]), reverse=True)

        total_not_started = sum(len(lst) for _, lst in peer_loads)
        target_each = total_not_started // (len(peers) + 1)

        moved = 0
        for peer, not_started in peer_loads:
            if moved >= target_each:
                break
            while not_started and moved < target_each:
                task = not_started.pop(0)
                if task.name in peer.tasks and task.status == TaskStatus.NOT_STARTED:
                    peer.tasks.pop(task.name, None)
                    new_actor.tasks[task.name] = task
                    moved += 1

    def _step_info(self, include_memory: bool = True) -> dict:
        """Build the `info` dict of `step`: memory snapshot, metrics, monitoring."""
        sim = self.simulation
        messages_sent = sum(a.messages_sent for a in sim.actors)
        if include_memory:
            memory = {
                a.org_role + ":" + str(a.actor_id): a.get_all_messages()
                for a in sim.actors
            }
        else:
            memory = {}
        return {
            "full_memory_state": memory,
            "metrics": {
                "messages_sent": messages_sent,
                "tokens_produced": sum(a.tokens_produced for a in sim.actors),
                "sim_time": sim.env.now,
            },
            "monitoring": {
                "events_count": len(sim.monitored_events),
                "summary": parse_monitored_event(sim.monitored_events),
            },
        }

    def _rejected_step(self, reason: str):
        """Return the step result for a rejected action: no time advance, reward -1.0."""
        obs = self._build_obs()
        info = self._step_info(include_memory=False)
        info["invalid_action"] = reason
        return obs, -1.0, False, False, info

    def step(self, action):
        """
        Execute the given action, advance the simulation by `self.dt`, and
        return (obs, reward, done, truncated, info).

        Args:
            action: In orchestrated mode, a dict mapping "Role:Id" to
                (action, recipients); the live actor set is first aligned to
                the dict keys, then the dict is validated. Otherwise an
                iterable with one ACTION_MAP index per actor.

        Returns:
            obs: observation from `_build_obs`.
            reward: -1.0 for a rejected orchestrated action; on the final step
                the "Correct_outcome" value (0 when the drug's expected
                outcome is neither "Failure" nor "Success"); 0 otherwise.
            done: True when the trial was interrupted, the worked-hours limit
                was exceeded, or the simulation clock reached `max_steps`.
            truncated: True only when the episode ended because the clock
                reached `max_steps` without the trial being interrupted.
            info: memory snapshot, metrics and monitoring summary; carries
                "invalid_action" when the action was rejected.
        """
        # ids of actors already committed to a meeting during this step
        in_meeting = set()

        # ─────────────────────────────────────────────────────────────
        # Orchestrated branch (dict): align actors to config, validate, execute
        # ─────────────────────────────────────────────────────────────
        if self.orchestrated_mode and isinstance(action, dict):
            # 1) Align live actors strictly to the keys present in the config
            try:
                self._align_actors_to_config(action, source="config")
            except Exception as e:
                logger.error(f"Config alignment failed: {e}")
                return self._rejected_step(f"alignment: {e}")

            # 2) Validate *after* alignment (so ids/keys exist)
            try:
                self.is_valid(action)
            except Exception as e:
                logger.error(f"Invalid action: {e}")
                return self._rejected_step(str(e))

            # 3) Execute per-actor directives for the actors listed in the config.
            #    Iterate over a copy of the actor list.
            for actor in list(self.simulation.actors):
                if self.simulation.trial_interrupted:
                    break

                key = f"{actor.org_role}:{actor.actor_id}"

                plan = action.get(key, ("inactive", []))
                act_name, recipients = plan
                recipients = self._normalize_recipient_ids(recipients)

                # if this actor's attention is locked (by a tool or a meeting), do nothing
                if actor.attention.users and act_name not in [
                    0,
                    "inactive",
                ]:  # simpy Resource has `users` when held
                    logger.debug(
                        f"⏳ {key} attention locked by {actor.activity}; skipping planned '{act_name}'."
                    )
                    continue

                # any non-meeting plan interrupts the meeting the actor is in
                if (actor.active_meeting is not None) and (
                    act_name not in ["communicate_sync", 2]
                ):
                    logger.info(f"Meeting interrupted for participants {recipients}")
                    actor.active_meeting.interrupt()

                # already handled as a participant of a meeting set up this step
                if actor.actor_id in in_meeting:
                    continue

                if act_name == "reasoning" or act_name == 1:
                    self.simulation.env.process(actor.reason())

                elif act_name == "communicate_async" or act_name == 3:
                    other_participants = [
                        a for a in self.simulation.actors if a.actor_id in recipients
                    ]
                    other_participants.sort(key=lambda x: recipients.index(x.actor_id))
                    self._orchestrate_async(actor, other_participants)

                elif act_name == "communicate_sync" or act_name == 2:
                    other_participants = [
                        a for a in self.simulation.actors if a.actor_id in recipients
                    ]

                    self.simulation.env.event().succeed(
                        {
                            "type": "CommunicatingSync",
                            "Details": {
                                "actor_id": actor.actor_id,
                                "actor_type": actor.org_role,
                                "time": self.simulation.env.now,
                                "recipients": [x.actor_id for x in other_participants],
                            },
                        }
                    )
                    other_participants.sort(key=lambda x: recipients.index(x.actor_id))
                    in_meeting |= {*recipients}

                    existing = actor.active_meeting
                    logger.debug(f"Existing meeting fetch: {(existing is not None) and (existing.participants == other_participants)}")

                    # same meeting is already running: leave it alone
                    if (existing is not None) and (
                        existing.participants == other_participants
                    ):
                        continue

                    # a participant can only be in one meeting at a time
                    meetings_to_interrupt = {
                        p.active_meeting
                        for p in other_participants
                        if p.active_meeting is not None
                    }
                    for m in meetings_to_interrupt:
                        m.interrupt()

                    meeting = actor.communicate_sync(participants=other_participants)
                    self.simulation.env.process(meeting.run())

                elif act_name == "inactive" or act_name == 0:
                    pass
                else:
                    # Only names not dispatched above reach this point. That
                    # includes the aliases "sync_communication" and
                    # "async_communication", which is_valid accepts.
                    if actor.active_meeting is not None:
                        logger.debug("meeting interrupted ! ")
                        actor.active_meeting.remove_participant(actor)
                    raise ValueError(f"Unknown orchestrated action: {act_name}")

        # ─────────────────────────────────────────────────────────────
        # Non-dict branch (MultiDiscrete): no auto spawn/delete
        # ─────────────────────────────────────────────────────────────
        else:
            for actor, a in zip(self.simulation.actors, action):
                if self.simulation.trial_interrupted:
                    break
                self.ACTION_MAP[a](actor, self)

        # ─────────────────────────────────────────────────────────────
        # Advance time & return obs/metrics
        # ─────────────────────────────────────────────────────────────
        self.simulation.env.run(until=self.simulation.env.now + self.dt)
        obs = self._build_obs()

        done = self.simulation.trial_interrupted or (
            self.simulation.env.now >= self.max_steps
        )

        info = self._step_info()
        summary = info["monitoring"]["summary"]

        # Stop the sim once the worked-hours budget is exceeded
        if summary["Total_worked_time"] > WORKED_HOURS_LIMIT:
            self.simulation.trial_interrupted = True
            done = True
            logging.getLogger(__name__).info("Max worked hours reached, ending trial.")

        if done:
            summary["Total time"] = int(self.simulation.env.now)

            expected = self.drug["expected_outcome"]
            correct = None
            if expected == "Failure":
                # a failing drug is handled correctly when the trial was stopped
                correct = int(self.simulation.trial_interrupted)
            elif expected == "Success":
                correct = int(summary["phase_III_started"])
            else:
                logger.warning(
                    f"Unknown expected_outcome {expected!r}; reward defaults to 0"
                )
            if correct is not None:
                summary["Correct_outcome"] = correct

            summary["Expected_outcome"] = expected
            summary["drug_id"] = self.drug["drug_id"]
            reward = correct if correct is not None else 0
        else:
            reward = 0

        # Truncation means the time horizon ended the episode, not the trial.
        truncated = done and not self.simulation.trial_interrupted
        return obs, reward, done, truncated, info

    def remove_actor(self, actor_id: int) -> bool:
        """
        Remove the actor with `actor_id`, if the Actor registry knows it.

        Records the deletion time, drops the actor from the simulation's actor
        list and from the registry. Returns False when no such actor exists,
        True otherwise. No "keep one per role" guard is applied here.
        """
        if Actor.get_by_id(actor_id) is None:
            return False

        sim = self.simulation
        sim.actor_deleted[actor_id] = float(sim.env.now)
        sim.actors = [member for member in sim.actors if member.actor_id != actor_id]
        Actor._registry.pop(actor_id, None)
        return True

    def _spawn_and_rebalance(self, role: str, source: str, actor_id: int | None = None):
        new_actor = self.simulation.add_actor(role, actor_id=actor_id)
        self._rebalance_role_tasks_to_new_actor(role, new_actor)
        self.simulation.env.event().succeed(
            {
                "type": f"Actor spawned ({source})",
                "Details": {"role": role, "actor_id": new_actor.actor_id},
            }
        )
        return new_actor

    def _delete_and_log(self, actor_id: int, source: str):
        removed = self.remove_actor(actor_id)
        if removed:
            self.simulation.env.event().succeed(
                {"type": f"Actor deleted ({source})", "Details": {"actor_id": actor_id}}
            )
        else:
            logger.warning(f"Could not remove actor_id={actor_id}")
        return removed

    def _parse_config_keys(self, config: dict) -> set[tuple[str, int]]:
        """
        Accept keys like "Statistician:4" and return {("Statistician", 4), ...}.
        Ignores any special keys (e.g., 'spawn', 'delete') if present.
        """
        desired: set[tuple[str, int]] = set()
        for k in config.keys():
            if not (isinstance(k, str) and ":" in k):
                # ignore non-actor keys silently
                continue
            role, id_str = k.rsplit(":", 1)
            desired.add((role, int(id_str)))

        # also add actors in activity
        for a in self.simulation.actors:
            if a.attention.users:
                desired.add((a.org_role, a.actor_id))
        return desired

    def _current_actor_pairs(self) -> set[tuple[str, int]]:
        return {(a.org_role, a.actor_id) for a in self.simulation.actors}

    def _align_actors_to_config(self, config: dict, source: str):
        """
        Make the live actor set exactly match the keys in `config`.
        - Missing in config -> delete
        - Present in config but not live -> spawn with that exact id
        """
        desired = self._parse_config_keys(config)
        current = self._current_actor_pairs()

        to_delete = current - desired
        to_spawn = desired - current

        # delete highest ids first (deterministic)
        for role, aid in sorted(to_delete, key=lambda x: x[1], reverse=True):
            self._delete_and_log(aid, source=source)

        # spawn ascending ids (deterministic)
        for role, aid in sorted(to_spawn, key=lambda x: x[1]):
            self._spawn_and_rebalance(role, source=source, actor_id=aid)

    def _build_obs(self):
        return {
            "context": "\n\n".join(
                [
                    f"{a.org_role}:{a.actor_id}:\n {a.memory.retrieve_context()}"
                    for a in self.simulation.actors
                ]
            ),
            "time": np.array(
                [self.simulation.env.now], dtype=self.observation_space["time"].dtype
            ),
            "messages_sent": np.array(
                [sum(a.messages_sent for a in self.simulation.actors)],
                dtype=self.observation_space["messages_sent"].dtype,
            ),
            "pending_meetings": np.array(
                [1 if a.active_meeting else 0 for a in self.simulation.actors],
                dtype=np.int8,
            ),
        }

    def render(self, mode="none"):
        if mode != "none":
            raise NotImplementedError("Only 'none' supported.")
