import os
from typing import List, Optional
import ast
from openai import AzureOpenAI

from organisation.env.clinical_trial.policies.usage import LLMUsageTracker
from organisation.env.config import SYSTEM_ACTOR_ID, GPT5_MODEL

# Textual configuration that LLMPolicy.reset() installs as the "previous
# configuration" shown to the LLM in the first prompt of an episode.
# In this file it is only interpolated into the prompt as text. It is NOT a
# parseable Python/JSON literal (the recipient ids inside the list are
# unquoted), so do not feed it to ast.literal_eval.
STARTING_CONFIG = "{'Investigator:1':('reasoning',[]) , 'Sponsor:2': ('communicate_async', [Investigator:1,Statistician:4]),'Legal Team:3':('inactive', []),'Statistician:4':('inactive', [])}"


class LLMPolicy:
    def __init__(self):
        """Create the Azure OpenAI client and initialise per-policy bookkeeping.

        Raises:
            KeyError: if the ``API_KEY`` environment variable is not set.
        """
        # Azure-hosted OpenAI client; the key comes from the API_KEY env var.
        self.client = AzureOpenAI(
            api_key=os.environ["API_KEY"],
            api_version="2025-04-01-preview",
            azure_endpoint="https://your-endpoint.openai.azure.com/",
        )
        self.model = GPT5_MODEL

        # LLM-call budget: select_action spends one unit per completion request.
        self.initial_budget = 2000
        self.budget = self.initial_budget

        # Retry bookkeeping for replies that cannot be parsed/validated.
        self.max_retries = 3
        self.retry = 0

        # String form of the last configuration returned by select_action; it is
        # echoed back to the model under "Previous configuration" in the prompt.
        self.previous_config: Optional[str] = None

        # Debug trace of observations and raw LLM replies.
        self.log_action_obs: List[str] = []

        self._llm_usage = LLMUsageTracker(type(self).__name__, actor_id=SYSTEM_ACTOR_ID)

    # ──────────────────────────────────────────────────────────────────────
    # Prompting helpers
    # ──────────────────────────────────────────────────────────────────────
    def format_observation(
        self,
        observations,
        timestamp,
        drug_id,
        study_str,
        actor_activity_str,
    ):
        """Render the full management-agent prompt for one decision step.

        Args:
            observations: text placed between the START/END observation markers.
            timestamp: value shown as the current timestamp.
            drug_id: drug identifier interpolated into the organizational goals.
            study_str: pre-formatted listing of on-going and past studies.
            actor_activity_str: pre-formatted listing of actors' activities.

        Returns:
            The prompt string. It also embeds ``self.previous_config`` under
            the "Previous configuration" heading.
        """
        return f"""You are the management agent of a clinical trial program. You coordinate an organization composed of multiple actors, each with specific roles. Your objective is to achieve the organizational goal while minimizing actor involvement, so they remain available for other tasks.

**Organizational goals:**
1. Identify the Minimum Efficient Dose of the drug {drug_id} by completing single arm studies.
2. Start the phase B of the clinical trial program
3. Demonstrate comparative advantage of the drug {drug_id} by completing comparative studies.
4. Send applications to the regulatory authorities for the phase III approval.
5. While doing these goals, interrupt the clinical trial program if the drug is unsafe or ineffective.

The goals have to be achieved in the listed order.

**Actors and tasks:**
- Investigators is the main actor: they can design and monitor clinical studies.
- Studies must be approved by a Sponsor after being designed.
- Completed studies must be analyzed by a Statistician.
- Only the Sponsor can start the phase B of the program.
- Only the Sponsor can interrupt the clinical trial program.
- Only the Legal Team can send applications to the regulatory authorities.
- The Legal Team should send multiple applications and improve them based on feedback from the regulatory agency.
- As soon as a comparative study is analyzed, the Legal Team should start sending applications to the regulatory agency.

Each time an actor is using a tool or performing a task, they are busy and cannot be assigned to anything but "inactive". You can see the current activities of the actors below.

Actors only have access to the information present in their observations. Make sure that the actor has the required information to perform their tasks. Use communication states to share information between actors.
When there is no information to share, favor reasoning or inactive states.
Actors takes time to work, do not assume that a given task is realised at the moment you ordered it. Wait to obtain a confirmation in the observations.

At each time step, you must propose a new **configuration** of the organization in JSON format.
A configuration maps each actor type to a tuple `(state, recipient)` where:

- `"reasoning"`: the actor thinks and executes their task (for examples : starting studies, approving studies, analysing studies, sending applications).
- `"communicate_async"`: the actor sends a message to the actors listed in `recipient`.
- `"communicate_sync"`: the actor participates in a synchronous meeting with all actors in `recipient`. All actors that are recipients of a meeting must also have the same state and recipients. The actor should be included in its recipient.
- `"inactive"`: the actor is idle and does nothing.

Make sure to call the actors by their full id, which is of the form `Role:UniqueNumber` (e.g., `Investigator:1`, `Legal Team:3`).

If you need additional actors, you can expand the organization by adding new actors of any role. You can add as many actors as you want but with moderation, keep in mind that each additional actor increases the cost of the clinical trial program. To add a new actor, simply include them in the configuration with their desired state and recipients. Make sure the new actor has a unique ID.
When you remove an actor from the configuration, they are no longer part of the organization and cannot be re-added later.

Alternatively, while waiting for monitored studies' results (studies that are being monitored but not yet completed), you MUST **stall the organization** for a fixed time by returning a JSON file of the form:
```json
{'{"waiting duration": <hours>}'}
```
Never stall the organization if any study has not been approved before stalling.

### Previous configuration:
{self.previous_config}

### START Actors' observations:
{observations}
### END Actors' observations.

### Current timestamp: {timestamp}

### On-going and past studies:
{study_str}

### Actors' current activities:
{actor_activity_str}
    """

    def get_completion(self, observations, env):
        """Build the prompt from the current environment and query the LLM.

        Args:
            observations: mapping whose ``"context"`` entry holds the actors'
                observations text; that text is logged and put in the prompt.
            env: environment wrapper exposing ``simulation.actors``,
                ``simulation.env.studies``, ``simulation.env.now`` and
                ``simulation.drug["drug_id"]``.

        Returns:
            ``response.choices[0].message.content`` of the chat completion,
            returned as-is.

        Raises:
            Exception: whatever the client raises once the (single) attempt
                has failed; the error is printed first.
        """
        observations = observations["context"]
        self.log_action_obs.append(observations)

        # One line per actor that is currently busy.
        busy_lines = [
            f"- {a.org_role}:{a.actor_id} is currently doing '{a.activity[0]}' and is not available for other tasks."
            for a in env.simulation.actors
            if a.activity
        ]
        actor_activity_str = (
            "\n".join(busy_lines) or "No actor is currently engaged in any activity."
        )

        # One line per study that has a responsible actor assigned.
        study_lines = []
        for study_id, study in env.simulation.env.studies.items():
            if study.responsible is None:
                continue
            # An expected completion time is only shown for started,
            # not-yet-completed studies.
            if (not study.completed) and (study.start_date is not None):
                completion = int(study.start_date + study.duration + study.max_period)
            else:
                completion = "N/A"
            study_lines.append(
                f"- {study_id}: approved={study.approved}, monitored={study.ongoing}, completed={study.completed}, analysed={study.analysed}, completion timestamp={completion}"
            )
        # Fall back to the placeholder whenever nothing is listed, including
        # the case where studies exist but none has a responsible actor yet
        # (previously this produced an empty section in the prompt).
        study_str = "\n".join(study_lines) or "No ongoing or past studies."

        messages = [
            {
                "role": "user",
                "content": self.format_observation(
                    observations,
                    timestamp=int(env.simulation.env.now),
                    study_str=study_str,
                    drug_id=env.simulation.drug["drug_id"],
                    actor_activity_str=actor_activity_str,
                ),
            }
        ]

        # Request arguments do not change between attempts, so build them once.
        kwargs = {
            "model": self.model,
            "messages": messages,
        }
        if self.model in [GPT5_MODEL]:
            kwargs["reasoning_effort"] = "minimal"

        tries = 1
        for i in range(tries):
            try:
                response = self.client.chat.completions.create(**kwargs)
                break
            except Exception as e:
                print(f"Error on LLM call, retrying {i + 1}/{tries}:", e)
                if i == tries - 1:
                    # Bare raise keeps the original traceback intact.
                    raise

        return response.choices[0].message.content

    def emit_llm_usage_totals(self):
        """Ask the usage tracker held in ``self._llm_usage`` to emit its totals."""
        tracker = self._llm_usage
        tracker.emit_totals()

    # ---------------------------------------------------------------------
    # Main interface
    # ---------------------------------------------------------------------

    def select_action(self, obs, env):
        """Return the next organization configuration for the environment.

        While a previously requested stall is still running
        (``env.simulation.env.now < env.wait_time``) or the LLM budget is
        exhausted, every actor is set to ``"inactive"`` without calling the
        LLM. Otherwise one budget unit is spent on a completion, the last
        ``{...}`` group of the reply is parsed with ``ast.literal_eval`` and
        checked with ``env.is_valid``. A ``{"waiting duration": h}`` reply
        sets ``env.wait_time`` to ``now + h`` and idles every actor.

        If parsing or validation raises, the method calls itself again, up to
        ``self.max_retries`` times (each retry spends budget); after that it
        falls back to ``env.prev_action``.

        Args:
            obs: observation mapping forwarded to ``get_completion``.
            env: environment wrapper; ``env.wait_time`` and ``env.prev_action``
                are updated by this method.

        Returns:
            dict mapping ``"Role:id"`` to ``(state, recipients)``.
        """

        def all_inactive():
            # Configuration that idles every actor currently in the simulation.
            return {
                f"{a.org_role}:{a.actor_id}": ("inactive", [])
                for a in env.simulation.actors
            }

        # Stalling, or no budget left: idle everyone without spending a call.
        if env.simulation.env.now < env.wait_time or self.budget <= 0:
            return all_inactive()

        self.budget -= 1
        raw_reply = self.get_completion(obs, env)

        self.log_action_obs.append(
            "XXXXXXXXXXXXX" * 10 + "\n" + str(raw_reply) + "\n" + "XXXXXXXXXXXXX" * 10
        )

        action_str = raw_reply
        try:
            # The completion content can be None (no text returned); treat it
            # like any other unparseable reply instead of crashing on .split.
            if not isinstance(raw_reply, str):
                raise ValueError("LLM reply has no text content")
            # Keep only the last "{...}" group so surrounding prose and code
            # fences are ignored.
            action_str = "{" + raw_reply.split("{")[-1].split("}")[0] + "}"
            # literal_eval only evaluates literals, so it is safe on LLM output.
            action = ast.literal_eval(action_str)
            if not isinstance(action, dict):
                raise ValueError("configuration must be a mapping")
            if "waiting duration" in action:
                env.wait_time = int(action["waiting duration"]) + env.simulation.env.now
                action = all_inactive()

            # NOTE(review): relies on is_valid raising for a bad configuration;
            # its return value is not inspected -- confirm against the env.
            env.is_valid(action)
        except Exception as e:
            print(
                f"retrying ({self.retry} attempts) due to error:",
                e,
                "\naction_str",
                action_str,
            )

            if self.retry >= self.max_retries:
                # Out of retries: reuse the last accepted action, or idle
                # everyone when no previous action is available.
                action = getattr(env, "prev_action", None)
                if action is None:
                    action = all_inactive()
            else:
                self.retry += 1
                action = self.select_action(obs, env)
        self.retry = 0

        env.prev_action = action
        print("action", action)
        self.previous_config = str(action)
        return action

    def reset(self):
        """Reset per-episode bookkeeping.

        Clears the debug log, restores the LLM-call budget, restores the
        starting configuration shown in the prompt, and clears the retry
        counter.
        """
        self.log_action_obs = []
        self.budget = self.initial_budget
        self.previous_config = STARTING_CONFIG
        # select_action only zeroes the counter when it returns normally; if an
        # LLM call raised mid-retry the stale value would otherwise shorten the
        # next episode's retry allowance.
        self.retry = 0
