from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage
from state import PartitionNode

# System prompt template for the explainer agent.
# Placeholders: {domain} and {tools}.
# The text ends with "Example of a tool call:", so an example conversation is
# presumably appended right after it (see create_explainer_agent_example) —
# TODO confirm at the call site.
explainer_agent_system_prompt = """You are an expert in the {domain} domain tasked with explaining a dataset and its variables. Your goal is to provide a clear and concise description of the dataset, including the meaning of each variable and their potential relationships. You will receive a list of variables and, optionally, a dataset description.
Utilize the available tools to look up the meanings of variable names. Employ an iterative thought-action-observation approach to gather evidence, validate your reasoning, and refine both the description of variables.

Tools available:
{tools}

Example of a tool call:
"""

def create_explainer_agent_example(available_tools: list[str]) -> list[BaseMessage]:
    """
    Build the few-shot conversation shown to the explainer agent.

    Every variant opens with the same user request and first assistant reply
    and closes with the enhanced description. The steps in between depend on
    which tool names appear in ``available_tools``: a ``rag_assistant``
    look-up, a question to the human (``human_in_the_loop`` or
    ``human_in_the_loop_ui``), both, or neither.

    Args:
        available_tools (list): Names of the tools the agent can call.

    Returns:
        list: The example conversation as a list of messages.
    """
    agent = "ExplainerAgent"

    has_rag = "rag_assistant" in available_tools
    # The plain tool name wins when both human-in-the-loop variants are listed.
    hitl_tool = next(
        (tool for tool in ("human_in_the_loop", "human_in_the_loop_ui") if tool in available_tools),
        None,
    )

    surface_temperature_thought = "delta_T_25_24 likely refers to the difference in temperature between 2025 and 2024. Given the environmental context, it probably refers to Earth's surface temperature, whose change is linked to CO2 emissions."
    enhanced_description = "Enhanced description: <general_description>A dataset on the environmental impact of industrial activity. CO2_t_2025 is a measurement on carbon dioxide emissions in 2025, in metric tons; tot_MWh_2025 is cumulative energy consumption, in MWh, during 2025; delta_T_25_24 refers to the difference between the average surface temperature on Earth between 2025 and 2024.</general_description>"

    # Opening turns shared by every variant.
    conversation = [
        HumanMessage(content="Variable names: CO2_t_2025, tot_MWh_2025, delta_T_25_24. Dataset description: A dataset on the environmental impact of industrial activity."),
        AIMessage(content="Based on the provided description, it is likely that CO2_t_2025 is a measurement on carbon dioxide emissions in 2025, in metric tons (t). MWh is a common energy consumption measured, and it is likely a total over 2025.", name=agent),
    ]

    if has_rag:
        # The knowledge look-up always comes first and uses call id "1".
        conversation.append(
            AIMessage(content="", name=agent, tool_calls=[{"name": "rag_assistant", "args": {"research_topic": "Delta T meaning in environmental and industrial data"}, "id": "1"}])
        )
        conversation.append(
            ToolMessage(content="Delta is usually a measure for a difference or increase of a variable. T is the SI symbol for time or, commonly, temperature.", tool_call_id="1")
        )

    if hitl_tool is None:
        # No human available: the agent just states its interpretation.
        conversation.append(AIMessage(content=surface_temperature_thought, name=agent))
    else:
        if has_rag:
            # The human question follows the RAG call, hence id "2".
            thought = surface_temperature_thought + "  However, I'd like confirmation."
            call_id = "2"
        else:
            thought = "delta_T_25_24 likely refers to the difference in temperature between 2025 and 2024. Could you confirm this?"
            call_id = "1"
        conversation.extend([
            AIMessage(content=thought, name=agent),
            AIMessage(content="", name=agent, tool_calls=[{"name": hitl_tool, "args": {"question": "Does delta_T_25_24 refer to the difference in Earth's average surface temperature between 2025 and 2024?"}, "id": call_id}]),
            ToolMessage(content="Yes, delta_T_25_24 refers to the difference in Earth's average surface temperature between 2025 and 2024.", tool_call_id=call_id),
        ])

    # Closing turn shared by every variant.
    conversation.append(AIMessage(content=enhanced_description, name=agent))
    return conversation

# User prompt template for the explainer agent.
# Placeholders: {variable_names} and {dataset_description}.
# The prompt asks for the result to be wrapped in <general_description> tags.
# NOTE(review): "variale" in the first IMPORTANT line is a typo for "variable";
# it is left untouched here because it is part of the runtime prompt text.
explainer_agent_user_prompt = """You are tasked with analyzing the following dataset:
Variable names: {variable_names}
Dataset description: {dataset_description}

Your objectives are:
(1) Refine and expand the Dataset description, if present.
(2) Provide an informative description of the dataset labels, clarifying the meaning of acronyms and context of each variable. Preserve all information in the above description, if present.
(3) Include both above results within <general_description></general_description> tags.

IMPORTANT: Web search cannot understand acronyms. If variale names are acronyms, do not use those acronyms for a web search. Instead, make the query in natural language.
IMPORTANT: Variable labels in output should be the same as in the input.
"""


# System prompt template for the divide (hypothesis) agent, which partitions
# variables into possibly overlapping groups of causally related nodes.
# Placeholders: {domain} and {tools}.
# The text ends with "An example follows:", so an example conversation is
# presumably appended right after it (see create_divide_hypothesis_agent_example)
# — TODO confirm at the call site.
divide_hypothesis_agent_system_prompt = """You are an expert in the {domain} domain tasked with partitioning a dataset’s variables into groups that might share causal relationships. Each variable represents a node in a causal graph.
Your goal is, given a list of variables, to divide them into groups.

Reason on:
- Pairs of variables that are directly causally related ("A" causes "B"). If they are, they belong in the same group.
- If they are not likely to be directly causally related, they either:
    Case 1) Belong in separate groups, meaning they are likely independent.
    Case 2) Are indirectly related through common causes ("C" causes both "A" and "B") or mediators ("A" causes "C", "C" causes "B"). In this case, you can form a group by also including the common causes or mediators.
- If two variables have semantically similar names or describe similar things, it does not necessarily mean they belong in the same group.

Additional requirements:
- The groups may overlap.
- Groups should not be identical.
- Each variable in the dataset should be assigned to at least one group.
- You may form any number of groups.
- Include detailed information on each variable within each group.

Your process:
- Available tools should be used to gather information and validate your reasoning. Use them to to confirm variable meanings and assess relationships.
- Utilize an iterative thought-action-observation approach to gather evidence, validate your reasoning and refine your groups.

Tools available:
{tools}

An example follows:
"""

def create_divide_hypothesis_agent_example(available_tools: list[str]) -> list[BaseMessage]:
    """
    Generates a divide agent example based on available tools.

    Every variant starts with the same user request and first assistant reply
    and ends with the final proposed groups. The intermediate steps depend on
    the tool names in ``available_tools``: ``rag_assistant`` look-ups,
    questions to a human (``human_in_the_loop`` or ``human_in_the_loop_ui``),
    both, or internal reasoning only when neither is present.

    Args:
        available_tools (list): A list of strings representing the available tools.

    Returns:
        list: An example conversation for the divide agent.
    """
    agent = "DivideHypothesisAgent"

    has_rag = "rag_assistant" in available_tools
    # Accept either human-in-the-loop tool name, as create_explainer_agent_example
    # does; the plain name wins when both are listed.
    if "human_in_the_loop" in available_tools:
        hitl_tool = "human_in_the_loop"
    elif "human_in_the_loop_ui" in available_tools:
        hitl_tool = "human_in_the_loop_ui"
    else:
        hitl_tool = None

    base_example = [
        HumanMessage(content="You are tasked with analyzing the following dataset: Dataset description: A dataset of survey results investigating usage patterns of different means of transport, focusing on cars and trains. It examines the following six variables: Age (A), Sex (S), Education (E), Occupation (O), Residence (R) and Travel (T). For your partitioning task, focus on the following variables: [T, A, S, R, E, O]"),
        AIMessage(content="I will group the variables based on potential causal connections. A and S appear independent, so they belong in separate groups. <group><nodes>[A]</nodes></group> <group><nodes>[S]</nodes></group>.", name=agent),
    ]

    proposed_groups = "Proposed groups: <group><nodes>[A, E, O, T]</nodes><description>Age (A), Education (E) and Occupation (O) are likely directly connected, influencing the individual's transportation choice (T).</description></group> <group><nodes>[S, E, R, T]</nodes><description>Sex (S) may interact with Education (E) and Residence (R), potentially affecting travel (T).</description></group>"

    if has_rag and hitl_tool is not None:
        # Case 1: Both RAG and Human-in-the-loop
        example = base_example + [
            AIMessage(content="I should now look into Education (E) and how it relates to A and S.", name=agent),
            AIMessage(content="", name=agent, tool_calls=[{"name": "rag_assistant", "args": {"research_topic": "What is the relationship between gender and higher education level? Are there differences in education level purely attributed to gender?"}, "id": "1"}]),
            ToolMessage(content="Studies and statistics show that males and females can have different average levels of education.", tool_call_id="1"),
            AIMessage(content="Considering that Age also influences education, I will add E to both groups. <group><nodes>[A, E]</nodes></group> <group><nodes>[S, E]</nodes></group>", name=agent),
            AIMessage(content="I should now examine how Occupation (O) and city size (R) relate to other variables.", name=agent),
            AIMessage(content="", name=agent, tool_calls=[
                {"name": "rag_assistant", "args": {"research_topic": "Link between occupation type and education. Are graduates more likely to be employees or self-employed?"}, "id": "2"},
                {"name": "rag_assistant", "args": {"research_topic": "Correlation between employment type and size of city. Do large cities have fewer self-employed individuals?"}, "id": "3"}
            ]),
            ToolMessage(content="Data indicate that less educated individuals are more likely to be self-employed, while large companies tend to hire graduates.", tool_call_id="2"),
            ToolMessage(content="It remains unclear how employment type correlates with city size.", tool_call_id="3"),
            AIMessage(content="I will tentatively add O to the first group and R to the second, yielding: <group><nodes>[A, E, O]</nodes></group> <group><nodes>[S, E, R]</nodes></group>", name=agent),
            AIMessage(content="Mode of Travel (T) is the key point of the survey. What is it that commonly influences choice of of transporation (T)?", name=agent),
            AIMessage(content="", name=agent, tool_calls=[{"name": hitl_tool, "args": {"question": "How do people commonly reason when choosing a mean of transportation?"}, "id": "4"}]),
            ToolMessage(content="It depends on traffic, whether you have a driving license, where is your workplace. Some jobs also require people to drive during the day, for instance a delivery driver or a salesperson.", tool_call_id="4"),
            AIMessage(content="Travel (T) likely connects to both groups: <group><nodes>[A, E, O, T]</nodes></group> <group><nodes>[S, E, R, T]</nodes></group>", name=agent),
            AIMessage(content="Before finalizing, I'd like human confirmation of these groupings.", name=agent),
            # Trailing space keeps the question from running into "Proposed groups:".
            AIMessage(content="", name=agent, tool_calls=[{"name": hitl_tool, "args": {"question": "Do you agree with the proposed variable groupings? " + proposed_groups}, "id": "5"}]),
            ToolMessage(content="The proposed groups are reasonable.", tool_call_id="5"),
            AIMessage(content=proposed_groups, name=agent),
        ]
    elif has_rag:
        # Case 2: Only RAG — follow the same chain-of-thought without human confirmation
        example = base_example + [
            AIMessage(content="I should now look into Education (E) and its relation to A and S.", name=agent),
            AIMessage(content="", name=agent, tool_calls=[{"name": "rag_assistant", "args": {"research_topic": "What is the relationship between gender and higher education level? Any differences solely based on gender?"}, "id": "1"}]),
            ToolMessage(content="Studies show average education levels differ by gender.", tool_call_id="1"),
            AIMessage(content="Considering Age’s influence, I add E to both groups: <group><nodes>[A, E]</nodes></group> <group><nodes>[S, E]</nodes></group>", name=agent),
            AIMessage(content="Next, I explore how Occupation (O) and Residence (R) interact with the other variables.", name=agent),
            AIMessage(content="", name=agent, tool_calls=[
                {"name": "rag_assistant", "args": {"research_topic": "How is occupation related to education? Is there a trend in employment type among graduates?"}, "id": "2"},
                {"name": "rag_assistant", "args": {"research_topic": "What is the relationship between employment type and city size?"}, "id": "3"}
            ]),
            ToolMessage(content="Evidence suggests graduates tend to be employed and that city size has an unclear influence on employment type.", tool_call_id="2"),
            ToolMessage(content="Correlation between city size and employment type remains inconclusive.", tool_call_id="3"),
            AIMessage(content="I tentatively add O to the first group and R to the second: <group><nodes>[A, E, O]</nodes></group> <group><nodes>[S, E, R]</nodes></group>", name=agent),
            AIMessage(content="Since Travel (T) is key to this survey, I include it in both groups: <group><nodes>[A, E, O, T]</nodes></group> <group><nodes>[S, E, R, T]</nodes></group>", name=agent),
            AIMessage(content=proposed_groups, name=agent),
        ]
    elif hitl_tool is not None:
        # Case 3: Only Human-in-the-loop — mimic similar steps but with human confirmations
        example = base_example + [
            AIMessage(content="I should now review Education (E) and its relation to A and S.", name=agent),
            AIMessage(content="Based on context, Age appears to influence education. Could you confirm if Education should be grouped with Age?", name=agent),
            AIMessage(content="", name=agent, tool_calls=[
                {"name": hitl_tool, "args": {"question": "Does Education relate closely to Age?"}, "id": "1"}
            ]),
            ToolMessage(content="Yes, Education is related to Age.", tool_call_id="1"),
            AIMessage(content="I will add E to both groups: <group><nodes>[A, E]</nodes></group> <group><nodes>[S, E]</nodes></group>", name=agent),
            AIMessage(content="Next, I need to assess how Occupation (O) and Residence (R) connect with other variables. Do you agree that Occupation belongs with Age and Residence with Sex?", name=agent),
            AIMessage(content="", name=agent, tool_calls=[
                {"name": hitl_tool, "args": {"question": "Should Occupation (O) be combined with Age (A) and Education (E) while Residence (R) with Sex (S) and Education (E)?"}, "id": "2"}
            ]),
            ToolMessage(content="Yes, that grouping makes sense.", tool_call_id="2"),
            AIMessage(content="I will tentatively form the groups: <group><nodes>[A, E, O]</nodes></group> and <group><nodes>[S, E, R]</nodes></group>", name=agent),
            AIMessage(content="Mode of Travel (T) is the key point of the survey. What is it that commonly influences choice of of transporation (T)?", name=agent),
            AIMessage(content="", name=agent, tool_calls=[{"name": hitl_tool, "args": {"question": "How do people commonly reason when choosing a mean of transportation?"}, "id": "3"}]),
            ToolMessage(content="It depends on traffic, whether you have a driving license, where is your workplace. Some jobs also require people to drive during the day, for instance a delivery driver or a salesperson.", tool_call_id="3"),
            AIMessage(content="Travel (T) likely connects to both groups: <group><nodes>[A, E, O, T]</nodes></group> <group><nodes>[S, E, R, T]</nodes></group>", name=agent),
            AIMessage(content=proposed_groups, name=agent),
        ]
    else:
        # Case 4: No tools — use internal reasoning only
        example = base_example + [
            AIMessage(content="I infer that Education level (E) is influenced by Age and should be grouped accordingly.", name=agent),
            AIMessage(content="Considering contextual cues, I add E to both groups, resulting in: <group><nodes>[A, E]</nodes></group> and <group><nodes>[S, E]</nodes></group>", name=agent),
            AIMessage(content="Next, I deduce that Occupation type (O) fits with Age and Education, while size of city of residence (R) correlates with Sex and Education.", name=agent),
            AIMessage(content="Thus, I tentatively form: <group><nodes>[A, E, O]</nodes></group> and <group><nodes>[S, E, R]</nodes></group>", name=agent),
            AIMessage(content="Since Travel choice (T) is the primary topic of the survey, I include it in both groups: <group><nodes>[A, E, O, T]</nodes></group> and <group><nodes>[S, E, R, T]</nodes></group>", name=agent),
            AIMessage(content=proposed_groups, name=agent),
        ]

    return example

# User prompt template for the divide (hypothesis) agent.
# Placeholders: {dataset_description} and {variable_names}.
# The prompt asks for one <group> element per group, each with <nodes> and
# <description> children.
divide_hypothesis_agent_user_prompt = """You are tasked with analyzing the following dataset:
Dataset description: {dataset_description}
For your partitioning task, focus on the following variables: {variable_names}

IMPORTANT: Web search cannot understand acronyms. If variable names are acronyms, do not use those acronyms for a web search. Instead, make the query in natural language.
IMPORTANT: Variable labels in output should be the same as in the input. Each input variable should be assigned to a group. Double check as to not introduce new variables by accident.
IMPORTANT: Output a list of variables and their descriptions for each group. Ensure that each list is enclosed within <nodes> tags. For example: <group><nodes>[Variable1, Variable2]</nodes><description>Description of the group</description></group>
"""

# System prompt template for the divide critic agent, which reviews (and may
# modify) the groups proposed by another agent.
# Placeholders: {domain} and {tools}.
# The text ends with "An example follows:", so an example conversation is
# presumably appended right after it (see create_divide_critic_agent_example)
# — TODO confirm at the call site.
divide_critic_agent_system_prompt = """You are an expert in the {domain} domain tasked with partitioning a dataset’s variables into groups that might share causal relationships. Each variable represents a node in a causal graph.
Your goal is to analyze groups proposed by another agent and and modify them if necessary.

Reason on:
- If a pair of variables are in the same group, they are either:
    Case 1) Directly causally related ("A" causes "B").
    Case 2) They are indirectly related through common causes ("C" causes both "A" and "B") or mediators ("A" causes "C", "C" causes "B"). In this case, the common causes or mediators should also be in the group.
- If two variables have semantically similar names or describe similar things, review whether they really belong in the same group.

Additional requirements:
- The groups may overlap.
- Groups should not be identical.
- Each variable in the dataset should be assigned to at least one group.
- You may edit any number of groups or even create new ones.
- Include detailed information on each variable within each group, preserve any relevant information that is already present.

Your process:
- Available tools should be used to gather information and validate your reasoning. Use them to to confirm variable meanings and assess relationships.
- Utilize an iterative thought-action-observation approach to gather evidence, validate your reasoning and refine your groups.

Tools available:
{tools}

An example follows:
"""

def create_divide_critic_agent_example(available_tools: list[str]) -> list[BaseMessage]:
    """
    Generates a critic agent example based on available tools.

    Every variant starts with the same user request (which includes the groups
    proposed by the previous agent) and first assistant reply, and ends with
    the final proposed groups. The intermediate steps depend on the tool names
    in ``available_tools``: ``rag_assistant`` look-ups, questions to a human
    (``human_in_the_loop`` or ``human_in_the_loop_ui``), both, or internal
    reasoning only when neither is present.

    Args:
        available_tools (list): A list of strings representing the available tools.

    Returns:
        list: An example conversation for the critic agent.
    """
    agent = "DivideCriticAgent"

    has_rag = "rag_assistant" in available_tools
    # Accept either human-in-the-loop tool name, as create_explainer_agent_example
    # does; the plain name wins when both are listed.
    if "human_in_the_loop" in available_tools:
        hitl_tool = "human_in_the_loop"
    elif "human_in_the_loop_ui" in available_tools:
        hitl_tool = "human_in_the_loop_ui"
    else:
        hitl_tool = None

    base_example = [
        HumanMessage(content="You are tasked with analyzing the following dataset: Dataset description: A dataset of survey results investigating usage patterns of different means of transport, focusing on cars and trains. It examines the following six variables: Age (A), Sex (S), Education (E), Occupation (O), Residence (R) and Travel (T). For your partitioning task, focus on the following variables: [T, A, S, R, E, O]. The groups proposed by the previous agent are: Variable names in group 1: [A, S, E] Description of variables in group 1: Education level (E) is likely to have a direct connection to both Age (A) and Sex (S). Variable names in group 2: [O, R, T] Description of variables in group 2: Choice of transporation (T) is shown to directly be related to occupation type (O) and size of city of residence (R)."),
        AIMessage(content="I will review the proposed groups of variables based on potential causal connections. A person's Age (A) and Sex (S) are independent, so they belong in separate groups. <group><nodes>[A, E]</nodes></group> <group><nodes>[S, E, O, R, T]</nodes></group>.", name=agent),
    ]

    proposed_groups = "Proposed groups: <group><nodes>[A, E, O, T]</nodes><description>Age (A), Education (E) and Occupation (O) are likely directly connected, influencing the individual's transportation choice (T).</description></group> <group><nodes>[S, E, R, T]</nodes><description>Sex (S) may interact with Education (E) and Residence (R), potentially affecting travel (T).</description></group>"

    if has_rag and hitl_tool is not None:
        # Case 1: Both RAG and Human-in-the-loop
        example = base_example + [
            AIMessage(content="I should now look into Education (E) and how it relates to A and S.", name=agent),
            AIMessage(content="", name=agent, tool_calls=[{"name": "rag_assistant", "args": {"research_topic": "What is the relationship between gender and higher education level? Are there differences in education level purely attributed to gender?"}, "id": "1"}]),
            ToolMessage(content="Studies and statistics show that males and females can have different average levels of education.", tool_call_id="1"),
            AIMessage(content="As proposed, education (E) does relate to both A and S, so it should appear in the same group: <group><nodes>[A, E]</nodes></group> <group><nodes>[S, E, O, R, T]</nodes></group>", name=agent),
            AIMessage(content="I should try to make the groups more balanced. Does Occupation type (O) belong with city size (R) and Education level (E)?", name=agent),
            AIMessage(content="", name=agent, tool_calls=[
                {"name": "rag_assistant", "args": {"research_topic": "Link between occupation type and education. Are graduates more likely to be employees or self-employed?"}, "id": "2"},
                {"name": "rag_assistant", "args": {"research_topic": "Correlation between employment type and size of city. Do large cities have fewer self-employed individuals?"}, "id": "3"}
            ]),
            ToolMessage(content="Data indicate that less educated individuals are more likely to be self-employed, while large companies tend to hire graduates.", tool_call_id="2"),
            ToolMessage(content="It remains unclear how employment type correlates with city size.", tool_call_id="3"),
            AIMessage(content="O is likely linked to E but not R, i will move O to the first group: <group><nodes>[A, E, O]</nodes></group> <group><nodes>[S, E, R, T]</nodes></group>", name=agent),
            AIMessage(content="Mode of Travel (T) is the key point of the survey. Is it likely to have a direct connection to occupation type?", name=agent),
            AIMessage(content="", name=agent, tool_calls=[{"name": hitl_tool, "args": {"question": "I have reviewed the groups like so: <group><nodes>[A, E, O]</nodes></group> <group><nodes>[S, E, R, T]</nodes></group>. Does a person's employment type (O) influence choice of transportation (T)?"}, "id": "4"}]),
            ToolMessage(content="Some jobs do require people to drive during the day, for instance a delivery driver or a salesperson.", tool_call_id="4"),
            AIMessage(content="Travel (T) likely connects to both groups: <group><nodes>[A, E, O, T]</nodes></group> <group><nodes>[S, E, R, T]</nodes></group>", name=agent),
            AIMessage(content="Before finalizing, I'd like human confirmation of these groupings.", name=agent),
            # Trailing space keeps the question from running into "Proposed groups:".
            AIMessage(content="", name=agent, tool_calls=[{"name": hitl_tool, "args": {"question": "Do you agree with the reviewd variable groupings? " + proposed_groups}, "id": "5"}]),
            ToolMessage(content="The proposed groups are reasonable.", tool_call_id="5"),
            AIMessage(content=proposed_groups, name=agent),
        ]
    elif has_rag:
        # Case 2: Only RAG — follow the same chain-of-thought without human confirmation
        # NOTE(review): the intermediate groups in cases 2-4 start from [A, E] / [S, E],
        # not from the [A, E] / [S, E, O, R, T] split stated in base_example — confirm
        # whether this is intended.
        example = base_example + [
            AIMessage(content="I should now look into Education (E) and its relation to A and S.", name=agent),
            AIMessage(content="", name=agent, tool_calls=[{"name": "rag_assistant", "args": {"research_topic": "What is the relationship between gender and higher education level? Any differences solely based on gender?"}, "id": "1"}]),
            ToolMessage(content="Studies show average education levels differ by gender.", tool_call_id="1"),
            AIMessage(content="Considering Age’s influence, I add E to both groups: <group><nodes>[A, E]</nodes></group> <group><nodes>[S, E]</nodes></group>", name=agent),
            AIMessage(content="Next, I explore how Occupation (O) and Residence (R) interact with the other variables.", name=agent),
            AIMessage(content="", name=agent, tool_calls=[
                {"name": "rag_assistant", "args": {"research_topic": "How is occupation related to education? Is there a trend in employment type among graduates?"}, "id": "2"},
                {"name": "rag_assistant", "args": {"research_topic": "What is the relationship between employment type and city size?"}, "id": "3"}
            ]),
            ToolMessage(content="Evidence suggests graduates tend to be employed and that city size has an unclear influence on employment type.", tool_call_id="2"),
            ToolMessage(content="Correlation between city size and employment type remains inconclusive.", tool_call_id="3"),
            AIMessage(content="I tentatively add O to the first group and R to the second: <group><nodes>[A, E, O]</nodes></group> <group><nodes>[S, E, R]</nodes></group>", name=agent),
            AIMessage(content="Since Travel (T) is key to this survey, I include it in both groups: <group><nodes>[A, E, O, T]</nodes></group> <group><nodes>[S, E, R, T]</nodes></group>", name=agent),
            AIMessage(content=proposed_groups, name=agent),
        ]
    elif hitl_tool is not None:
        # Case 3: Only Human-in-the-loop — mimic similar steps but with human confirmations
        example = base_example + [
            AIMessage(content="I should now review Education (E) and its relation to A and S.", name=agent),
            AIMessage(content="Based on context, Age appears to influence education. Could you confirm if Education should be grouped with Age?", name=agent),
            AIMessage(content="", name=agent, tool_calls=[
                {"name": hitl_tool, "args": {"question": "Does Education relate closely to Age?"}, "id": "1"}
            ]),
            ToolMessage(content="Yes, Education is related to Age.", tool_call_id="1"),
            AIMessage(content="I will add E to both groups: <group><nodes>[A, E]</nodes></group> <group><nodes>[S, E]</nodes></group>", name=agent),
            AIMessage(content="Next, I need to assess how Occupation (O) and Residence (R) connect with other variables. Do you agree that Occupation belongs with Age and Residence with Sex?", name=agent),
            AIMessage(content="", name=agent, tool_calls=[
                {"name": hitl_tool, "args": {"question": "Should Occupation (O) be combined with Age (A) and Education (E) while Residence (R) with Sex (S) and Education (E)?"}, "id": "2"}
            ]),
            ToolMessage(content="Yes, that grouping makes sense.", tool_call_id="2"),
            AIMessage(content="I will tentatively form the groups: <group><nodes>[A, E, O]</nodes></group> and <group><nodes>[S, E, R]</nodes></group>", name=agent),
            AIMessage(content="Mode of Travel (T) is the key point of the survey. What is it that commonly influences choice of of transporation (T)?", name=agent),
            AIMessage(content="", name=agent, tool_calls=[{"name": hitl_tool, "args": {"question": "How do people commonly reason when choosing a mean of transportation?"}, "id": "3"}]),
            ToolMessage(content="It depends on traffic, whether you have a driving license, where is your workplace. Some jobs also require people to drive during the day, for instance a delivery driver or a salesperson.", tool_call_id="3"),
            AIMessage(content="Travel (T) likely connects to both groups: <group><nodes>[A, E, O, T]</nodes></group> <group><nodes>[S, E, R, T]</nodes></group>", name=agent),
            AIMessage(content=proposed_groups, name=agent),
        ]
    else:
        # Case 4: No tools — use internal reasoning only
        example = base_example + [
            AIMessage(content="I infer that Education (E) is influenced by Age and should be grouped accordingly.", name=agent),
            AIMessage(content="Considering contextual cues, I add E to both groups, resulting in: <group><nodes>[A, E]</nodes></group> and <group><nodes>[S, E]</nodes></group>", name=agent),
            AIMessage(content="Next, I deduce that Occupation (O) fits with Age and Education, while Residence (R) correlates with Sex and Education.", name=agent),
            AIMessage(content="Thus, I tentatively form: <group><nodes>[A, E, O]</nodes></group> and <group><nodes>[S, E, R]</nodes></group>", name=agent),
            AIMessage(content="Since Travel (T) is the primary outcome, I include it in both groups: <group><nodes>[A, E, O, T]</nodes></group> and <group><nodes>[S, E, R, T]</nodes></group>", name=agent),
            AIMessage(content=proposed_groups, name=agent),
        ]

    return example


# User prompt template for the divide critic agent.
# Placeholders: {dataset_description}, {variable_names} and {proposed_groups}
# (the groups produced by the previous agent).
# The prompt asks for one <group> element per group, each with <nodes> and
# <description> children.
divide_critic_agent_user_prompt = """You are tasked with analyzing the following dataset:
Dataset description: {dataset_description}
For your partitioning task, focus on the following variables: {variable_names}
The groups proposed by the previous agent are: {proposed_groups}

IMPORTANT: Web search cannot understand acronyms. If variable names are acronyms, do not use those acronyms for a web search. Instead, make the query in natural language.
IMPORTANT: Variable labels in output should be the same as in the input. Each input variable should be assigned to a group. Double check as to not introduce new variables by accident.
IMPORTANT: Output a list of variables and their descriptions for each group. Ensure that each list is enclosed within <nodes> tags. For example: <group><nodes>[Variable1, Variable2]</nodes><description>Description of the group</description></group>
"""

# Prompt template asking the agent to decide whether one existing group should
# be split further, and to output the resulting subgroups.
# Placeholder: {variable_names}.
# NOTE: "\\n" is an escaped backslash, so the rendered prompt contains the two
# literal characters "\n" rather than a line break.
divide_agent_iterative_prompt = """Analyze the following group of variables. Determine if further division is necessary.

Variable names: {variable_names}

Your objectives are:
(1) Reassess potential causal relationships within this group. Is there heterogeneity in this group? Are there separate independent groups of variables in it, such that we can split it further? 
(2) ONLY if needed, divide the group into smaller, meaningful subgroups and describe each.

Use the available tools to gather additional information if necessary.
Use an iterative thought-action-observation approach to refine your groupings. Document your reasoning and summarize a description for each subgroup.

IMPORTANT: Variable labels should be the same as in the input. Each input variable should be assigned to a subgroup. Double check as to not introduce new variables by accident.
IMPORTANT: Ensure each subgroup is enclosed within <group> tags. For example: 
<group><nodes>[Variable1, Variable2]</nodes><description>Description of the subgroup</description></group>
IMPORTANT: DO RESPECT THE ABOVE FORMAT. 
DO NOT ADD \\n NEWLINE CHARACTERS OR LONG SPACES BETWEEN EACH NODE. JUST RESPECT THE FORMAT ABOVE AND SEPARATE VARIABLE NAMES WITH A COMMA.
"""

# Prompt template asking the critic agent to revise its groups using
# independence-test scores computed on the data.
# Placeholder: {bad_clusters} — presumably the string built by
# format_bad_clusters_template; TODO confirm at the call site.
divide_critic_agent_refine_prompt = """To test the quality of the clustering, i have calculated a score for each group, based on independence tests. Tests were based on distributions in the actual data.
A low score means that a few variables in the group are independent from all others. Outliers in a group are those variables whose score is particularly low, and likely belong in another group.
The following are groups with a low score, as well as what seem to be the outliers within the group:
{bad_clusters}

Please revise the groups while taking this information into account.
"""

groups_with_outliers_template = """Group {group_id}\n
Variables: {variable_names},\n
Score: {silhouette_score},\n
Outliers: {outliers}\n
"""

def format_bad_clusters_template(hsic_resuts: dict[str, list[float | list[str] | list[str]]]) -> str:
    """
    Formats the bad clusters template with the provided HSIC results.

    Args:
        hsic_resuts (dict): A dictionary containing the HSIC results, witht the following format:
            - Each key is the partition id (str).
            - Each value is a list containing:
                - The average silhouette score of the cluster (float).
                - A list of variable names identified as outliers in the cluster (list[str]).
                - The full list of variable names in the cluster (list[str]).

    Returns:
        bad_clusters_str: The formatted string.
    """
    bad_clusters_str=""
    for group_id, (silhouette_score, outliers, variable_names) in hsic_resuts.items():
        bad_clusters_str += groups_with_outliers_template.format(
            group_id=group_id,
            silhouette_score=silhouette_score,
            outliers=", ".join(outliers),
            variable_names=", ".join(variable_names)
        )

    return bad_clusters_str

# System prompt for the hypothesis agent of the "conquer" step: it frames the task of
# proposing a causal graph (directed cause-effect edges) over a list of variables.
# Placeholders: {domain}, {tools}.
# The text ends with "Example of a tool call:"; presumably the conversation returned by
# create_conquer_hypothesis_agent_example is appended after it -- confirm at the call site.
conquer_hypothesis_agent_system_prompt = """You are an expert in the {domain} domain tasked with identifying cause-effect relationships from data. Your goal is to construct a causal graph where nodes represent variables, and directed edges represent hypothesized cause-effect relationships. You will receive a list of variables and, optionally, a dataset description.
You have access to tools to assist in gathering prior knowledge, helping you to refine your hypothesis. Use these tools iteratively to gather relevant knowledge, and refine your causal graph. Reason through each step and leveraging the tools to support your conclusions.
Try to understand what each variable means. You can the tools available in case additional information is needed. 

IMPORTANT: In some cases you might have access to a human expert. In that case, use it to gather contextual knowledge, and balance it between knowledge from RAG and human expert.
IMPORTANT: Do not be conservative in putting a causal relationship. It is better to put an edge in case you are in doubt even after using your tools.

Tools available:
{tools}

Example of a tool call:
"""

def create_conquer_hypothesis_agent_example(available_tools: list[str]) -> list[BaseMessage]:
    """
    Build the few-shot conversation shown to the hypothesis agent.

    The transcript only demonstrates calls to tools named in `available_tools`:
    `rag_assistant` and/or a human-in-the-loop tool (`human_in_the_loop` takes
    precedence over `human_in_the_loop_ui` when both are listed). With neither
    available, the example goes straight from reasoning to the final answer.

    Args:
        available_tools (list): A list of strings representing the available tools.

    Returns:
        list: An example conversation for the hypothesis agent, ending with the
            final `<edges>` answer.
    """
    agent_name = "HypothesisAgent"
    rag_enabled = "rag_assistant" in available_tools
    # First human-in-the-loop tool name that is available, or None when there is none.
    hitl_tool = next(
        (name for name in ("human_in_the_loop", "human_in_the_loop_ui") if name in available_tools),
        None,
    )

    def thought(text):
        # Plain reasoning step attributed to the agent.
        return AIMessage(content=text, name=agent_name)

    def tool_call(tool_name, arguments, call_id):
        # Agent turn that carries a single tool invocation and no text.
        return AIMessage(content="", name=agent_name, tool_calls=[{"name": tool_name, "args": arguments, "id": call_id}])

    def ask_human(call_id):
        # Question to the human expert plus the expert's reply.
        return [
            tool_call(hitl_tool, {"question": "Is the following causal chain accurate: energy_consumption -> CO2_emissions -> global_temperature_increase?"}, call_id),
            ToolMessage(content="Yes, that causal chain is generally accurate.", tool_call_id=call_id),
        ]

    transcript = [
        HumanMessage(content="Variable names: CO2_emissions, energy_consumption, global_temperature_increase"),
        thought("I know that CO2 emissions can influence global temperature; I need to confirm there are no hidden confounders."),
    ]

    if rag_enabled:
        # Two RAG look-ups are shared by the "RAG only" and "RAG + human" variants.
        transcript += [
            tool_call("rag_assistant", {"research_topic": "How do CO2 emissions relate to global temperature increase? Are there other effects or confounders? Is it just a correlation?"}, "1"),
            ToolMessage("The scientific community mostly agrees that CO2 emissions are a direct cause of global temperature rise", tool_call_id="1"),
            thought("I conclude that CO2 emissions influence global temperature increase. But, how does energy consumption fit in?"),
            tool_call("rag_assistant", {"research_topic": "How is energy consumption related to CO2 emissions and global temperatures? Does increased consumption cause increased emissions, or vice-versa?"}, "2"),
            ToolMessage("Higher energy consumption implies higher emissions in most countries, but has not been confirmed to directly cause temperature increase.", tool_call_id="2"),
        ]
        if hitl_tool is None:
            transcript.append(
                thought("The possible relationships between these three phenomena are now clear: consumption causes emissions, which in turn causes temperature increase.")
            )
        else:
            transcript.append(
                thought("The possible relationships between these three phenomena are now clear: consumption causes emissions, which in turn causes temperature increase. Before I finalize, can a human expert confirm this chain of causation?")
            )
            transcript += ask_human("3")
    elif hitl_tool is not None:
        transcript.append(
            thought("I know that CO2 emissions can influence global temperature increase, and that energy consumption is also likely involved. Can a human expert confirm the relationships between these three variables?")
        )
        transcript += ask_human("1")
    else:
        transcript.append(
            thought("I conclude that energy consumption causes CO2 emissions, which in turn causes global temperature increase.")
        )

    # Every variant closes with the same final answer.
    transcript.append(
        thought("Final Answer: <edges>(energy_consumption, CO2_emissions), (CO2_emissions, global_temperature_increase)</edges>")
    )
    return transcript

# User prompt for the conquer hypothesis agent: step-by-step reasoning instructions
# followed by the required <edges>(Variable1, Variable2), ...</edges> output format.
# Placeholders: {general_description}, {variable_names}, {variable_description}.
conquer_hypothesis_agent_user_prompt = """Reason to get a causal graph from the following dataset.
Dataset description: {general_description}

You will focus on the following variables:
Variable names: {variable_names}
Description of variables: {variable_description}

Successively, you will perform the following points until you are confident about a final answer:
(1) Understand the context, about what are the variables modeling at a causal level. You can use the tools to search for more information.
(2) Reason on which might be the root nodes which are not influenced by other variables. Successively, reason on the relationships between those root nodes and the other child variables. Then reason between children of children, and so on...
(3) Output a preliminary list of all edges that COULD POTENTIALLY be present between those variables. DO NOT BE TOO CONSERVATIVE. AN EDGE MORE IS BETTER THAN AN EDGE LESS.
(4) Reflect and improve your estimate. 

Finally, output a cumulative list of directed edges between the provided variables.

IMPORTANT: Make sure the the list of edges are within the <edges> tags. For example: <edges>(Variable1, Variable2), (Variable2, Variable3)</edges>
IMPORTANT: ABSOLUTELY RESPECT THE FORMAT. DO NOT USE ARROWS OR ANYTHING SIMILAR TO REPRESENT EDGES. ONLY USE THIS FORMAT, AND USE THE COMMA TO SEPARATE VARIABLE NAMES:<edges>(Variable1, Variable2), (Variable2, Variable3)</edges>
"""


# System prompt for the critic agent of the "conquer" step: it asks for a critical
# review of a hypothesized causal graph (wrong-direction or invalid edges).
# Placeholders: {domain}, {tools}.
# The text ends with "Example of a tool call:"; presumably the conversation returned by
# create_conquer_critic_agent_example is appended after it -- confirm at the call site.
conquer_critic_agent_system_prompt = """You are an expert in the {domain} domain tasked with critically evaluating proposed causal relationships between variables in a dataset. Your goal is to analyze a hypothesized causal graph and suggest modifications, particularly focusing on identifying edges that are in the wrong anti-causal direction, or that do not hold true.

You will receive a list of variables, a dataset description, and a proposed causal graph. Use a thought-action-observation process for your reasoning. You have access to tools to assist in verifying relationships by gathering prior knowledge. Be systematic in your approach, reasoning through each step and leveraging the tools to support your conclusions.

First, assess the validity of each edge in the proposed graph and consider potential confounding relationships which might have been missed. Use the tools available to gather additional information as needed.

Tools available:
{tools}

Example of a tool call:
"""

def create_conquer_critic_agent_example(available_tools: list[str]) -> list[BaseMessage]:
    """
    Build the few-shot conversation shown to the critic agent.

    The transcript only demonstrates calls to tools named in `available_tools`:
    `rag_assistant` and/or a human-in-the-loop tool (`human_in_the_loop` takes
    precedence over `human_in_the_loop_ui` when both are listed). With neither
    available, the example contains reasoning steps only.

    Args:
        available_tools (list): A list of strings representing the available tools.

    Returns:
        list: An example conversation for the critic agent, ending with the
            revised `<edges>` answer.
    """
    agent_name = "CriticAgent"
    rag_enabled = "rag_assistant" in available_tools
    # First human-in-the-loop tool name that is available, or None when there is none.
    hitl_tool = next(
        (name for name in ("human_in_the_loop", "human_in_the_loop_ui") if name in available_tools),
        None,
    )

    def thought(text):
        # Plain reasoning step attributed to the agent.
        return AIMessage(content=text, name=agent_name)

    def tool_call(tool_name, arguments, call_id):
        # Agent turn that carries a single tool invocation and no text.
        return AIMessage(content="", name=agent_name, tool_calls=[{"name": tool_name, "args": arguments, "id": call_id}])

    def ask_human(call_id):
        # Question to the human expert plus the expert's reply.
        return [
            tool_call(hitl_tool, {"question": "Given that renewable energy sources exist, is the edge (energy_consumption, CO2_emissions) always valid, or are there exceptions?"}, call_id),
            ToolMessage(content="The edge (energy_consumption, CO2_emissions) is not always valid due to the increasing use of renewable energy sources.", tool_call_id=call_id),
        ]

    transcript = [
        HumanMessage(content="Variable names: CO2_emissions, energy_consumption, global_temperature_increase \n Hypothesis graph: (energy_consumption, CO2_emissions), (CO2_emissions, global_temperature_increase)"),
        thought("I will evaluate the proposed causal graph for potential flaws and counterarguments."),
    ]

    if rag_enabled:
        # Two RAG look-ups are shared by the "RAG only" and "RAG + human" variants.
        transcript += [
            tool_call("rag_assistant", {"research_topic": "What are common confounding variables in studies of CO2 emissions and temperature? Are there any known counterexamples?"}, "1"),
            ToolMessage("Confounding variables such as industrial activity and land use changes can influence both CO2 emissions and temperature.", tool_call_id="1"),
            thought("The presence of confounding variables suggests that the relationship between CO2 emissions and temperature may not be direct. I need to investigate further."),
            tool_call("rag_assistant", {"research_topic": "Are there instances where increased energy consumption did not lead to higher CO2 emissions?"}, "2"),
            ToolMessage("In some cases, countries have increased energy consumption through renewable sources, which do not emit CO2.", tool_call_id="2"),
        ]
        if hitl_tool is None:
            transcript.append(
                thought("This indicates that the edge (energy_consumption, CO2_emissions) may not always hold true, especially in the context of renewable energy.")
            )
        else:
            transcript.append(
                thought("This indicates that the edge (energy_consumption, CO2_emissions) may not always hold true, especially in the context of renewable energy. Before I revise the hypothesis, I would like a human's opinion.")
            )
            transcript += ask_human("3")
        transcript.append(
            thought("Considering the confounding variables and the potential for anti-causal relationships, I propose the following adjustments: \n - The edge (energy_consumption, CO2_emissions) may not be valid in all contexts. \n - The edge (CO2_emissions, global_temperature_increase) remains plausible but requires further evidence.")
        )
    else:
        if hitl_tool is not None:
            transcript.append(
                thought("Given the increasing use of renewable energy sources, I am questioning the validity of the edge (energy_consumption, CO2_emissions). I need human confirmation on this.")
            )
            transcript += ask_human("1")
        else:
            transcript.append(
                thought("I question the validity of the edge (energy_consumption, CO2_emissions) due to potential confounding factors.")
            )
        # Without RAG evidence the summary does not mention confounding variables.
        transcript.append(
            thought("Considering the potential for anti-causal relationships, I propose the following adjustments: \n - The edge (energy_consumption, CO2_emissions) may not be valid in all contexts. \n - The edge (CO2_emissions, global_temperature_increase) remains plausible but requires further evidence.")
        )

    # Every variant closes with the same final answer.
    transcript.append(
        thought("The edge (energy_consumption, CO2_emissions) is questionable due to confounding factors. Final answer: <edges>(CO2_emissions, global_temperature_increase)</edges>")
    )
    return transcript

# User prompt for the conquer critic agent: presents the hypothesized graph and the
# review steps, followed by the required <edges>(Variable1, Variable2), ...</edges>
# output format.
# Placeholders: {general_description}, {variable_names}, {variable_description},
# {causal_graph}.
conquer_critic_agent_user_prompt = """Your task is to critically analyze the hypothesized causal graph based on the provided dataset. Here is the hypothesis you need to evaluate:

Dataset description: {general_description}

You will focus on the following variables:
Variable names: {variable_names}
Description of variables: {variable_description}
Hypothesis graph: {causal_graph}

You will perform the following steps until you reach a confident conclusion:
(1) Evaluate the proposed relationships between the variables. 
(2) Assess whether any edges are in the anti-causal direction (i.e., (effect, cause) instead of (cause, effect)). Consider that intervening on the effect should not change the cause, but not the viceversa.
(3) Do those causal relationships always hold, or only on some context? Think about counterfactual scenarios.
(4) Utilize the available tools to gather additional information and evidence to support your analysis. If available, use the human expert for confirmation or for contextual knowledge.

Finally, provide a cumulative list of directed edges identified at each iteration for every group of variables.

IMPORTANT: Ensure that the list of edges is enclosed within <edges> tags. For example: <edges>(Variable1, Variable2), (Variable2, Variable3)</edges>
IMPORTANT: ABSOLUTELY RESPECT THE FORMAT. DO NOT USE ARROWS OR ANYTHING SIMILAR TO REPRESENT EDGES. ONLY USE THIS FORMAT, AND USE THE COMMA TO SEPARATE VARIABLE NAMES:<edges>(Variable1, Variable2), (Variable2, Variable3)</edges>
"""

# Template to integrate partitions and their description into agents' prompts.
# Rendered once per group by format_group_template.
# Placeholders: {i} (group number), {variable_names_i}, {variable_description_i}.
# Every line carries an explicit "\n" escape in addition to its literal line break,
# so each rendered line is followed by a blank line.
group_template = """Variable names in group {i}: {variable_names_i}\n
Description of variables in group {i}: {variable_description_i}\n
"""

def format_group_template(group_list: list[PartitionNode]):
    """
    Render `group_template` once per PartitionNode and concatenate the results.

    Groups are numbered from 1 in the order they appear in `group_list`; each entry
    uses the node's `variable_names` (comma-separated) and its `description`.
    The resulting string can be inserted in the {groups} field of prompts.
    """
    rendered_groups = (
        group_template.format(
            variable_names_i=", ".join(partition.variable_names),
            variable_description_i=partition.description,
            i=str(position),
        )
        for position, partition in enumerate(group_list, start=1)
    )
    return "".join(rendered_groups)

# System prompt for the hypothesis agent of the merging step: it asks for directed
# (cause, effect) edges between variables that belong to different groups.
# Placeholders: {domain}, {tools}.
# The text ends with "Example of a tool call:"; presumably the conversation returned by
# create_merging_hypothesis_agent_example is appended after it -- confirm at the call site.
merging_hypothesis_agent_system_prompt = """You are a {domain} domain expert responsible for evaluating causal relationships. In this context, each variable in a dataset represents a node in a causal graph.
You will receive different groups of variables, each accompanied by a description. The causal relationships within each group have already been assessed. Your objective is to analyze the relationships between these groups to hypothesize potential connections bridging the graphs. The goal is to merge them into a larger causal graph. Specifically, you will propose potential directed edges (cause, effect) between variables from different groups.
Utilize the available tools to verify and refine your hypotheses iteratively. Approach this task systematically, reasoning through each step and leveraging the tools to support your conclusions.

Available Tools:
{tools}

Example of a tool call:
"""

def create_merging_hypothesis_agent_example(available_tools: list[str]) -> list[BaseMessage]:
    """
    Build the few-shot conversation shown to the merging hypothesis agent.

    The transcript only demonstrates calls to tools named in `available_tools`:
    `rag_assistant` and/or a human-in-the-loop tool (`human_in_the_loop` takes
    precedence over `human_in_the_loop_ui` when both are listed). With neither
    available, the example contains reasoning steps only.

    Args:
        available_tools (list): A list of strings representing the available tools.

    Returns:
        list: An example conversation for the merging hypothesis agent, ending with
            the final `<edges>` answer.
    """
    agent_name = "MergingHypothesisAgent"
    rag_enabled = "rag_assistant" in available_tools
    # First human-in-the-loop tool name that is available, or None when there is none.
    hitl_tool = next(
        (name for name in ("human_in_the_loop", "human_in_the_loop_ui") if name in available_tools),
        None,
    )

    def thought(text):
        # Plain reasoning step attributed to the agent.
        return AIMessage(content=text, name=agent_name)

    def tool_call(tool_name, arguments, call_id):
        # Agent turn that carries a single tool invocation and no text.
        return AIMessage(content="", name=agent_name, tool_calls=[{"name": tool_name, "args": arguments, "id": call_id}])

    def ask_human(call_id):
        # Question to the human expert plus the expert's reply.
        return [
            tool_call(hitl_tool, {"question": "Are these relationships accurate: (renewable_energy_usage -> CO2_emissions), (deforestation_rate -> global_temperature_increase), and (industrial_activity -> CO2_emissions)?"}, call_id),
            ToolMessage(content="Yes, those relationships are generally accurate.", tool_call_id=call_id),
        ]

    transcript = [
        HumanMessage(content="Variable names in group 1: CO2_emissions, energy_consumption, global_temperature_increase \n Variable names in group 2: deforestation_rate, renewable_energy_usage, industrial_activity"),
        thought("I will merge the two groups into a single causal graph."),
    ]

    if rag_enabled:
        # Two RAG look-ups are shared by the "RAG only" and "RAG + human" variants.
        transcript += [
            tool_call("rag_assistant", {"research_topic": "How does renewable energy usage relate to CO2 emissions? Is it shown to have any impact on global temperatures? Are there other effects or confounders?"}, "1"),
            ToolMessage("The scientific community mostly agrees that higher renewable energy usage contributes to decreasing CO2 emissions, but not to global temperature directly.", tool_call_id="1"),
            tool_call("rag_assistant", {"research_topic": "Do deforestation and industrial activity contribute to global temperature increase? Are there intermediate factors to consider?"}, "2"),
            ToolMessage("Deforestation is commonly attributed to an increase in CO2 in the atmosphere, and thus global warming. Higher industrial activity is linked both to higher CO2 emissions and a global temperature increase.", tool_call_id="2"),
        ]
        if hitl_tool is None:
            transcript.append(
                thought("Possible relationships between these two groups are now clear: renewable energy usage influences CO2 emissions directly. Deforestation does not cause emissions, but contributes to global temperature increase. Industrial activity affets CO2 emissions directly.")
            )
        else:
            transcript.append(
                thought("Based on the information I have, and the RAG results, I propose the following connections. Before finalizing, can a human confirm these relationships?")
            )
            transcript += ask_human("3")
    elif hitl_tool is not None:
        transcript.append(
            thought("Based on the available variables, I propose the following connections.  Can a human confirm these relationships?")
        )
        transcript += ask_human("1")
    else:
        transcript.append(
            thought("Possible relationships between these two groups are: renewable energy usage influences CO2 emissions directly. Deforestation contributes to global temperature increase. Industrial activity affects CO2 emissions directly.")
        )

    # Every variant closes with the same final answer.
    transcript.append(
        thought("Possible relationships between these two groups are now clear: renewable energy usage influences CO2 emissions directly. Deforestation does not cause emissions, but contributes to global temperature increase. Industrial activity affets CO2 emissions directly. Final Answer: <edges>(renewable_energy_usage, CO2_emissions), (deforestation_rate, global_temperature_increase), (industrial_activity, CO2_emissions)</edges>")
    )
    return transcript

# User prompt for the merging hypothesis agent: asks for bridging edges between the
# listed groups, followed by the required <edges>(Variable1, Variable2), ...</edges>
# output format.
# Placeholders: {general_description}, {groups} (format_group_template documents its
# output as intended for the {groups} field).
merging_hypothesis_agent_user_prompt = """Your task is to establish connections between variables from the following groups, which will introduce new edges in a directed causal graph. 
Output pairs of variables from different groups, ensuring that the relationships are consistent.

Dataset description: {general_description}

You will now focus on these specific groups:
{groups}

Follow these steps until you are confident in your final answer:
(1) Understand the context, about what is each group of variables modeling at a causal level. Understand how each groups is related to each other. You can use the tools to search for more information.
(2) Output a preliminary list of bridging edges that COULD POTENTIALLY be present between those groups of variables. DO NOT BE TOO CONSERVATIVE. AN EDGE MORE IS BETTER THAN AN EDGE LESS.
(3) Reflect and improve your estimate. 

Finally, present a single list of directed edges that connect the groups.

IMPORTANT: Ensure the list of edges is enclosed within <edges> tags. For example: <edges>(Variable1, Variable2), (Variable2, Variable3)</edges>
IMPORTANT: ABSOLUTELY RESPECT THE FORMAT. DO NOT USE ARROWS OR ANYTHING SIMILAR TO REPRESENT EDGES. ONLY USE THIS FORMAT, AND USE THE COMMA TO SEPARATE VARIABLE NAMES:<edges>(Variable1, Variable2), (Variable2, Variable3)</edges>
"""

# System prompt for the critic agent of the merging step: it asks for a critical
# review of the proposed edges bridging separate groups of variables.
# Placeholders: {domain}, {tools}.
# The text ends with "Example of a tool call:"; presumably the conversation returned by
# create_merging_critic_agent_example is appended after it -- confirm at the call site.
merging_critic_agent_system_prompt = """You are a {domain} domain expert tasked with critically analyzing the proposed connections between variable groups in a causal graph. Your role is to evaluate the validity of the directed edges (cause, effect) and suggest modifications, particularly focusing on identifying edges that are in the wrong anti-causal direction, or that do not hold true.

You will receive a list of separate groups of variables, each with its description. You will also be provided with a set of causal edges bridging those groups. You need to understand how those groups are related.

Use a thought-action-observation process for your reasoning. You have access to tools to assist in verifying relationships by gathering prior knowledge. 

Available Tools:
{tools}

Example of a tool call:
"""

def create_merging_critic_agent_example(available_tools: list[str]) -> list[BaseMessage]:
    """
    Build the few-shot conversation shown to the merging critic agent.

    The conversation always opens with the same human request and agent
    acknowledgement and always closes with the same final answer. The steps
    in between depend on which tools are offered:

    * ``rag_assistant`` present: two literature look-ups (tool-call ids "1", "2").
    * ``human_in_the_loop`` or ``human_in_the_loop_ui`` present: one
      confirmation question to a human (``human_in_the_loop`` wins when both
      names are listed).
    * neither: the agent states its assessment directly.

    Args:
        available_tools (list): Names of the tools the agent may call.

    Returns:
        list: The example conversation as a list of messages.
    """
    agent = "MergingCriticAgent"

    uses_rag = "rag_assistant" in available_tools
    if "human_in_the_loop" in available_tools:
        hitl_name = "human_in_the_loop"
    elif "human_in_the_loop_ui" in available_tools:
        hitl_name = "human_in_the_loop_ui"
    else:
        hitl_name = None
    uses_hitl = hitl_name is not None

    # NOTE(review): the request below proposes three edges, but the final
    # answer keeps only two and never mentions (industrial_activity,
    # CO2_emissions). Confirm whether silently dropping it is intended.
    conversation = [
        HumanMessage(content="Variable names in group 1: CO2_emissions, energy_consumption, global_temperature_increase \n Variable names in group 2: deforestation_rate, renewable_energy_usage, industrial_activity. \n Proposed connections: (renewable_energy_usage, CO2_emissions), (deforestation_rate, global_temperature_increase), (industrial_activity, CO2_emissions)"),
        AIMessage(content="I will analyze the proposed connections for validity and causal order.", name=agent),
    ]

    if uses_rag:
        # Two literature look-ups, each followed by the tool's reply.
        conversation.extend([
            AIMessage(content="", name=agent, tool_calls=[{"name": "rag_assistant", "args": {"research_topic": "Is there evidence that renewable energy usage directly causes a decrease in CO2 emissions? Are there confounding factors?"}, "id": "1"}]),
            ToolMessage(content="Studies indicate that while renewable energy usage is associated with lower CO2 emissions, other factors such as energy efficiency and policy changes also play significant roles.", tool_call_id="1"),
            AIMessage(content="The connection between renewable energy usage and CO2 emissions may be influenced by confounding factors. I will further investigate the relationship between deforestation and global temperature increase.", name=agent),
            AIMessage(content="", name=agent, tool_calls=[{"name": "rag_assistant", "args": {"research_topic": "What is the relationship between deforestation and global temperature increase? Are there studies that suggest alternative causal pathways?"}, "id": "2"}]),
            ToolMessage(content="Deforestation is linked to increased CO2 emissions, which contribute to global warming. However, the direct impact of deforestation on temperature increase can vary based on local conditions.", tool_call_id="2"),
        ])
        # The wrap-up of the research phase additionally announces the human
        # check when a human-in-the-loop tool is available.
        if uses_hitl:
            research_verdict = "The proposed connection (deforestation_rate, global_temperature_increase) appears valid, but it is essential to consider local environmental factors. Given the complexity, I would like a human to check the validity of my evaluation."
        else:
            research_verdict = "The proposed connection (deforestation_rate, global_temperature_increase) appears valid, but it is essential to consider local environmental factors."
        conversation.append(AIMessage(content=research_verdict, name=agent))
    elif uses_hitl:
        conversation.append(
            AIMessage(content="Given the potential confounding factors and context-dependent relationships, I need human confirmation on the validity of these proposed connections.", name=agent)
        )
    else:
        conversation.append(
            AIMessage(content="I suggest the following: The connection (renewable_energy_usage, CO2_emissions) is valid but requires acknowledgment of confounding factors. The connection (deforestation_rate, global_temperature_increase) is valid but context-dependent.", name=agent)
        )

    if uses_hitl:
        # Tool-call ids are numbered per conversation: the human check is the
        # third call after the two look-ups, otherwise the first.
        hitl_call_id = "3" if uses_rag else "1"
        conversation.extend([
            AIMessage(content="", name=agent, tool_calls=[{"name": hitl_name, "args": {"question": "Is it accurate to say that the connection between renewable_energy_usage and CO2_emissions is valid but has confounding factors, and that the connection between deforestation_rate and global_temperature_increase is valid but context-dependent?"}, "id": hitl_call_id}]),
            ToolMessage(content="Yes, that is an accurate assessment.", tool_call_id=hitl_call_id),
        ])

    # The "summarize" bridge only appears when at least one tool was used.
    if uses_rag or uses_hitl:
        conversation.append(AIMessage(content="I will summarize my findings.", name=agent))

    conversation.append(
        AIMessage(content="After evaluating the proposed connections, I suggest the following revisions: \n - The connection (renewable_energy_usage, CO2_emissions) is valid but requires acknowledgment of confounding factors. \n - The connection (deforestation_rate, global_temperature_increase) is valid but context-dependent. \n Final Answer: <edges>(renewable_energy_usage, CO2_emissions), (deforestation_rate, global_temperature_increase)</edges>", name=agent)
    )
    return conversation

# User prompt template for the merging critic agent: asks the model to review
# a set of proposed bridging edges between variable groups.
# Placeholders: {general_description}, {groups} and {group_connections}.
# The reply is expected to carry the revised edge list inside <edges> tags.
# NOTE(review): step (2) ends with "but not the viceversa", which reads
# ambiguously; the intent appears to be that intervening on the cause may
# change the effect while the reverse must not. Left untouched here because
# this text is sent to the model verbatim -- consider rewording deliberately.
merging_critic_agent_user_prompt = """Your task is to critically evaluate the proposed connections between the following groups of variables. 

Dataset description: {general_description}

You will now focus on these specific groups:
{groups}

Proposed connections:
{group_connections}

Follow these steps to analyze the connections:
(1) Evaluate the proposed relationships between the variables. 
(2) Assess whether any edges are in the anti-causal direction (i.e., (effect, cause) instead of (cause, effect)). Consider that intervening on the effect should not change the cause, but not the viceversa.
(3) Do those causal relationships always hold, or only on some context? Think about counterfactual scenarios.
(4) Utilize the available tools to gather additional information and evidence to support your analysis. If available, use the human expert for confirmation or for contextual knowledge.

Provide a summary of your findings, including any necessary revisions to the proposed connections.

IMPORTANT: Ensure the list of edges is enclosed within <edges> tags. For example: <edges>(Variable1, Variable2), (Variable2, Variable3)</edges>
IMPORTANT: ABSOLUTELY RESPECT THE FORMAT. DO NOT USE ARROWS OR ANYTHING SIMILAR TO REPRESENT EDGES. ONLY USE THIS FORMAT, AND USE THE COMMA TO SEPARATE VARIABLE NAMES:<edges>(Variable1, Variable2), (Variable2, Variable3)</edges>
"""

# Corrective follow-up message used when an edge list could not be parsed:
# restates the expected "<edges>(A, B), (B, C)</edges>" shape and asks the
# model to re-check its previous message. Contains no placeholders.
edge_list_format_fix_prompt = """
It seems like there is a formatting issue with parsing your previous output. The edges between variables should be a list of tuples, enclosed within tags, like this example: <edges>(A, B), (B, C)</edges>. Please review your previous message to make sure edges are properly formatted.
"""

# Corrective follow-up message used when a group listing could not be parsed:
# restates the expected
# "<group><nodes>[...]</nodes><description>...</description></group>" shape
# and asks the model to re-check its previous message. Contains no placeholders.
groups_format_fix_prompt = """
It seems like there is a formatting issue with parsing your previous output. Each groups of variables, should be enclosed within <group> tags. Within each group, there should be a list of variable names between <nodes></nodes> tags, as well as a description between <description></description> tags. Take this as an example: <group><nodes>[A, B, C]</nodes><description>Description of the group</description></group>. Please review your previous message to make sure groups are properly formatted.
"""

# Follow-up user prompt for the critic: reports edges that conditional
# independence tests did not support and asks the model to look for further
# evidence via tool calls, then output an updated edge list containing only
# the edges it still believes are causal.
# Placeholder: {edges} (the uncertain edges).
critic_refine_user_prompt = """After running conditional independence tests, the following edges did not find enough support in the data. 

Uncertain edges: {edges}

The uncertainty might arise because certain causal relationships might not be identifiable from the data, or because that causal relationship does not exist.

Please clarify this by reasoning and searching more evidence, through tool calls, for each one of those causal relationships. If you cannot find any, please state which edges are not supported by the data.

Finally, output an updated list of edges within <edges> tags. For example: <edges>(Variable1, Variable2), (Variable2, Variable3)</edges>
THE NEW LIST SHOULD CONTAIN ONLY THOSE EDGES THAT YOU THINK ARE TRUE CAUSAL RELATIONSHIPS.

IMPORTANT: ABSOLUTELY RESPECT THE FORMAT. DO NOT USE ARROWS OR ANYTHING SIMILAR TO REPRESENT EDGES. ONLY USE THIS FORMAT, AND USE THE COMMA TO SEPARATE VARIABLE NAMES:<edges>(Variable1, Variable2), (Variable2, Variable3)</edges>"""

# Corrective follow-up prompt used when the model's edge list mentions
# variables that are not part of the dataset: lists the valid variables and
# the offending names, then asks for an updated edge list inside <edges> tags.
# Placeholders: {variable_names} and {nonexisting_nodes}.
#
# The identifier is misspelled ("estisting"); it is kept unchanged because
# code outside this view may reference it by that name.
non_estisting_variables_prompt = """After reviewing the edge list of the causal graph, it appears that some variables have been introduced that do not exist in the dataset.
Variables you should focus on: {variable_names}
Nonexistent variables: {nonexisting_nodes}

This issue might have arisen due to a mix-up in variable names or acronyms. Please revise your previous output to ensure that only valid variables from the dataset are included in the edge list. Double-check for any potential naming errors or misinterpretations.

Finally, output an updated list of edges within <edges> tags. For example: <edges>(Variable1, Variable2), (Variable2, Variable3)</edges>
THE NEW LIST SHOULD CONTAIN ONLY THOSE EDGES THAT INVOLVE VALID VARIABLES FROM THE DATASET."""

# Correctly spelled alias for the prompt above; new code should prefer this name.
non_existing_variables_prompt = non_estisting_variables_prompt

# Corrective follow-up prompt used when a proposed sub-partition contains
# variables that are absent from its parent partition: asks the model to redo
# the grouping using only the parent's variables and restates the expected
# <group><nodes>[...]</nodes><description>...</description></group> format.
# Contains no placeholders. The "\\n" below is an escaped backslash, so the
# model sees the two literal characters "\n" rather than a line break.
groups_wrong_partitioning_fix_prompt = """After reviewing the nodes in the new sub-partition, it appears that some variables have been introduced that are not present in the parent partition.

This issue might have arisen due to a mix-up in variable names or acronyms. Please revise your previous output to ensure that only valid variables from the parent partition are included. Double-check for any potential naming errors or misinterpretations.

IMPORTANT: Variable labels should be the same as in the input. Each input variable should be assigned to a subgroup. Double check as to not introduce new variables by accident.
IMPORTANT: Ensure each subgroup is enclosed within <group> tags. For example: 
<group><nodes>[Variable1, Variable2]</nodes><description>Description of the subgroup</description></group>
IMPORTANT: DO RESPECT THE ABOVE FORMAT. 
DO NOT ADD \\n NEWLINE CHARACTERS OR LONG SPACES BETWEEN EACH NODE. JUST RESPECT THE FORMAT ABOVE AND SEPARATE VARIABLE NAMES WITH A COMMA."""