# REGENERATE_GRAPH = "<regenerate_graph>"
# KEEP_FACTOR = "<keep_factor>"
# KEEP_GRAPH = "<keep_graph>"
# KEEP_RULE = "<keep_rule_json>"


# Self-contained prompt asking for the major observable, binary factors and
# outcomes of a scenario. Unlike the templated prompts below, the numeric
# limits are written directly into the text (2-5 factors, at most 10 factors
# plus outcomes), so this string has no format placeholders.
# NOTE(review): the checklist item "limit output per `10`" presumably refers
# to the 10-item cap stated in the Instructions section - confirm the wording.
# NOTE(review): the text mixes straight and curly quotes ("yes” or “no”);
# left untouched here because it is runtime text.
gpt5_system_prompt = """
# Role and Objective

- Identify major observable and binary factors and outcomes in a given scenario, focusing on clearly visible, independent, and deterministic influences suitable for reliable video-based analysis.

# Checklist (plan before action)
- Review scenario description.
- Identify potential factors and outcomes per criteria.
- Ensure each item is visible, binary, independent, and deterministic.
- Explain how each is directly observed on video.
- Prioritize and limit output per `10`.

# Instructions
- Given a brief scenario description, list important and common potential factors (causes) and outcomes (effects) that significantly influence or result from key physical phenomena in the scenario.
- Factors must fit within these categories:
1. Properties of scenario objects.
2. Properties of environmental objects or the environment.
3. Properties of actions or the actions themselves.
- Each factor and outcome must meet these criteria:
1. **Clearly visible** in standard video footage, not inferred from hidden or indirect evidence.
- Explain how the visual distinction can be made for each item.
2. **Binary**: Classifiable as "yes” or “no” (true/false), not continuous values.
3. **Independent**: True/false assignment to any factor does not depend on others; minimize overlap or correlation.
4. **Deterministic**: Each factor has a direct, predictable effect on an outcome.
5. **Outcome visibility**: Outcomes should be directly observable in the video.
- If a pair of brackets appears in the description, treat the content in brackets as an expected variable but consider proposing others where relevant.
- Expected count of factors is roughly between `2` and `5`; some flexibility is allowed.
- Limit total count of factors plus outcomes to `10`; prioritize by commonality and importance, merging or dropping near-duplicates, trivial, or overly detailed entries.

Order both lists by importance to the scenario (most important first).
- Validate after listing: verify each entry meets all criteria. If any entry fails, self-correct or remove it before final output.
## Prohibited Practices
- Do NOT split one factor into multiple based on true/false value (e.g., “fast” vs. “slow” as two nodes).
- Do NOT use overlapping terms for different factors (e.g., avoid both “bounce” and “bouncy” as separate elements).
"""


# message_system = "You will be given a short description of a scenario. You should propose some possible factors where changing them may significantly affect the results. These factors could be (1) the objects or their properties in the scenario, (2) the environment, or (3) the actions.\n These factors should (1) be visible and easy to be recognized in a video, (2) be binary and can be labeled as \"yes\" or \"no\", instead of a continuous value, (3) be independent of each other,(4) the effect of the factors should be deterministic, not increase or decrease the probability of a certain result (5) the results should be also visible in a video. You should list the factors and shortly explain how they affect the scenario."

# The five numbered criteria every proposed factor must satisfy. The value
# begins and ends with a newline so it can be concatenated directly between
# other prompt fragments.
factor_requirements = "\n" + "\n".join([
    '1. It should be **visible** and easily recognizable in a video. For example, transparent objects like wind is not visible and detailed property of small objects are not recognizable in a video. Also notice that it must be directly visible in the video, not inferred from other factors or results. For example, "object is elastic" should be inferred from its material appearance, not from the result "object bounces." Always explain how each factor can be distinguished from the video.',
    '2. It should be **binary**, meaning it can be clearly labeled as either "yes" or "no", rather than a continuous value.',
    '3. It should be **independent**, neither related to or contradictory to other factors. It means the True or False value of one factor can be assigned independently of the values of other factors.',
    '4. Its effect on the outcome should be **deterministic** (i.e., it directly leads to a certain result, rather than just increasing or decreasing the probability).',
    '5. The resulting effect should also be **visible** in a video.',
]) + "\n"

# Two numbered prohibitions for factor proposals: never split the True/False
# values of one factor into separate factors, and never use overlapping
# descriptions for different factors. Starts and ends with a newline so it can
# be concatenated between other prompt fragments.
# Fix: "phenonmenon" was misspelled twice in the example of rule 1.
factor_prohibitions = """
1. DO NOT split the True or False of the same factor into two separate factors. For example, 'movement is fast' and 'movement is slow' SHOULD NEVER be treated as two different factors. Similarly, 'phenomenon appears' and 'no phenomenon' should not be treated as two different results.
2. DO NOT use overlapping or similar descriptions for different factors. Reduce correlation between factors. For example, do not use both "bounce" and "bouncy" simultaneously.
"""
# . DO NOT miss any results. Ensure that every explained factor explicitly leads to a stated result. For example, if you explain "block may move" due to a factor, you must also include "block moves" as a result.

# Explains the bracket convention in scenario descriptions: bracketed text
# marks an expected variable (factor or outcome), without ruling out others.
# One paragraph wrapped in a leading and a trailing newline.
explicit_disturb = (
    '\n'
    'If there is a pair bracket in the description, it means the content in the bracket is expected to be a variable (factors or outcome). '
    'For example, "A (large) stone is thrown into a swimming pool (and splash water)." means we expect "does the water splash" as one of the outcome and whether the stone is large enough is expected as one of "factors". '
    'But notice that it does not mean that other factors or outcomes are not allowed, you can also propose other factors or outcomes.'
    '\n'
)

# Self-check reminder that bracketed content must appear as a variable in the
# answer. A single sentence wrapped in a leading and a trailing newline.
check_explitcit_disturb = "\nPlease ensure that the content in the bracket has been correctly identified as a variable (factor or outcome) in your answer.\n"

# Main prompt for proposing factors and results. It is assembled once, when
# the module is loaded, by concatenating in order: the introduction and worked
# swimming-pool example below, `factor_requirements`, the prohibitions heading
# plus `factor_prohibitions`, the bracket convention in `explicit_disturb`,
# and a closing instruction to list all factors and results.
message_system = """
You will be provided with a brief description of a scenario. There could be some physical phenomenon in this scenario. Please identify some **important** and **common** potential factors whose changes could significantly influence some important outcome of the scenario. These factors can fall into one of the following categories:
1. The objects or their properties in the scenario.
2. The object in the environment or the properties of the environment.
3. The actions or some properties of the action. 

For example, if the scenario is 'Something is thrown into a swimming pool', the properties of the thrown object (e.g., size, weight, material), the properties of the water (e.g., depth, temperature, be covered by something or not), and the action of throwing (e.g., speed, angle), or the environment (e.g., the wind) could all be factors that influence the outcome (e.g., whether the ball hits the water, whether water splashes).

For each factor, ensure that it meets the following criteria:
""" + factor_requirements + "Make sure not to violate the following prohibitions:" + '\n' +  factor_prohibitions + explicit_disturb + "\nPlease list all the factors and results you identified in your answer."
# + """
# Please organize your answer as a **json** file as a list of dict, where each dict is like {"type": "factor_or_result", "name": "factor_or_result_name", "explanation": "how it affects the scenario and why you believe it is important and common"}. Start your answer with a <json> tag and end with a </json> tag.
# """

# Hint about how many factors to propose. Contains the `{low_node}` and
# `{high_node}` placeholders (presumably filled with str.format by the
# caller - confirm against the call site).
# Fix: the first sentence read "the number of the reason factors and roughly
# in the range of", which has no verb; it now reads "to be roughly in the
# range of".
complex_prompt = """
We expect the number of the reason factors to be roughly in the range of {low_node} to {high_node}. In some cases, it is acceptable to have a few more or fewer factors.
"""

# Caps the total number of nodes (factors + results) at `{node_limit}` and
# tells the model how to prune when it has too many candidates.
node_limit_note = (
    "\n"
    "The TOTAL number of nodes (factors + results) must be no more than {node_limit}. "
    "If you have too many candidates, keep only the most important and common ones, merge near-duplicate ones, and drop trivial or overly detailed ones."
    "\n"
)

# User-turn template: the scenario text is inserted, in double quotes, at
# the `{scenario}` placeholder.
message_user = "The scenario is: \"{scenario}\""

# message_analyze_graph = "Given your proposed factors and expected results, please generate a causal graph to summarize the physical relations between them. In the graph, you should contain the important and common factors while too detailed and trivial factors should be omitted. The causal graph should be a directed acyclic graph, where each node represents a factor, and each edge represents a direct causal relation. The causal graph should be organized as dot format. The dot file should be started as a <dot> tag and ended as a </dot> tag."

# Prompt asking for a causal graph (a DAG) over the previously proposed
# factors and results: factors are roots, every result has at least one
# incoming edge, and there are no cycles or isolated nodes.
# Fix: "should has" (twice in the last paragraph) is now "should have".
# The text is written one literal per line so that the trailing spaces
# present in the original prompt stay visible and unchanged.
message_analyze_graph = (
    "\n"
    "Based on the factors and their expected results you proposed, generate a causal graph that summarizes the physical relationships between them.\n"
    "The graph should be a **directed acyclic graph**, where: \n"
    " - Each **node** represents a factor or a result. \n"
    " - Each **edge** represents a direct causal relationship between two nodes. \n"
    "Ensure that only the **important** and **deterministic** causal relationships are included in the graph, while omitting trivial or overly detailed relationships.\n"
    "Your final graph should not contain any cycles, or isolated nodes. Your graph should be consistent with the factors and results you proposed earlier while each factor should have no incoming edge and each result should have at least one incoming edge. All factors and results should be included in the graph. \n"
)

# Context reminder template: restates the scenario (`{scenario}`) and shows
# the previously generated factors (`{factors}`) inside a fenced block.
# Empty list entries produce the blank lines of the original layout.
remind_factors = "\n".join([
    "",
    'The scenario is: "{scenario}"',
    "",
    "The generated factors:",
    "",
    "```",
    "{factors}",
    "```",
    "",
    "",
])

# message_analyze_rules = "Given your causal graph, please generate a bool expression for each non-root factor, which represents the conditions for this factor to be true. And the variables in the bool expression should be exactly the parents of this factor in the causal graph. You should organize your causal graph as dot format and your bool expressions should be dict[str, list[dict[str, bool]]], where the key is the name of the non-root factors and the value is the bool expression. For example, if a factor A = B or (C and not D), the bool expression should be {\"A\": [{\"B\": True}, {\"C\": True, \"D\": False}]}. \n Your final answer should be a json file with \"roots\"(a list of str), \"non_roots\"(a list of str), \"rules\"(a dict[str, list[dict[str, bool]]]). Please start your final answer with a <json> tag and end with a </json> tag. If you find some mistakes in your generated causal graph, please generate \"<regenerate_graph>\" and then you can regenerate the causal graph as we mentioned before."
# Prompt asking for one Boolean rule per non-root node, written in
# disjunctive normal form over that node's parents in the causal graph.
# The space before the final newline is part of the original text.
message_analyze_rules = (
    "\n"
    "Given the causal graph you generated, please create a Boolean expression for each **non-root** factor (factors with incoming edges) that represents the conditions under which that factor is **true**. "
    "The Boolean expression for each non-root factor should involve only the **parent factors** (i.e., the factors directly connected to it in the causal graph). "
    "The condition should be expressed as a **disjunctive normal form** (DNF), which is a disjunction (OR) of conjunctions (AND) of literals. "
    "\n"
)

# Context reminder template: restates the scenario (`{scenario}`), the
# generated factors (`{factors}`) and the generated causal graph (`{graph}`),
# the latter two inside fenced blocks. Empty entries are blank lines.
remind_factors_and_graph = "\n".join([
    "",
    'The scenario is: "{scenario}"',
    "",
    "The generated factors:",
    "```",
    "{factors}",
    "```",
    "",
    "The generated causal graph:",
    "```",
    "{graph}",
    "```",
    "",
    "",
])

# Self-check prompt for the factor list: repeats the shared requirements,
# prohibitions and bracket check, then asks the model to drop factors that
# are too detailed, uncommon or indirect, regenerating if needed.
# Plain literals are used throughout; none of the pieces has a replacement
# field, so no f-string is needed.
message_self_check_factors = "".join([
    "Please review the factors you have proposed. Ensure that each factor satisfies the following 5 requirements:",
    factor_requirements,
    "Make sure not to violate the following prohibitions:",
    factor_prohibitions,
    check_explitcit_disturb,
    "\nAdditionally, filter out any factors that are:\n",
    "- **Too detailed**, **corner-case**, or **uncommon** in the scenario.\n",
    "- Have an effect that is **too indirect** or difficult to understand.\n",
    "\n",
    "If necessary, you may regenerate the factors to meet the criteria. It's OK to keep your previous answer but you should carefully check every requirement for every factor and result.\n",
])

# Self-check prompt for the causal graph: four numbered criteria, then the
# shared bracket check, then permission to regenerate or keep the graph.
# Plain literals are used throughout; none of the pieces has a replacement
# field, so no f-string is needed.
message_self_check_graph = "".join([
    "Please review your causal graph. Ensure that it meets the following criteria:\n",
    "1. All nodes are **visible** and **binary**.\n",
    "2. All root nodes are **independent** of each other, which means the choice of one root node should not influence the choice of another root node.\n",
    "3. All edges in the graph is a **direct** and **deterministic** causal relation\n",
    "4. Include all **important** causes and results, while omitting trivial or overly detailed nodes.\n",
    check_explitcit_disturb,
    "\n",
    "If necessary, regenerate the causal graph to meet these requirements. It's OK to keep your previous graph if it already meets the criteria but you should carefully check every requirement for every node and edge.\n",
])

# Self-check prompt for the rules answer: the roots/non_roots lists must
# match the graph, and each Boolean rule must use exactly the parents of its
# node. One literal per line; the value has no trailing newline, and the
# space at the end of item 1 is part of the original text.
message_self_check_rules = "\n".join([
    "Please review your answer. Ensure your answer meets the following criteria:",
    '1. The "roots" and "non_roots" list must be consistent with the causal graph. ',
    "2. For the bool expressions:",
    "    - All the non-root factors are included in the rules dict, and no other factors are mistakenly included as keys",
    "    - All variables in the Boolean expressions are exactly the parents of the corresponding non-root factors in the causal graph",
    "    - The boolean expressions should correctly represent the physical rules in the real world.",
    "If necessary, regenerate the json file to meet the requirements. It's OK to keep your previous rules if they already meet the criteria but you should carefully check every requirement for every variable and rule.",
])

# Generic "check once more" template; `{object}` appears three times and
# names the thing being reviewed.
message_self_check_again = (
    "Please review your {object} again. "
    "If necessary, you can regenerate the {object}. "
    "It's OK to keep your previous {object} if it already meets the criteria but you should first carefully check it."
)

# Tells the model it may return corrected factors in place of the graph.
# Fix: "instead the graph" was missing "of"; the wording now parallels
# `message_regenerate_graph_allow`. The `f` prefix was dropped because the
# string has no replacement fields.
message_regenerate_factors_allow = "If you find that you need to modify your generated factors, please generate the modified factors instead of the graph."

# Tells the model it may return a corrected causal graph in place of the
# rules. Written as a plain literal: there are no replacement fields.
message_regenerate_graph_allow = (
    "If you find that you need to modify your generated causal graph, "
    "please generate the modified graph instead of the rules."
)