"""Prompt optimization using PSAO and Optuna."""

import json


from pydantic import create_model

# Prompt text (headed "System Prompt: Legal Question Analysis") that walks a
# model through issue spotting, rule application and per-choice elimination
# for multiple-choice legal questions. This is runtime text: any wording
# change alters what the model is told.
# The "Key Legal Principles" section must stay legally accurate; the rule
# against perpetuities is measured from a life in being at the creation of
# the interest, not from the grantor's life.
mmlu_professional_law_test_one_go_sys_prompt = """
# System Prompt: Legal Question Analysis for Accurate Reasoning

Your task is to carefully analyze legal reasoning questions and guide accurate responses by performing a detailed evaluation of the facts, applicable law, and answer choices. Follow the structure below to ensure answers are correct and reasoning aligns with well-established legal principles.

---

### Guidelines for Analyzing Legal Questions:

1. **Understand the Question**:
   - Read the entire question, including the facts, legal doctrines, and answer choices.
   - Identify the **main legal issue** being tested (e.g., jurisdictional errors, applicability of statutes, mens rea, property doctrines like merger or perpetuities, etc.).

2. **Apply the Governing Legal Rule(s)**:
   - Clearly outline the legal principles or statutes that apply to the case.
   - Pay attention to statutory definitions or tests (e.g., "malice," "fee simple determinable") and how they apply to the specific facts provided.

3. **Evaluate the Answer Choices**:
   - Independently assess each answer choice.
   - State whether the answer choice:
     - Aligns with the governing legal rule.
     - Properly applies the facts.
     - Contains errors in reasoning or irrelevant considerations.
   - Eliminate wrong answers by explaining their flaws clearly and concisely.

4. **Focus on Precision**:
   - Ensure the reasoning is consistent with legal standards.
   - Avoid overcomplicating the question; stick to the relevant laws and facts.

5. **Output the Correct Answer**:
   - Based on your evaluation, select the answer choice that best conforms to the law and facts.

---

### Example Analysis:

#### Question:
[Insert a sample legal reasoning question, focusing on clear facts, statutes, and answer choices.]

#### Process:
1. **Identify the Main Legal Issue**: Clearly state the legal question being tested (e.g., whether removal is proper, whether an easement was extinguished by merger, or whether malice was present).
2. **Apply the Governing Rule**: Explain the relevant legal doctrine or statute.
3. **Evaluate Answer Choices**:
   - **A**: Explain if it’s correct or why it's irrelevant/incorrect.
   - **B**: Do the same for this option.
   - **C**: Repeat for this option.
   - **D**: Repeat for this option.
4. **Conclusion**: Select the correct answer and briefly justify why it conforms to the facts and law.

---

### Key Legal Principles to Remember:
1. **Federal Procedure**:
   - Only defendants can remove cases to federal court.
   - Removal requires proper jurisdictional grounds (e.g., federal-question jurisdiction, diversity jurisdiction).
   - Procedural irregularities (e.g., remanding state claims but retaining federal ones) must align with federal law.

2. **Property Law**:
   - The rule against perpetuities invalidates future interests that might not vest within 21 years after the end of some life in being at the creation of the interest.
   - Reservations like reentry clauses must comply with perpetuities laws and serve enforceable conditions.

3. **Separation of Powers**:
   - Congress can limit executive action within its constitutional authority, but the President retains sole authority as Commander-in-Chief for military operations unless explicitly restricted (e.g., War Powers Resolution).

By following this structure for every legal reasoning question, ensure that only the most accurate and logically supported answer is selected.
"""


# aqua_one_go_sys_prompt = """
# # System Prompt: Transform Legal Reasoning Questions into "Annotation + Segment" Format

# Your task is to convert any legal reasoning question into the "annotation + segment" format. This approach ensures the clarity of the question’s key facts, applicable laws, and reasoning, helping downstream analysis produce accurate and logically sound answers. You must focus on segmenting the question logically and then adding concise, insightful annotations to each segment.

# ---

# ### Format Overview:

# For every legal question:
# 1. **Annotations**:
#    - Provide a brief summary or explanation of the **role** of each segment.
#    - Highlight important legal principles or doctrines in play (e.g., removal procedure, merger doctrine, perpetuities, separation of powers, etc.).
#    - Clarify relevant facts, ambiguities, and traps that might lead to incorrect conclusions.

# 2. **Segments**:
#    - Divide the original question into logical parts, preserving the **original text exactly as written** within each segment.
#    - These segments should group together related facts, legal doctrines, and answer choices for clarity.

# ---

# ### Detailed Instructions for Writing the "Annotation + Segment" Format:

# #### 1. Divide the Question into Segments:
# - **Segment 1: Case Facts and Background**:
#   - Include all the facts provided in the question that set up the legal issue.
#   - This segment introduces the entities, actions, and events relevant to the dispute or charges.

# - **Segment 2: Legal Rules or Doctrines**:
#   - Identify the key statutes, doctrines, or legal principles mentioned in the question.
#   - If rules are not provided directly, infer implied legal principles that apply (e.g., transferred intent in criminal law, rules for removal procedures, or the rule against perpetuities in property law).

# - **Segment 3: Key Legal Issues**:
#   - State the primary issue or dispute the question raises (e.g., whether an easement has been extinguished, whether removal is procedurally proper, or whether congressional action conflicts with executive authority).
#   - The question may state the legal dispute explicitly or implicitly.

# - **Segment 4: Answer Choices**:
#   - Include all the multiple-choice options as provided.
#   - Annotate each option to evaluate its validity, referencing the relevant rules and facts.

# ---

# #### 2. Annotate Each Segment:
# For each segment, add a concise annotation that:
# - Explains the role of the segment in the legal problem.
# - Identifies key legal principles or doctrines underlying the segment (e.g., **jurisdiction rules**, **removal statutes**, **commander-in-chief powers**, etc.).
# - Notes ambiguities, traps, or unstated assumptions that might arise and lead to incorrect conclusions.
# - Provides clear reasoning grounded in law.

# ---

# ### Generalized Example:

# #### Segment 1: Case Facts and Background
# **Annotation**:
# This segment introduces the factual context of the legal question. Identify the relevant parties, their relationships (e.g., plaintiff and defendant), and their actions. Highlight key facts essential to resolving the case (e.g., who initiated the legal action, the claims alleged, and procedural steps taken). Recognize potential distractions or irrelevant factual motives.

# **Text**:
# [Insert the case facts from the original question here.]

# ---

# #### Segment 2: Governing Law or Rules
# **Annotation**:
# This segment provides the legal rules, statutes, or doctrines that govern the case. If rules are not explicitly stated, determine which legal principles apply implicitly (e.g., transferred intent, merger doctrine, perpetuities). Explain how these rules interact with the facts. Identify ambiguity (e.g., does the rule restrict certain parties, or does it apply broadly?).

# **Text**:
# [Insert the legal rules or statutes from the original question here.]

# ---

# #### Segment 3: Legal Issue or Dispute
# **Annotation**:
# This segment states the primary legal question or issue. Highlight what the court or decision-maker must determine and how the facts align with (or conflict with) the legal rules. Reaffirm key factual relationships critical to resolving the issue. Discuss whether ambiguity in the rules or facts adds complexity.

# **Text**:
# [Insert the case's central legal question or issue here.]

# ---

# #### Segment 4: Answer Choices
# **Annotation**:
# Each answer choice must be analyzed in light of the facts and legal rules:
# - **Correct Answer**: Explain how the correct response logically resolves the legal issue based on the facts and governing law.
# - **Incorrect Answers**: State why the wrong options fail, such as misapplying the law, introducing irrelevant reasoning, or contradicting the facts. Name any logical traps inherent in incorrect options.

# **Text**:
# [Insert the original answer choices from the question here.]

# ---

# ### Legal Analysis and Reasoning Guidelines:

# 1. **Federal Court Jurisdiction**:
#    - Only **defendants** can remove cases to federal court.
#    - Proper removal requires valid jurisdictional grounds (e.g., federal-question or diversity jurisdiction).
#    - Removal by the plaintiff is procedurally improper.

# 2. **Property Law**:
#    - **Merger Doctrine**: Easements terminate automatically if one person acquires both the dominant and servient estates.
#    - **The Rule Against Perpetuities**: Future interests that do not vest within the required period are invalid unless modified by state law.

# 3. **Separation of Powers**:
#    - Congress may limit executive action within constitutionally authorized powers.
#    - The President retains authority as Commander-in-Chief, but congressional limitations via laws like the War Powers Resolution may restrict specific actions.

# 4. **Criminal Law**:
#    - **Transferred Intent**: Intent to harm one person can transfer to another if the harm occurs.
#    - **Negligence vs. Malice**: Malice requires a specific mental state (e.g., intent to harm), while negligence reflects a lower level of culpability.

# ---

# ### Final Checklist for Generating "Annotation + Segment" Outputs:
# - **Segmentation**:
#   - Ensure the segmentation follows logical groupings: case facts, legal rules, the central dispute, and answer choices.
#   - Preserve the original question's text exactly within each segment.
# - **Annotations**:
#   - Highlight the role of each segment.
#   - Identify relevant legal doctrines or factual connections.
#   - Address ambiguities, traps, or assumptions.
#   - Explain why some legal interpretations may be invalid.

# This structure will improve the clarity of legal reasoning questions and ensure accurate downstream analysis to identify the correct answer.
# """


# Prompt text (headed "System Prompt: Annotate and Segment Algebra Questions
# for AQUA Dataset") asking a model to split an algebra question into four
# annotated segments and add a step-by-step solution rationale.
# The worked example must stay self-consistent: the segments partition the
# original question without repeating text, and the computed result
# (725 liters) is one of the listed answer choices.
aqua_one_go_sys_prompt = """
# System Prompt: Annotate and Segment Algebra Questions for AQUA Dataset

Your task is to analyze and restructure algebra-based reasoning questions from the AQUA dataset into the "annotation + segment" format. This approach separates the problem into logical parts, provides annotations for each segment, and adds rationale for the solution. The objective is to clarify mathematical reasoning, improve problem comprehension, and ensure accurate outputs.

---

### Instructions:

For each question:
1. **Segmentation**:
   - Divide the original question into logical segments:
     - **Segment 1**: Problem description (includes initial setup and the given information).
     - **Segment 2**: Any specific conditions, variables, or equations provided in the problem.
     - **Segment 3**: The question being asked.
     - **Segment 4**: The answer choices.

2. **Annotations for Each Segment**:
   - Concisely explain the **purpose** of the segment (e.g., what information is being presented, and its relevance to solving the problem).
   - Highlight how the segment connects to the overall mathematical reasoning.
   - Identify any common traps, assumptions, or ambiguities in the segment.

3. **Solution Rationale**:
   - Provide a **step-by-step explanation** of how to solve the problem—including:
     - Key equations or formulas relevant to the solution.
     - Substitutions, simplifications, or transformations applied to the problem.
     - Intermediate steps and logical reasoning leading to the correct answer.
   - State the correct answer clearly with justification.

---

### Format Overview:

#### 1. Annotation:
Provide an **annotation** for every segment. Explain its role in solving the problem and identify key details, variables, or challenges introduced.

#### 2. Segment:
Provide **unaltered text** from the original question within each segment.

---

### General Example:

#### Segment 1: Problem Description
**Annotation**:  
This segment sets up the problem, providing the relevant context and introducing the variables. Focus on identifying the numbers, relationships, or unknowns that will be used to set up equations. Highlight the need to extract precise mathematical relationships here.

**Text**:  
[Insert the problem description provided in the question.]

---

#### Segment 2: Conditions, Variables, and Equations
**Annotation**:  
This segment provides conditions, constraints, or equations defining relationships between variables. Carefully identify any implied or explicit constraints, such as "proportional relationships," "integer-only solutions," or inequalities.

**Text**:  
[Insert the conditions, variables, or equations given in the question.]

---

#### Segment 3: The Question Being Asked
**Annotation**:  
This segment specifies the question. Be clear about what the problem is asking to find or solve (e.g., "solve for x," "identify the smallest integer," "calculate a ratio"). Identify logical pitfalls, such as assumptions about positive/negative values or constraints.

**Text**:  
[Insert the question prompt here.]

---

#### Segment 4: The Answer Choices
**Annotation**:  
These are the multiple-choice options. Emphasize analyzing each option systematically:
- Correct Answer: Derive this choice mathematically and explain why it satisfies all problem constraints.
- Incorrect Answers: Highlight why these do not satisfy the problem's logic, equations, or constraints.  

**Text**:  
[Insert the original answer choices.]

---

#### Solution Rationale:
**Annotation**:  
Provide a step-by-step solution, starting from the facts and equations provided:
1. Write the equations derived from the segments.
2. Solve the equations systematically.
3. Show substitutions and intermediate steps explicitly.
4. Arrive at the final answer and validate it using the original conditions.

---

### Example Problem Transformation (Worked Example):

#### Original Question:
A tank contains 500 liters of water and is being filled at a rate of 20 liters per minute. At the same time, water is leaking out at a rate of 5 liters per minute. How much water will be in the tank after 15 minutes?  
A. 470 liters  
B. 545 liters  
C. 575 liters  
D. 725 liters  

---

### "Annotation + Segment" Format:

#### Segment 1: Problem Description
**Annotation**:  
This segment sets up the context. The tank contains an initial volume of water (500 liters). This value is the starting point to which the net change over the given time period will be added.

**Text**:  
"A tank contains 500 liters of water"

---

#### Segment 2: Conditions, Variables, and Equations
**Annotation**:  
This segment gives the rates that define the relationships between the variables: a rate of inflow (20 liters per minute) and a rate of outflow (5 liters per minute). Specifically:
- Net rate = inflow rate - outflow rate.
- This rate must be applied over the elapsed time to determine the total change in volume.  

**Text**:  
"and is being filled at a rate of 20 liters per minute. At the same time, water is leaking out at a rate of 5 liters per minute."

---

#### Segment 3: Question Being Asked
**Annotation**:  
This segment asks for the tank's water volume after 15 minutes at the given rates. The solution requires calculating:
- Net rate of change in volume per minute.
- Total change in volume after 15 minutes.
- Final water volume (initial volume + net change).  

**Text**:  
"How much water will be in the tank after 15 minutes?"

---

#### Segment 4: The Answer Choices
**Annotation**:  
The correct choice is derived based on calculations from provided rates. Each incorrect option reflects a potential error in interpreting the rates or applying the formula.  

**Text**:  
A. 470 liters  
B. 545 liters  
C. 575 liters  
D. 725 liters  

---

#### Solution Rationale:
1. **Net rate of change** = (Inflow rate) - (Outflow rate)  
   = 20 liters/min - 5 liters/min = 15 liters/min.  
2. **Total volume change in 15 minutes** = Net rate × Time  
   = 15 liters/min × 15 minutes = 225 liters.  
3. **Final volume** = Initial volume + Total change  
   = 500 liters + 225 liters = 725 liters.  
4. **Validation**: the inflow exceeds the leak, so the volume must grow; 725 liters is greater than the initial 500 liters, as expected.
5. Correct answer is D (725 liters).

---

### Additional Instructions for General Use:
- Use specific placeholders ([Problem Description], [Conditions], etc.) for any question.
- Avoid relying on real-world examples (to prevent data leakage). Focus only on logical structure.
- Ensure each segment provides just enough clarity for precise reasoning.

By adhering to this structure, downstream reasoning models can consistently arrive at accurate and logically supported answers.
"""

# aqua_v2 = """
# You are a prompt engineer.

# Goal:
# 1. If the question is clear and you are confident in your understanding, return the original question.
# 2. If the question is ambiguous or lacks detail, highlight these issues in [brackets] before the relevant segment.
# Turn any input question into verbatim segments with concise, high-signal annotations that clarify intent, surface constraints/assumptions, and—when appropriate—state the final answer. Reveal chain-of-thought in [brackets] before the relevant segment.

# Quality Bar:
# Your output must be more useful than the original question by improving clarity, surfacing dependencies and ambiguities, and guiding solution focus. Otherwise, just respond with the original question.

# Annotation:
#    - State the segment’s role and relevance.
#    - Highlight key facts, constraints, units, definitions, and dependencies.
#    - Note implications, edge cases, missing info, and assumptions (label assumptions clearly).
#    - You may include the final answer succinctly in the most relevant segment’s annotation when the question calls for one. For multiple choice, you may name the selected option with a one-sentence justification.

# Notes:
# - Do not modify text in any “Text” field.
# - Annotations must increase clarity and actionability beyond the original question.
# - Identify required methods/principles.
# - If data are missing or ambiguous, flag it and state how you would proceed under reasonable assumptions.
# """

# Annotation system prompt addressed to Claude Sonnet: the model is told to
# answer "[Clear]" for unambiguous questions and otherwise to embed
# [bracketed] annotations next to the problematic parts of the question.
# Every entry of ``ann_sys_prompt_sonnet`` points at this string.
# NOTE(review): the text asks for a bare "[Clear]" reply, while ``one_go``
# requests a structured reply with ``improved_prompt`` / ``clear_flag``
# fields — confirm the two conventions are meant to coexist.
meta_prompt_sonnet = """
You are a prompt segmentation and annotation engineer for Claude-4-Sonnet.

Objective:
If the input question is unambiguous and you have complete understanding, respond with "[Clear]" only.
If the question contains ambiguities, missing context, or unclear specifications, annotate these issues using [brackets] positioned adjacent to the problematic segments.

Quality Standard:
Your annotated output must demonstrably enhance the original question by:
- Clarifying ambiguous terms and requirements
- Exposing hidden dependencies and constraints
- Identifying missing critical information
- Focusing the solution pathway
If your annotations do not meet this standard, return the original question unchanged.

Annotation Guidelines:
   - Embed annotations in [brackets] directly within the text flow to maintain readability
   - Flag essential elements: precise definitions, numerical constraints, units of measurement, logical dependencies
   - Highlight potential complications: boundary conditions, exceptional cases, unstated assumptions, missing data points
   - Mark assumptions explicitly with "ASSUMPTION:" labels
   - For questions requiring definitive answers, you may include the solution concisely within the most relevant annotation. For multiple choice questions, indicate your selection with brief reasoning.

Requirements:
- Annotations must substantively improve question clarity and solvability
- Specify necessary methodologies, frameworks, or principles needed
- When information is incomplete or ambiguous, clearly flag the gap and describe your approach using reasonable assumptions
- Maintain Claude's analytical precision while enhancing question structure
"""

# Annotation system prompt addressed to Gemini 2.0 Flash: reply "[Clear]"
# when the question is already actionable, otherwise add clarifying
# annotations (missing data is labelled "INFERRED:").
# ``ann_sys_prompt_gemini`` maps every dataset to ``meta_prompt_gemini_v2``
# rather than to this string; nothing in the visible part of the file
# references this variant — presumably it is the predecessor of v2 (verify
# before removing).
meta_prompt_gemini = """
You are a prompt enhancement and clarification specialist, optimizing questions for Gemini 2.0 Flash.

Goal:
If the user question is perfectly clear and actionable as-is, respond with "[Clear]".
Otherwise, analyze the question for areas that could benefit from more precision and add clarifying annotations next to the relevant parts of the original question.

Value Metric:
Your augmented question should guide Gemini 2.0 Flash toward a significantly better, more complete, or more accurate response than the original, without removing the original text. If the annotations don't create substantial improvement, just return the original question.

Annotation Style:
*   Integrate annotations seamlessly within the question's text, without interrupting the context.
*   Emphasize providing context, definitions, and constraints before and after current text.
*   Explicitly identify missing details, unstated assumptions, or edge cases Gemini might miss.
*   When appropriate and if the question calls for it, include the *expected* answer (briefly) within the most pertinent annotation, acting as an exemplar for Gemini. In multiple-choice, provide your selection with a concise rationale.

Instructions:
*   Prioritize annotations that provide *actionable* information Gemini can directly use.
*   Point out the *methods* or *approaches* that would be most effective to answer the question.
*   When critical data is missing, state the gap and how you'll fill it using common-sense reasoning (label as "INFERRED:").
*   Focus on brevity and relevance in your annotations. Avoid unnecessary verbosity.
"""

# Annotation system prompt addressed to Gemini 2.0 Flash (second version):
# reply "[Clear]" for already-actionable questions, otherwise insert
# [bracketed] annotations before the relevant parts without altering the
# original wording. Every entry of ``ann_sys_prompt_gemini`` points at this
# string.
# NOTE(review): the text asks for a bare "[Clear]" reply, while ``one_go``
# requests a structured reply with ``improved_prompt`` / ``clear_flag``
# fields — confirm the two conventions are meant to coexist.
meta_prompt_gemini_v2 = """
You are a prompt optimization specialist designed to refine user questions for Gemini 2.0 Flash, maximizing response accuracy.

**Goal:**

If the user question is already perfectly clear and actionable, respond with "[Clear]". Otherwise, analyze the question for potential sources of ambiguity and instability and enhance the question by adding precise clarifying annotations directly before the corresponding parts, enclosed in [brackets]. The goal is to guide Gemini 2.0 Flash to generate significantly more accurate responses without changing the original text. Adding new text is allowed only within the [brackets].

**Annotation Style & Content:**

*   **Integration:** Annotations must be seamlessly integrated *within* the question text using [brackets].
*   **Focus:** Prioritize annotations that provide *actionable* information directly useful to Gemini, minimizing ambiguity and guiding it towards a more accurate response. This includes:
    *   **Context & Definitions:** Add necessary background information or define potentially ambiguous terms.
    *   **Constraints:** Specify limitations, boundaries, specific formats for the answer *and desired tone or style* (e.g., formal, informal, persuasive).  This is crucial for response stability.
    *   **Missing Details:** Identify gaps in the question and suggest how to fill them, either by stating the need for explicit clarification or using common-sense reasoning ("INFERRED: [Reasoning and inferred detail]").
    *   **Unstated Assumptions:** Explicitly acknowledge hidden assumptions ("ASSUMPTION: [State the assumption; be specific about potential variations that could affect the response]").
    *   **Methodologies:** Suggest effective approaches or methods for answering the question.  *Specify the desired reasoning framework or process (e.g., 'use a step-by-step analysis,' 'apply the SWOT framework').*
    *   **Exemplars (When Appropriate):** For complex requests, provide a brief example of the desired output within the [brackets] to clarify expectations and *anchor the response format and style*.
    *   **Output Format:** *Force the response to be a specific output format (e.g., list, table, JSON) when appropriate.*

*   **Brevity:** Keep annotations concise and highly relevant to improving the response and promoting consistency. Avoid unnecessary explanations.
*   **Do not change the fundamental nature of the prompt.**
"""

# Annotation system prompt for GPT models: return "[Clear]" when the question
# is understood, otherwise flag ambiguities in [brackets] before the relevant
# segment. Every entry of ``ann_sys_prompt_gpt`` points at this string.
# NOTE(review): the text asks for a bare "[Clear]" reply, while ``one_go``
# requests a structured reply with ``improved_prompt`` / ``clear_flag``
# fields — confirm the two conventions are meant to coexist.
meta_prompt_gpt4o = """
You are a prompt segmentation and annotation engineer.

Goal:
If the input question is clear and you are confident in your understanding, return "[Clear]" only.
Otherwise, if the question is ambiguous or lacks detail, highlight these issues in [brackets] before the relevant segment.

Quality Bar:
Your output must be more useful than the original question by improving clarity, surfacing dependencies and ambiguities, and guiding solution focus. Otherwise, just response the original question.

Annotation:
   - Annotation should be seamlessly integrated without causing interruption of context within [].
   - Highlight key facts, constraints, units, definitions, and dependencies.
   - Note implications, edge cases, missing info, and assumptions (label assumptions clearly).
   - You may include the final answer succinctly in the most relevant segment’s annotation when the question calls for one. For multiple choice, you may name the selected option with a one-sentence justification.

Notes:
- Annotations must increase clarity and actionability beyond the original question.
- Identify required methods/principles.
- If data are missing or ambiguous, flag it and state how you would proceed under reasonable assumptions.
"""

# Number of segments requested from the model in ``segment_text``.
num_segments = 5

# Priority tags offered as annotation choices.
annotation_options = [
    f"[priority: {level}]" for level in ("high", "medium", "low")
]

# Answer-format suffixes shared by several datasets.
_LETTER_SUFFIX = "\nOnly output one of {A, B, C, ..., None} as the final answer"
_BOOLEAN_SUFFIX = "\nOnly output one of {True, False, None} as the final answer"
_YES_NO_SUFFIX = "\nOnly output one of {Yes, No, None} as the final answer"
# The AQUA wording differs slightly (no "one of"); kept as-is.
_AQUA_SUFFIX = "\nOnly output {A, B, C, ..., None} as the final answer"

# Dataset name -> text appended to the final user prompt, or ``None`` when
# the dataset needs no answer-format instruction.
question_suffix_dict = {
    # Free-form arithmetic: no suffix.
    "gsm8k_1": None,
    "gsm8k_2": None,
    "gsm8k_3": None,
    "gsm8k_4": None,
    "gsm8k": None,
    "aqua": _AQUA_SUFFIX,
    "bbh_boolean_expressions": _BOOLEAN_SUFFIX,
    "bbh_causal_judgement": _YES_NO_SUFFIX,
    "bbh_movie_recommendation": _LETTER_SUFFIX,
    "bbh_ruin_names": _LETTER_SUFFIX,
    "bbh_temporal_sequences": _LETTER_SUFFIX,
    "mmlu_college_medicine_test": _LETTER_SUFFIX,
    "multiarith": None,
    "mmlu_high_school_us_history_test": _LETTER_SUFFIX,
    "mmlu_high_school_world_history_test": _LETTER_SUFFIX,
    "mmlu_professional_law_test_0": _LETTER_SUFFIX,
    "mmlu_professional_law_test_1": _LETTER_SUFFIX,
    "mmlu_professional_law_test_2": _LETTER_SUFFIX,
    "mmlu_professional_law_test_3": _LETTER_SUFFIX,
    "mmlu_professional_law_test_4": _LETTER_SUFFIX,
    "mmlu_professional_law_test": _LETTER_SUFFIX,
    # no_sys
}

# Alias of ``meta_prompt_gemini_v2``. Nothing in the visible part of the file
# references it — NOTE(review): confirm whether any caller still needs this
# name before removing it.
meta_prompt_gemini_v2meta_prompt = meta_prompt_gemini_v2

# Datasets for which ``one_go`` has an annotation system prompt. The three
# per-model tables below are built from this single tuple so they cannot
# drift apart. The last three names were missing from the original
# hand-written tables although ``question_suffix_dict`` defines them, which
# made ``one_go`` raise ``KeyError`` for those datasets.
# ("aqua" previously had a commented-out alternative, aqua_one_go_sys_prompt.)
_ONE_GO_DATASETS = (
    "aqua",
    "gsm8k_1",
    "gsm8k_2",
    "gsm8k_3",
    "gsm8k_4",
    "gsm8k",
    "bbh_boolean_expressions",
    "bbh_causal_judgement",
    "mmlu_high_school_us_history_test",
    "mmlu_high_school_world_history_test",
    "mmlu_professional_law_test_0",
    "mmlu_professional_law_test_1",
    "mmlu_professional_law_test_2",
    "mmlu_professional_law_test_3",
    "mmlu_professional_law_test_4",
    "multiarith",
    "mmlu_college_medicine_test",
    "bbh_temporal_sequences",
    "bbh_movie_recommendation",
    "bbh_ruin_names",
    "mmlu_professional_law_test",
)

# Dataset name -> system prompt, one table per model family. Every dataset
# currently shares the family-wide prompt; the values are immutable strings,
# so sharing one object across keys is safe.
ann_sys_prompt_gemini = dict.fromkeys(_ONE_GO_DATASETS, meta_prompt_gemini_v2)

ann_sys_prompt_gpt = dict.fromkeys(_ONE_GO_DATASETS, meta_prompt_gpt4o)

ann_sys_prompt_sonnet = dict.fromkeys(_ONE_GO_DATASETS, meta_prompt_sonnet)


def one_go(
    llm,
    question,
    dataset_name,
):
    """Annotate ``question`` with a single LLM call and build the user prompt.

    The system prompt comes from the per-family tables
    (``ann_sys_prompt_gpt`` / ``ann_sys_prompt_sonnet`` /
    ``ann_sys_prompt_gemini``), chosen by which family name occurs in the
    lower-cased ``llm.version_name``. The model is asked for a JSON object
    with the fields ``improved_prompt`` and ``clear_flag``.

    Args:
        llm: Object exposing ``version_name`` and
            ``generate(messages=..., response_format=...)``; ``generate``
            must return a JSON string.
        question: The original question text.
        dataset_name: Key into the system-prompt tables and into
            ``question_suffix_dict``.

    Returns:
        ``(prompt_best, clear_flag)``. ``prompt_best`` is a one-element list
        with a ``user`` message whose content is the original question (when
        the model set ``clear_flag`` or returned no usable rewrite) or the
        model's ``improved_prompt``, followed by the dataset's answer-format
        suffix if one is configured. ``clear_flag`` is the value reported by
        the model (``False`` when absent).

    Raises:
        ValueError: ``llm.version_name`` contains none of "gpt", "sonnet"
            or "gemini".
        KeyError: ``dataset_name`` is missing from the selected prompt table
            or from ``question_suffix_dict``.
    """
    print(llm.version_name)

    version_name = llm.version_name.lower()
    # Checked in this order; the first family whose name occurs wins.
    prompt_tables = (
        ("gpt", ann_sys_prompt_gpt),
        ("sonnet", ann_sys_prompt_sonnet),
        ("gemini", ann_sys_prompt_gemini),
    )
    for family, table in prompt_tables:
        if family in version_name:
            system_prompt = table[dataset_name]
            break
    else:
        # Previously this fell through and crashed later with an
        # UnboundLocalError on ``messages``.
        raise ValueError(
            f"Unsupported model {llm.version_name!r}: expected 'gpt', "
            "'sonnet' or 'gemini' in version_name"
        )

    # Resolve the suffix before calling the model so that an unknown dataset
    # fails fast instead of after a paid LLM round-trip.
    question_suffix = question_suffix_dict[dataset_name]

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"{question}"},
    ]

    # ``(type, ...)`` declares a required field; the tuple form is accepted
    # by pydantic 1.x and 2.x alike.
    resp_format = create_model(
        "psao_sa_prompt",
        improved_prompt=(str, ...),
        clear_flag=(bool, ...),
    )

    raw_response = llm.generate(
        messages=messages,
        response_format=resp_format,
    )
    resp_dict = json.loads(raw_response)
    improved_prompt = resp_dict.get("improved_prompt") or ""
    clear_flag = resp_dict.get("clear_flag", False)

    # Keep the original question when the model judged it clear, and also
    # when it returned no rewrite at all (an empty prompt would be useless).
    if clear_flag or not improved_prompt.strip():
        content = question
    else:
        content = improved_prompt
    if question_suffix is not None:
        # The configured suffixes already start with "\n"; the extra newline
        # is kept so the prompt layout is unchanged.
        content = content + "\n" + question_suffix

    prompt_best = [
        {
            "role": "user",
            "content": content,
        },
    ]

    print("=" * 17)
    print("!!! PSAO prompt string !!!")
    print(json.dumps(prompt_best, indent=2))
    print("=" * 17)
    return prompt_best, clear_flag


def segment_text(
    llm,
    question,
):
    """Ask the LLM to split *question* into meaning-based segments.

    Args:
        llm: Client object exposing ``generate(messages=..., response_format=...)``
            whose result is decoded here with ``json.loads``.
        question: Text to be segmented; it is embedded in the user message
            between ``<`` and ``>``.

    Returns:
        The value stored under ``"seg_list"`` in the decoded response, or an
        empty list when that key is absent.
    """
    # Structured-output schema: a single field holding the segments.
    seg_schema = create_model(
        "PSAOSegmentation",
        seg_list=list[str],
    )

    # NOTE(review): ``num_segments`` is not a parameter of this function; it is
    # presumably a module-level name defined elsewhere in the file -- confirm.
    # NOTE(review): the adjacent literals below are concatenated without any
    # separator, so the sentences run together in the prompt that is sent.
    # The text is left untouched here so the prompt itself does not change.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "user",
            "content": (
                # "Do not answer the following question."
                f"Your job is to segment the following question into {num_segments} segments based on the meaning."
                "It should be able to restore the original text by concatenating the segments."
                # "Return all the segmented text without any additional or removal of the original text.\n\n"
                "Question:"
                f"\n<{question}>"
            ),
        },
    ]

    response = llm.generate(
        messages=messages,
        response_format=seg_schema,
    )

    resp_seg_dict = json.loads(response)
    # Fall back to an empty list (not an empty string) so the return type
    # stays consistent with the ``list[str]`` schema declared above.
    return resp_seg_dict.get("seg_list", [])


def annotate_text(
    llm,
    segment,
    context,
    dataset_name,
):
    """Request an LLM-written annotation for one segment of a larger text.

    Args:
        llm: Client object exposing ``generate(messages=..., response_format=...)``
            whose result is decoded here with ``json.loads``.
        segment: The piece of text to annotate (sent under "Segment:").
        context: Text sent alongside the segment under "Context:".
        dataset_name: Not used by the current body; the per-dataset system
            prompt lookup that would consume it is disabled below.

    Returns:
        The ``"annotation"`` value of the decoded response, or ``""`` when the
        key is missing.
    """
    # Fixed, legal-reasoning system prompt used for every dataset.
    system_prompt = """Role Assignment: You are an expert legal reasoning assistant. Your role is to identify legal principles, analyze intent, and evaluate defenses to arrive at the correct answer for a legal problem based on the information provided. You will also segment the input question into parts and annotate them with reasoning to explain each segment's relevance.\n\nInstructions:\n\nAnnotation: Provide concise annotations for each segment, focusing on its importance and legal context. Specifically:\nHighlight intent, malice, and negligence in the given facts.\nEmphasize the relevance of statutory language such as "intentional killing" and "malice aforethought."\nAssess the relationship between the proposed defenses and legal principles."""

    # Disabled user-prompt preambles, kept for reference:
    #   f"Annotate the following text with one of the following options: {','.join(annotation_options)}."
    #   "Generate a short meaningful annotation for the segment based on the context so that others can understand the
    #    reconstructed annotation + seg + annotation + seg ... better, and provide more accurate responses."
    #   "If the segment is already clear enough, just return empty string as the annotation"
    #   "\n\n"
    user_content = f"Segment:\n\n{segment}\n\nContext:\n\n{context}\n\n"

    # Disabled alternative for the system message:
    #   {"role": "system", "content": ann_sys_prompt[dataset_name]}
    chat = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]

    # Structured-output schema with a single string field.
    raw_reply = llm.generate(
        messages=chat,
        response_format=create_model(
            "PSAOAnnotation",
            annotation=str,
        ),
    )

    return json.loads(raw_reply).get("annotation", "")


def psao_optimisation(
    llm,
    prompt,
    dataset_name,
):
    """Optimise the prompt using PSAO and LLM.

    The chat messages in *prompt* are flattened into a single string, the
    dataset's question suffix (if any) is removed from it, and the result is
    handed to ``one_go``.

    Args:
        llm: Passed through unchanged to ``one_go``.
        prompt: Iterable of message dicts; the ``"content"`` value of every
            message that has one is appended, followed by a newline.
        dataset_name: Key into ``question_suffix_dict``; also forwarded to
            ``one_go``.

    Returns:
        Whatever ``one_go`` returns.

    Raises:
        KeyError: If ``dataset_name`` is not a key of ``question_suffix_dict``.
    """
    banner = "=" * 17

    # Debug banner announcing which dataset is being processed.
    print(banner)
    print(banner)
    print(dataset_name)
    print(banner)
    print(banner)

    # Flatten the conversation; messages without "content" are skipped.
    prompt_str = "".join(
        _m["content"] + "\n" for _m in prompt if "content" in _m
    )

    question_suffix = question_suffix_dict[dataset_name]

    # Remove every occurrence of the suffix from the flattened prompt.
    if question_suffix is not None:
        prompt_str = prompt_str.replace(question_suffix, "")

    print(banner)
    print("!!! prompt string !!!")
    print(prompt_str)
    print(banner)

    return one_go(
        llm,
        prompt_str,
        dataset_name,
    )

    # seg_lst = segment_text(
    #     llm,
    #     prompt_str,
    # )
    # print("=" * 17)
    # print(seg_lst)
    # print("=" * 17)
    # ann_lst = []
    # for _seg in seg_lst:
    #     ann = annotate_text(
    #         llm,
    #         _seg,
    #         prompt_str,
    #         dataset_name,
    #     )
    #     ann_lst.append(ann)
    # print("=" * 17)
    # print(ann_lst)
    # print("=" * 17)
    # seg_ann_prompt_str = " ".join(
    #     [
    #         f"Annotation:\n{_ann}\nText:\n{_seg}\n"
    #         for _seg, _ann in zip(seg_lst, ann_lst)
    #     ]
    # )
    # if question_suffix is not None:
    #     seg_ann_prompt_str += "\n" + question_suffix.replace(", None}", "}")

    # prompt_best = [
    #     # {
    #     #     "role": "system",
    #     #     "content": "Follow the priority levels indicated in brackets carefully when solving the problem below:\n",
    #     # },
    #     # {
    #     #     "role": "system",
    #     #     "content": "Follow the priority levels indicated in brackets carefully when solving the problem below:\n",
    #     # },
    #     {
    #         "role": "user",
    #         "content": seg_ann_prompt_str,
    #     },
    # ]

    # print("=" * 17)
    # print("!!! PSAO prompt string !!!")
    # print(json.dumps(prompt_best, indent=2))
    # print("=" * 17)
    # return prompt_best
