from textwrap import dedent

def generalize_ambiguity_detection_prompt_template(
    query: str,
    *,
    total_hits: int,
    kl_divergence: float,
    relax_delta_ratio: float,
) -> str:
    """
    Return an LLM prompt asking whether `query` is a *generalize-ambiguous* query:
      ─ A query so overly specific that it likely misses the broader intent behind
        the user’s information need (dates, version numbers, quoted spans, etc.).

    Parameters (all metrics are keyword-only and are shown to the model under
    "Raw metric values"):
      query             – search query text, inserted verbatim.
      total_hits        – described in the prompt as the result count for the
                          literal query.
      kl_divergence     – described in the prompt as D_KL between the unigram
                          distribution of the top-k snippets and the corpus;
                          rendered with 3 decimals.
      relax_delta_ratio – described in the prompt as the largest fold-increase
                          in hits after removing one numeric/date/quoted
                          constraint; rendered with 2 decimals.

    Expected JSON output (no other text):
        { "is_ambiguous": "Y" | "N" }
    """
    # Dedent the *static* template first and substitute afterwards.  If the
    # values were interpolated before dedenting, a multi-line query would add
    # lines starting at column 0, the common margin would become empty, and
    # the whole prompt would keep its source-code indentation.
    template = dedent("""
    You are a linguistic expert.

    Definition
    ----------
    A **generalize-ambiguous query** is a user query that is *overly specific*
    or narrowly focused, so that a search engine may fail to retrieve relevant
    documents even though the user’s true goal is broader.

    Task
    ----
    1. Read the search query and its three **raw** metric values.
    2. Decide if the query is *generalize-ambiguous* (overly specific).
    3. Output ONLY the JSON object in the required format.

    Metric descriptions
    -------------------
    • total_hits – result count for the literal query.  
    • KL_divergence – D_KL between the unigram distribution of the top-k snippets
      and that of the whole corpus.  
    • relax_delta_ratio – largest fold-increase in hits after removing a single
      numeric/date/quoted constraint.

    Search query
    ------------
    {query}

    Raw metric values
    -----------------
    total_hits: {total_hits}
    kl_divergence: {kl_divergence:.3f}
    relax_delta_ratio: {relax_delta_ratio:.2f}

    Output format
    -------------
    {{
      "is_ambiguous": "Y"   // "Y" if the query is generalize-ambiguous, else "N"
    }}

    Rules
    -----
    - Use your expertise; no thresholds are provided.
    - No markdown, code fences, or extra keys.
    - Think silently; output only the JSON object above.
    """).strip()
    # str.format does not re-parse substituted values, so braces inside
    # `query` are safe; "{{" / "}}" above render as literal braces.
    return template.format(
        query=query,
        total_hits=total_hits,
        kl_divergence=kl_divergence,
        relax_delta_ratio=relax_delta_ratio,
    )


def generalize_ambiguity_clarification_prompt_template(
    query: str, *, min_versions: int = 2
) -> str:
    """
    Prompt to rewrite a generalize-ambiguous query into broader, more direct variants.
    Each clarification should surface the user’s core information need while
    removing needless specificity and nested indirections.

    Parameters:
      query        – the over-specific query, inserted verbatim under
                     "Original query".
      min_versions – keyword-only; minimum number of clarified queries the
                     prompt requests (default 2).

    The prompt asks the model to answer with a JSON object whose only key is
    "clarified_queries".
    """
    # Dedent the static template before substituting values: interpolating
    # first would let a multi-line query (whose continuation lines start at
    # column 0) cancel the dedent and leave every prompt line indented.
    template = dedent("""
    You are an information-retrieval and linguistics expert.

    Goal
    ----
    The query below is **generalize-ambiguous** (over-specific, tangled with
    indirect references).  Produce **at least {min_versions} clarified queries**
    that capture the *broader, underlying intent* a typical user would have.

    How to clarify
    --------------
    1. **Identify the core question** — what factual relationship or fact does
       the user ultimately want?
    2. **Resolve or drop cascading indirections**  
       • Replace chained phrases like “the country where X was born” with the
         direct entity name if obvious (e.g., “Thailand”).  
       • If not obvious, use a neutral phrase (“that country”, “this nation”).
    3. **Remove or soften excessive constraints**  
       • Exact dates, version numbers, nested qualifiers, quoted titles, etc.  
       • Keep only what is essential for relevance.
    4. **Keep meaning faithful** — don’t broaden so far that the answer set
       changes category (e.g., country → continent).
    5. Write in **clear, concise English.**

    Original query
    --------------
    {query}

    Output JSON
    -----------
    {{
      "clarified_queries": [
        "Clarified query 1 …",
        "Clarified query 2 …"
        // Add more if useful
      ]
    }}

    Mandatory rules
    ---------------
    - Key must be exactly "clarified_queries".
    - Provide ≥ {min_versions} items.
    - Do **not** output markdown, code fences, or explanations.
    """).strip()
    # Substituted values are not re-parsed by str.format, so braces in the
    # query are harmless.
    return template.format(query=query, min_versions=min_versions)


def syntactic_ambiguity_detection_prompt_template(sentence: str) -> str:
    """
    Build the prompt that asks an LLM whether `sentence` is syntactically
    ambiguous and, if so, which of the numbered phenomena apply.

    The prompt requests a JSON object with exactly two keys:
      • "is_ambiguous" – "Y" or "N"
      • "categories"   – ascending list of applicable phenomenon numbers
    """
    # Single source of truth for the catalogue.  Both the phenomenon count and
    # the valid number range quoted in the instructions are derived from it,
    # so they cannot drift from the list that is actually shown to the model
    # (the instructions previously advertised 18 phenomena while listing 14).
    phenomena = (
        "PP Attachment",
        "Relative-Clause Ambiguity",
        "Coordination Scope",
        "Comparative Attachment / Ellipsis",
        "Quantifier / Negation Scope",
        "Dangling / Misplaced Modifier",
        "Genitive-Chain Attachment",
        "Complement vs. Adjunct",
        "Gerund vs. Participle",
        "Ellipsis / Gapping",
        "If-clause Attachment",
        "Right-Node Raising",
        "Adjective Stacking / Coordination",
        "Inclusive vs. Exclusive or",
    )
    count = len(phenomena)
    # Each entry is rendered as "<number>.<TAB><label>".
    numbered = "\n".join(
        f"{idx}.\t{label}" for idx, label in enumerate(phenomena, start=1)
    )
    prompt = f"""You are a linguistics expert.

Task
-----
1. Read the sentence below.
2. Decide whether it is syntactically ambiguous according to any of the {count} phenomena listed.
3. If ambiguous, list every phenomenon number that applies (choose from 1-{count}).

Syntactic-ambiguity phenomena (numbered)
{numbered}

Sentence to evaluate
--------------------
{sentence}

Output format
-------------
Return ONLY this JSON and nothing else:
{{
  "is_ambiguous": "Y",      // "Y" if ≥1 phenomenon applies, else "N"
  "categories": [1, 3, 7]   // ascending list of all applicable numbers; [] if "N"
}}

Rules
-----
- No markdown, code fences, or explanations.
- Keys must be exactly "is_ambiguous" and "categories".
- Think silently; show only the JSON.
""".strip()
    return prompt
  
def syntactic_clarification_prompt_template(sentence: str,
                                            min_versions: int = 2) -> str:
    """
    Build the prompt that asks an LLM to rewrite a syntactically ambiguous
    question into at least `min_versions` clarified questions, one per
    syntactic interpretation.

    The prompt requests a JSON object whose only key is "clarified_queries".
    """
    # One tuple entry per prompt line.  Several lines end in two spaces; that
    # whitespace is part of the prompt text, so it is spelled out inside the
    # quotes where an editor's trailing-whitespace trimming cannot drop it.
    prompt_lines = (
        "You are a linguistics expert.",
        "",
        "Task",
        "-----",
        "The following question is syntactically ambiguous.  ",
        f"Rewrite it into **at least {min_versions} distinct clarified questions**, each of which",
        "corresponds to a different syntactic interpretation (e.g., different attachment,",
        "scope, or clause structure).  ",
        "- Keep the factual topic the same.  ",
        "- Make each clarified question fully unambiguous on its own.  ",
        "- Use natural, concise English.",
        "",
        "Question",
        "--------",
        f"{sentence}",
        "",
        "Output format",
        "-------------",
        "Return **ONLY** the JSON object below—no markdown, no code fences, no prose.",
        "",
        "{",
        '  "clarified_queries": [',
        '    "Clarified question 1 …",',
        '    "Clarified question 2 …"',
        f"    // You may include more than {min_versions} if necessary",
        "  ]",
        "}",
        "",
        "Rules",
        "-----",
        '* The key must be exactly "clarified_queries".',
        f"* The array must contain **at least {min_versions} strings**.",
        "* Do not include any other keys or text.",
    )
    return "\n".join(prompt_lines).strip()

def semantic_ambiguity_detection_prompt_template(sentence: str) -> str:
    """
    Build the prompt that asks an LLM whether `sentence` is semantically
    ambiguous.

    The prompt defines semantic ambiguity as the query lacking context, so
    that multiple reasonable interpretations are possible.

    The requested JSON output has a single key, `"is_ambiguous"`; the prompt
    tells the model to output "Y" if ambiguous, otherwise "N".  No category
    list is requested.
    """
    # One tuple entry per prompt line; the sentence is dropped in verbatim.
    prompt_lines = (
        "You are a linguistics expert.",
        "",
        "Task",
        "-----",
        "1. Read the sentence below.",
        "2. Decide whether it is **semantically ambiguous** (definition: the query lacks",
        "   context, allowing multiple reasonable interpretations).",
        '3. Output **"Y"** if ambiguous, otherwise **"N"**.',
        "",
        "Sentence to evaluate",
        "--------------------",
        f"{sentence}",
        "",
        "Output format",
        "-------------",
        "Return ONLY this JSON and nothing else:",
        "{",
        '  "is_ambiguous": "Y"',
        "}",
        "",
        "Rules",
        "-----",
        "- No markdown, code fences, or explanations.",
        '- The key must be exactly "is_ambiguous".',
        "- Think silently; show only the JSON.",
    )
    return "\n".join(prompt_lines).strip()


def semantic_clarification_prompt_template(sentence: str,
                                           min_versions: int = 2) -> str:
    """
    Build the prompt that asks an LLM to rewrite a semantically ambiguous
    question into at least `min_versions` clarified questions, each resolving
    a different interpretation.

    The prompt requests a JSON object whose only key is "clarified_queries".
    """
    # One tuple entry per prompt line.  Several lines end in two spaces; that
    # whitespace is part of the prompt text, so it is spelled out inside the
    # quotes where an editor's trailing-whitespace trimming cannot drop it.
    prompt_lines = (
        "You are a linguistics expert.",
        "",
        "Task",
        "-----",
        "The following question is **semantically ambiguous** (it lacks context and can",
        "be interpreted in multiple ways).  ",
        f"Rewrite it into **at least {min_versions} distinct clarified questions**, each",
        "representing one clear, unambiguous meaning.  ",
        "- Preserve the original factual topic.  ",
        "- Add the necessary context (time, referent, sense, etc.) so that each new",
        "  question yields only a single interpretation.  ",
        "- Use natural, concise English.",
        "",
        "Question",
        "--------",
        f"{sentence}",
        "",
        "Output format",
        "-------------",
        "Return **ONLY** the JSON object below—no markdown, no code fences, no prose.",
        "",
        "{",
        '  "clarified_queries": [',
        '    "Clarified question 1 …",',
        '    "Clarified question 2 …"',
        f"    // You may include more than {min_versions} if necessary",
        "  ]",
        "}",
        "",
        "Rules",
        "-----",
        '* The key must be exactly "clarified_queries".',
        f"* The array must contain **at least {min_versions} strings**.",
        "* Do not include any other keys or text.",
    )
    return "\n".join(prompt_lines).strip()


# Prompt template for merging two short answers (A1, A2) into one long answer
# to the original ambiguous question.  It carries six single-brace
# placeholders: {schema}, {orig_q}, {cq1}, {a1}, {cq2}, {a2}.
# NOTE(review): presumably filled via str.format by the caller — confirm.
LONG_ANS_GEN_PROMPT_TMPL = "\n".join((
    "You are an expert open-domain QA assistant.",
    "",
    "TASK",
    "====",
    "Combine two validated short answers (A1, A2) to create a **coherent and independent long answer** to the original ambiguous question (OQ).",
    "",
    "• If both A1 and A2 can be true, merge the two answers into 1-3 fluent sentences.",
    "• Create a single long answer that considers all the answers to the clarified query.",
    "• Do not create new facts other than A1 and A2.",
    "",
    "Output",
    "======",
    "Return only JSON objects that match the following schema:",
    "{schema}",
    "",
    "No markdown, no code fences, no extra keys.",
    "",
    "Data",
    "----",
    "Original Question (OQ)",
    "----------------------",
    "{orig_q}",
    "",
    "Clarified Q1 | Short Answer A1",
    "--------------------------------",
    "{cq1}",
    "A1 = {a1}",
    "",
    "Clarified Q2 | Short Answer A2",
    "--------------------------------",
    "{cq2}",
    "A2 = {a2}",
    "",
    "JSON:",
))



def build_query_decomposition_prompt(query: str) -> str:
    """
    Build an English prompt that asks the model to decompose a multi-hop
    question into ordered single-hop sub-queries.

    Parameters
    ----------
    query : str
        The clarified multi-hop question.  It is inserted verbatim between
        triple double quotes on the final "Question:" line and may span
        several lines.

    Returns
    -------
    str
        The prompt string, with no leading indentation and no surrounding
        whitespace.
    """
    # Dedent the static template first, then substitute.  Interpolating the
    # query before dedenting would let a multi-line query (continuation lines
    # at column 0) reduce the common margin to nothing, leaving every prompt
    # line indented by the source-code indentation.
    template = dedent("""
    You are an expert in information retrieval.
    Break the given complex question into the minimal set of atomic
    single-hop sub-questions **in the exact order** needed to fully answer it.

    • Output each sub-question as a Markdown bullet that starts with “* ”.
    • Each sub-question must ask for exactly one fact or relationship.
    • Do **NOT** include explanations, background, or anything else.
    • Write the sub-questions in English only.

    Example  
    Q: Which club won the Premier League in 2020 and who was its manager?  
    Output:  
    * Which club won the Premier League in 2020?  
    * Who was the manager of Liverpool F.C. in 2020?

    Question: \"\"\"{query}\"\"\"
    """).strip()
    # The query sits inside the closing quotes, so stripping the template
    # before substitution cannot touch the query's own whitespace.
    return template.format(query=query)
    