from src.utils import underscore_to_pascalcase
from typing import List, Dict, Optional
# Prompt template asking an LLM to match build errors in a failing snippet (FAIL)
# against fixes visible in a reference snippet (REF), answering in JSON only.
# Rendered with str.format(); expects the fields `correct_code`, `err_code` and
# `compile_error_log`. Doubled braces ({{ }}) are emitted as literal JSON braces.
debug_compile_template = '''
You are a compile/link debugging assistant for Ascend custom operators (C/C++ + pybind).

GOAL
Find any directly applicable fix(es) in REF for FAIL’s build error(s).
If at least one concrete (problem, solution) match exists, set "reference_has_solution": true.
Otherwise set "reference_has_solution": false and "items": [].

INPUTS (do NOT echo)
REF:
{correct_code}

FAIL:
{err_code}

FAIL BUILD ERROR LOG:
{compile_error_log}

GUIDANCE
- Use the error log as ground truth; justify with REF/FAIL.
- Be concise. Do NOT repeat input code or logs.
- A "match" maps one FAIL issue to a specific fix visible in REF (e.g., header, signature, linkage, binding, minimal env step).

OUTPUT — JSON ONLY:
{{
  "reference_has_solution": true|false,
  "items": [
    {{"problem": "<brief but accurate FAIL issue>", "solution": "<brief but accurate matching fix in REF>"}},
    {{"problem": "...", "solution": "..."}}
  ],
  "notes_if_false": "<if false: brief reason no direct match exists>"
}}

CONSTRAINTS
- If reference_has_solution=false, items=[]
- If true, include 1–5 matched items. Each string ≤ 12 words. No extra keys.
'''

# Prompt template asking an LLM whether a reference snippet (REF) directly fixes the
# runtime/functional error of a failing snippet (FAIL), answering in JSON only.
# Rendered with str.format(); expects the fields `correct_code`, `err_code` and
# `runtime_error_log`. Doubled braces ({{ }}) are emitted as literal JSON braces.
# The CONSTRAINTS section refers to the `items` key so it agrees with the schema above it.
debug_correct_template = '''
You are a runtime correctness debugging assistant for Ascend custom operators (C/C++ with pybind11).

GOAL  
Determine if the reference snippet (REF) directly fixes the runtime or functional error in FAIL. The fix must be minimal and preserve the intended semantics.

INPUTS (do NOT echo them)  
REF:  
{correct_code}

FAIL:  
{err_code}

RUNTIME ERROR LOG:  
{runtime_error_log}

GUIDANCE  
- Use the runtime/assert log as the primary clue.  
- Ground your answer in a clear, minimal delta between REF and FAIL.  
- Be concise. Do NOT quote code or log lines.

OUTPUT — JSON ONLY:  
{{
  "reference_has_solution": true|false,
  "items": [
    {{"problem": "<brief but accurate FAIL issue>", "solution": "<brief but accurate matching fix in REF>"}},
    {{"problem": "...", "solution": "..."}}
  ],
  "notes_if_false": "<if false: brief reason no direct match exists>"
}}

CONSTRAINTS  
- If reference_has_solution=false, set items=[].  
- Only say true if REF provides a clear, minimal fix.  
- Keep all list items short (≤12 words). No extra keys.
'''

# Prompt template asking an LLM for bottleneck/optimization pairs between a slower
# snippet (SLOW) and a faster one (FAST), with performance metrics for both included.
# Written for str.format(): the placeholders are `slow_code`, `fast_code`, `perf_slow`
# and `perf_fast`; doubled braces ({{ }}) become literal JSON braces.
# NOTE(review): no function in the visible part of this module renders this template.
optimize_template_w_metrics = '''
You are a performance optimization assistant for Ascend custom operators (C/C++ with pybind11).

GOAL  
Determine if the faster snippet (FAST) shows at least one concrete, code-visible optimization that can be directly and minimally applied to the slower snippet (SLOW), without changing correctness.

INPUTS (do NOT echo them)  
SLOW:  
{slow_code}

FAST:  
{fast_code}

PERFORMANCE METRICS:  
SLOW:  
{perf_slow}

FAST:  
{perf_fast}

GUIDANCE  
- Optimization must be visible in FAST code.  
- Speedup must be due to code logic, not incidental factors.  
- For each identifiable optimization, output a bottleneck/optimization pair.  
- Be concise. Do NOT repeat code or log lines.

OUTPUT — JSON ONLY:  
{{
  "reference_has_solution": true|false,
  "items": [
    {{
      "bottleneck": "<brief but accurate slowdown or inefficiency in SLOW>",
      "optimization": "<brief but accurate corresponding optimization in FAST>"
    }},
    ...
  ],
  "notes_if_false": "<if false: reason FAST is not applicable to SLOW>"
}}

CONSTRAINTS  
- Set reference_has_solution=true if at least one valid bottleneck/optimization pair is found.  
- If no transferable optimization is found, set reference_has_solution=false and items=[].  
- Each field must be ≤12 words. No extra keys.
'''

# Prompt template asking an LLM for bottleneck/optimization pairs between a slower
# snippet (SLOW) and a faster one (FAST), based on the code alone (no metrics).
# Rendered with str.format(); expects the fields `slow_code` and `fast_code`.
# Doubled braces ({{ }}) are emitted as literal JSON braces.
optimize_template = '''
You are a performance optimization assistant for Ascend custom operators (C/C++ with pybind11).

GOAL  
Determine if the faster snippet (FAST) shows at least one concrete, code-visible optimization that can be directly and minimally applied to the slower snippet (SLOW), without changing correctness.

INPUTS (do NOT echo them)  
SLOW:  
{slow_code}

FAST:  
{fast_code}

GUIDANCE  
- Optimization must be visible in FAST code.  
- Speedup must be due to code logic, not incidental factors.  
- For each identifiable optimization, output a bottleneck/optimization pair.  
- Be concise. Do NOT repeat code or log lines.

OUTPUT — JSON ONLY:  
{{
  "reference_has_solution": true|false,
  "items": [
    {{
      "bottleneck": "<brief but accurate slowdown or inefficiency in SLOW>",
      "optimization": "<brief but accurate corresponding optimization in FAST>"
    }},
    ...
  ],
  "notes_if_false": "<if false: reason FAST is not applicable to SLOW>"
}}

CONSTRAINTS  
- Set reference_has_solution=true if at least one valid bottleneck/optimization pair is found.  
- If no transferable optimization is found, set reference_has_solution=false and items=[].  
- Each field must be ≤12 words. No extra keys.
'''


def extract_experience_template(old_code: str, new_code: str, mod: str = "debug_compile", **kwargs) -> str:
    """Render the experience-extraction prompt for one (old_code, new_code) pair.

    Args:
        old_code: The failing snippet (debug modes) or the slower snippet ("optimize").
        new_code: The reference snippet (debug modes) or the faster snippet ("optimize").
        mod: Which prompt to render: "debug_compile", "debug_correct" or "optimize".
        **kwargs: ``err_msg`` (optional) is the error log inserted into the debug
            prompts; it is not read in "optimize" mode.

    Returns:
        The formatted prompt string.

    Raises:
        ValueError: If ``mod`` is not one of the supported modes.
    """
    if mod in ("debug_compile", "debug_correct"):
        # A missing or empty log is rendered as the literal text "Nothing".
        error_log = kwargs.get("err_msg") or "Nothing"
        if mod == "debug_compile":
            return debug_compile_template.format(
                correct_code=new_code,
                err_code=old_code,
                compile_error_log=error_log,
            )
        return debug_correct_template.format(
            correct_code=new_code,
            err_code=old_code,
            runtime_error_log=error_log,
        )
    if mod == "optimize":
        return optimize_template.format(
            slow_code=old_code,
            fast_code=new_code,
        )
    # Name the offending value so a misconfigured caller is easy to diagnose.
    raise ValueError(
        f"Unsupported mod {mod!r}; expected one of "
        "'debug_compile', 'debug_correct', 'optimize'."
    )



# experience_judgement_template="""
# You are a Memory Curator. Your job is to evaluate whether ONE past debugging/compile
# experience (called "memory") was helpful in the current round.

# The memory has a structured format:

# - PROBLEM: what kind of error or symptom this memory claims to solve.
# - SOLUTION: what code change or strategy it suggests.

# You will receive:
# - MEM_PROBLEM: extracted problem text
# - MEM_SOLUTION: extracted solution text
# - CODE_DIFF: unified diff between code_before and code_after
# - PREVIOUS_ERROR: the error/logs before applying the memory
# - NEW_ERROR: the error/logs after applying the memory
# - IS_FIXED: whether the overall problem is fully fixed

# Your tasks:
# 1. Decide if this MEM_PROBLEM matches the PREVIOUS_ERROR.
# 2. Inspect CODE_DIFF and MEM_SOLUTION to judge whether the suggested solution
#    was actually applied in the code change. If yes → applied = true, else false.
# 3. Compare PREVIOUS_ERROR and NEW_ERROR, focusing on the original error in PREVIOUS_ERROR:
#    - Decide whether that original error has clearly disappeared, improved, stayed
#      the same, or worsened.
#    - Additionally, check whether NEW_ERROR contains new errors that are directly
#      related to the operation suggested in MEM_SOLUTION (for example, new errors
#      mentioning the same function/API/type introduced by the solution).

# Output ONLY a JSON object with EXACTLY these keys:
# {{
#   "applied": true or false,
#   "outcome": "success" | "neutral" | "fail" | "mislead",
#   "reason": "a short explanation"
# }}

# Rules for outcome:
# - If applied = false → outcome = "neutral".
# - If applied = true and the problem (MEM_PROBLEM) clearly improved → "success".
# - If applied = true and the problem clearly worsened → "mislead".
# - If applied = true and the problem neither clearly improved nor worsened → "fail".

# MEM_PROBLEM:
# {mem_problem}

# MEM_SOLUTION:
# {mem_solution}

# CODE_DIFF:
# {code_diff}

# PREVIOUS_ERROR:
# {prev_err_msg}

# NEW_ERROR:
# {new_err_msg}

# IS_FIXED:
# {is_fixed}
# """



# Prompt template asking an LLM to summarize one compile-stage debugging round as a
# JSON object with the keys "problem", "solution" and "effect".
# Written for str.format(): the placeholders are `code_diff`, `plan`, `prev_err_msg`
# and `new_err_msg`; doubled braces ({{ }}) become literal JSON braces.
# NOTE(review): no function in the visible part of this module renders this template.
debug_compile_summary_template = """
You are an Experience Summarizer for the COMPILE stage of a kernel generation agent.

Your goal:
Extract ONE **reusable, generalizable compile-time debugging experience** from this round,
so future rounds can directly benefit when encountering similar compiler failures.

You MUST produce a concise but *meaningful* and *actionable* experience summary.

-----------------------------------------
### HOW TO THINK
Always reason in this order:

1. **Identify the specific error from PREVIOUS_ERROR**
   - Focus on the specific issue reported by the compiler (e.g., incorrect API usage, undefined variable, type mismatch, missing header file, function call parameter error, etc.)
   - Describe the specific error details, but avoid referencing line numbers or file paths.

2. **Infer the debugging action from CODE_DIFF + PLAN**
   - Identify the specific fix applied to the error (e.g., replaced an incorrect API, added variable definition, corrected type declaration, adjusted function parameters, etc.)
   - Describe the specific fix action, such as which correct API was used or how the error was corrected.

3. **Compare PREVIOUS_ERROR and NEW_ERROR**
   - Evaluate whether the action resolved the original error, partially resolved it, had no effect, or introduced new issues.
   - Judge based only on the original problem, not unrelated new errors.

-----------------------------------------
### WHAT TO OUTPUT
You MUST output ONLY a JSON object with EXACTLY these keys:

{{
  "problem": "...",
  "solution": "...",
  "effect": "success" | "partial_success" | "neutral" | "harmful"
}}

### REQUIRED CONTENT QUALITY
Your answer must satisfy:

#### For "problem":
- Clearly express the specific error, e.g.:
  - "Used an undefined API method when accessing tensor shapes"
  - "Undefined variable 'x' caused compilation error"
  - "Type mismatch: passed float to a function expecting int"
  - "Missing necessary header file include, leading to undeclared API function"
- Avoid referencing line numbers or file paths; only describe the error itself.

#### For "solution":
- Describe the feasible solution applied to the specific error, e.g.:
  - "Replaced the undefined API method with the correct dim_size() method"
  - "Added definition and initialization for variable 'x'"
  - "Corrected the function parameter type to float"
  - "Added the missing header file to declare the API function"
- Avoid trivial descriptions; focus on specific fix actions and the correct elements used.

#### For "effect":
- "success": original error fully resolved
- "partial_success": error partially resolved or improved
- "neutral": no change
- "harmful": introduced new errors or worsened the situation

-----------------------------------------
### INPUTS PROVIDED TO YOU

CODE_DIFF:
{code_diff}

PLAN:
{plan}

PREVIOUS_ERROR:
{prev_err_msg}

NEW_ERROR:
{new_err_msg}
"""


def generate_draft_round_succ_summary_template(
  op: str,
  arc_src: str,
  error_attempt: Optional[str] = None,
  error_exps: Optional[List[str]] = None,
  verified_code: Optional[str] = None,
  verified_plan: Optional[str] = None,
) -> str:
  """Build the prompt that asks an LLM to extract reusable patterns from a verified implementation.

  The prompt always contains the role/goal section, the `<pytorch_reference>` block
  and the output-format section. Every other input block is emitted only when the
  matching argument is truthy.

  Args:
    op: Operator name; also passed to `underscore_to_pascalcase` for the PascalCase form.
    arc_src: Text placed inside `<pytorch_reference>`.
    error_attempt: Optional text for `<failed_implementation>`; also enables the
      "Correction Pattern" criterion.
    error_exps: Optional review notes, rendered as a 1-based numbered list inside
      `<failed_implementation_review>`.
    verified_code: Optional text for `<verified_implementation>`; also enables the
      "Implementation / Design Pattern" criterion.
    verified_plan: Optional text for `<verified_plan>`.

  Returns:
    The assembled prompt string.
  """
  pascal_op = underscore_to_pascalcase(op)
  role_template = f"""You are an expert **AscendC Kernel Knowledge Engineer**.

## Goal
From a **successfully verified** AscendC implementation of PyTorch operator `{op}` (PascalCase: `{pascal_op}`),
extract **NON-TRIVIAL, reusable implementation patterns** proven correct in practice.

Focus on **design-level decisions** over generic development steps.

"""
  input_template = f"""## Input (single block, tag-based parsing)
<pytorch_reference>
{arc_src}
</pytorch_reference>

"""

  pattern_template = """## What constitutes a “Pattern” (Success Experience Types)
A valid pattern MUST satisfy at least one of the following criteria:"""
  if error_attempt:
    input_template += f"""
<failed_implementation>
{error_attempt}
</failed_implementation>

"""
    pattern_template += """
- **Correction Pattern (Failure Insight)**: When the `<failed_implementation>` and `<failed_implementation_review>` blocks provide clear errors or design flaws. And the `<verified_implementation>` block provides a correct implementation which solves the problem.
"""
  if error_exps:
    numbered_reviews = "".join(
      f"{idx}. {content}\n" for idx, content in enumerate(error_exps, start=1)
    )
    # End the block with a blank line like the other input blocks; without it the
    # closing tag is glued to the next heading when no verified code/plan follows.
    input_template += (
      f"<failed_implementation_review>\n{numbered_reviews}\n</failed_implementation_review>\n\n"
    )

  if verified_code:
    input_template += f"""
<verified_implementation>
{verified_code}
</verified_implementation>

"""
    pattern_template += """
- **Implementation / Design Pattern (Reusable Insight)**: When the `<verified_implementation>` block exposes a non-obvious but reusable implementation detail or design-level insight—grounded in concrete code artifacts (e.g. formulas, APIs, control structure, data layout choices)—that can be directly reused or adapted in future AscendC operator implementations. The pattern may reflect either a concrete coding rule or a higher-level design idea, but it must be explicitly supported by the implementation and represent high-signal knowledge that would be easy to miss without examining this code.

"""
  if verified_plan:
    input_template += f"""
<verified_plan>
{verified_plan}
</verified_plan>

"""

  output_template = f"""
## Pattern Types (Only these two types are allowed)
- **Operator-Specific**: Label a pattern as **Operator-Specific** if it depends on `{op}`’s semantics, such as operator attributes (e.g. `alpha`, `dim`, `eps`), operator-bound math or constants (e.g. `exp`, normalization), output or numerical semantics, or shape interpretation tied to specific dimensions or broadcasting rules.

- **General**: Label a pattern as **General** if it describes AscendC/AICore implementation mechanisms independent of operator semantics, such as tiling and tail handling, pipeline or queue/buffer organization,data-movement vs. compute structure, or shape flattening based on total element count.

## Exclusion Criteria (AVOID)
- Do NOT restate generic workflow steps (e.g., “host passes tiling to kernel”).
- Do NOT summarize the operator’s mathematical function or general correctness.
- Do NOT infer intentions or steps not explicitly visible in the code.

## Constraints (Quality First)
- Fact-based only: extract information strictly grounded in what is visible in the provided content. Do not infer hidden behavior or speculate beyond the code.
- Evidence preferred (not mandatory): when a pattern is supported by concrete code, include 1–2 short identifier-level snippets (e.g. function calls, variable names, expressions). High-level design insights are allowed when clearly reflected by the implementation, even if no single snippet fully captures them.

## Output Format(JSON ONLY)
Return a JSON array with **2–4** entries:
[
  {{
    "type": "General | Operator-Specific (only these two types are allowed)",
    "content": "Explain the pattern. Cite relevant short code quotes if necessary."
  }},
  ...
]
"""
  return role_template + input_template + pattern_template + output_template

def generate_draft_round_err_summary_template(
  op: str,
  arc_src: str,
  err_code: str,
  err_plan: str,
  err_msg: str,
) -> str:
  """Build the prompt that asks an LLM for reusable failure notes about a failed implementation.

  Args:
    op: Operator name; also passed to `underscore_to_pascalcase` for the PascalCase form.
    arc_src: Text placed inside `<pytorch_reference>`.
    err_code: Text placed inside `<failed_implementation>`.
    err_plan: Text placed inside `<original_plan>`.
    err_msg: Text placed inside `<error_log>`.

  Returns:
    The prompt sections joined in order: role/goal, inputs, failure-note
    definition, failure-note types, constraints, output format.
  """
  op_pascal = underscore_to_pascalcase(op)

  sections = []

  # Role and goal.
  sections.append(f"""You are an expert **AscendC Debugging Analyst**.

## Goal
From a failed AscendC multi-stage implementation of PyTorch operator `{op}` (PascalCase: `{op_pascal}`),
extract **high-signal, reusable failure notes** that help future implementations avoid repeating the same mistakes.

Focus on **objective failure causes and flawed patterns** grounded in the provided code and error log.
Avoid “reflection”, emotional language, and generic advice.
""")

  # All four tagged input blocks are always present.
  sections.append(f"""## Input (single block, tag-based parsing)
<pytorch_reference>
{arc_src}
</pytorch_reference>

<failed_implementation>
{err_code}
</failed_implementation>

<original_plan>
{err_plan}
</original_plan>

<error_log>
{err_msg}
</error_log>

""")

  # Definition of a failure note.
  sections.append("""## What constitutes a “Failure Note”
A valid failure note is a **high-signal, reusable description of what failed and where it manifested**, grounded in the <error_log>.

It can describe ANY failure mechanism, such as (examples only, not exhaustive):
- API / syntax / type / build / runtime errors/etc. in <failed_implementation> revealed by the <error_log>
- Semantic or logic in <failed_implementation> divergence from the <pytorch_reference>
- The <failed_implementation> runs but the produced output does not match the <pytorch_reference> result (e.g. value differences, shape differences, dtype differences)

Do NOT force the note into a predefined category—describe the failure in the most direct and evidence-backed way.
""")

  # Classification labels.
  sections.append(f"""
## Failure Note Types (Only these two types are allowed)
- **Operator-Specific**: Label a failure note as **Operator-Specific** if it depends on `{op}`’s semantics, such as operator attributes (e.g. `alpha`, `dim`, `eps`), operator-bound math/constants (e.g. `exp`, normalization), AscendC API usage in this operator, output/numerical semantics, or shape interpretation tied to specific dimensions/broadcasting rules.

- **General**: Label a failure note as **General** if it describes AscendC/AICore implementation issues independent of operator semantics.

""")

  # Quality constraints.
  sections.append("""## Constraints (Quality First)
- Fact-based only: describe only what is supported by the visible code and error log. Do not invent missing context.
- Evidence preferred (not mandatory): when supported, include code snippets (API calls, variable names, expressions) from <failed_implementation>.
- High-signal only: omit generic workflow statements and obvious facts (e.g. “compilation failed” without a concrete cause).
- No solutions: do NOT prescribe fixes or “how to do better”; only document what failed and where it manifested.
""")

  # Required output shape.
  sections.append("""## Output (JSON ONLY)
Return a JSON array with **2–4** entries:
[
  {
    "type": "General | Operator-Specific (only these two types are allowed)",
    "content": "Describe the failure cause/pattern concisely. Include short code quotes if helpful."
  },
  ...
]
""")

  return "".join(sections)
def _format_performance(perf: Optional[Dict]) -> str:
  """Render a performance dict as a single bullet line ending in a newline.

  A non-empty dict yields its `mean`, `std`, `min`, `max` and `num_trials`
  entries (missing keys print as `None`); anything else yields a fixed
  "no metrics" line.
  """
  if not isinstance(perf, dict) or not perf:
    return "- No performance metrics available for this run.\n"
  shown_keys = ("mean", "std", "min", "max", "num_trials")
  rendered = ", ".join(f"{key}={perf.get(key)}" for key in shown_keys)
  return f"- {rendered}\n"
def generate_optimize_round_succ_summary_template(
  op: str,
  arc_src: str,
  current_code: str,
  current_plan: str,
  parent_code: str,
  parent_performance: Dict,
  current_performance: Dict
):
  """Build the prompt that asks an LLM for optimization patterns from a successful optimization round.

  Args:
    op: Operator name; also passed to `underscore_to_pascalcase` for the PascalCase form.
    arc_src: Text placed inside `<pytorch_reference>`.
    current_code: Text for `<optimized_implementation>`; block omitted when falsy.
    current_plan: Text for `<optimized_plan>`; block omitted when falsy.
    parent_code: Text placed inside `<previous_implementation>`.
    parent_performance: Metrics of the previous implementation.
    current_performance: Metrics of the optimized implementation.

  Returns:
    The assembled prompt string. The `<performance_metrics>` block is appended
    after the other inputs, and only when both metric arguments are truthy.
  """
  op_pascal = underscore_to_pascalcase(op)

  # Role and goal.
  header = f"""You are an expert **AscendC Performance Optimization Analyst**.

## Goal
From a **successfully optimized and verified** AscendC implementation of PyTorch operator `{op}` (PascalCase: `{op_pascal}`), extract **high-signal, reusable performance optimization patterns** that can guide future optimization efforts.

Focus on **concrete performance bottlenecks, optimization techniques, and design decisions** that led to improved performance. Ground all insights in the provided code artifacts and performance metrics. Avoid generic advice, emotional language, and speculation beyond what the code demonstrates.
"""

  # Input blocks: the first is unconditional, the rest are optional.
  input_blocks = [f"""## Input (tag-based parsing)
<pytorch_reference>
{arc_src}
</pytorch_reference>

<previous_implementation>
{parent_code}
</previous_implementation>

"""]

  if current_code:
    input_blocks.append(f"""<optimized_implementation>
{current_code}
</optimized_implementation>

""")

  if current_plan:
    input_blocks.append(f"""<optimized_plan>
{current_plan}
</optimized_plan>

""")

  # Metrics go last and need both sides of the comparison.
  if parent_performance and current_performance:
    input_blocks.append(f"""<performance_metrics>
Previous Implementation: {_format_performance(parent_performance)}
Optimized Implementation: {_format_performance(current_performance)}
</performance_metrics>

""")

  # Analysis guidance.
  how_to_analyze = """## How to Analyze (Step-by-Step)
1. **Compare implementations**: Identify concrete differences between `<previous_implementation>` and `<optimized_implementation>` that relate to performance.
2. **Examine optimization plan**: If `<optimized_plan>` is provided, understand the optimization strategy and how it was realized in code.
3. **Extract patterns**: Focus on reusable optimization techniques, not one-off fixes specific to this exact code.

"""

  # Pattern definition.
  pattern_definition = """## What constitutes a "Pattern" (Performance Optimization Insights)

A valid pattern MUST satisfy at least one of the following criteria:

- **Optimization Technique Pattern**: Any code-level optimization that is directly observable when comparing `<previous_implementation>` and `<optimized_implementation>`. This includes changes in code structure, algorithm, or implementation approach that contribute to performance improvement. The pattern must be concrete and traceable in the code differences.

- **Bottleneck Identification Pattern**: Any specific performance bottleneck in `<previous_implementation>` that was addressed or mitigated in `<optimized_implementation>`. The bottleneck should be identifiable from code analysis, and the resolution must be visible in the optimized code.

- **Design-Level Optimization Pattern**: Any reusable design decision in `<optimized_implementation>` or `<optimized_plan>` that impacts performance and can be adapted to other operators.

- **Performance-Critical Implementation Detail**: Any specific implementation detail in `<optimized_implementation>` that is performance-sensitive and represents reusable knowledge applicable beyond this specific case.

**Key principle**: Extract only optimization insights that are concrete, reusable, and grounded in observable code differences. Focus on what the code demonstrates.

"""

  # Classification labels.
  pattern_labels = f"""## Pattern Types (Classification Guide)
Each pattern must be classified as exactly one of the following types:

- **Operator-Specific**: Use this label when the optimization pattern is tied to `{op}`'s specific semantics, attributes, mathematical operations, or domain-specific requirements. The pattern would not apply directly to other operators without modification.

- **General**: Use this label when the optimization pattern describes AscendC/AICore mechanisms, techniques, or strategies that are independent of operator semantics and can be directly applied to other operators.

**Guidance**: When in doubt, consider whether the insight would be useful for optimizing a different operator. If yes, it's likely General; if it's specific to `{op}`'s behavior, it's Operator-Specific.
"""

  # Exclusion criteria.
  exclusions = """## Exclusion Criteria (What NOT to Include)
Avoid extracting patterns that fall into these categories:

- **Generic workflow descriptions**: Do not restate standard AscendC workflow steps (e.g., "host passes tiling to kernel", "kernel executes computation"). These are implementation requirements, not optimization insights.

- **Operator semantics summaries**: Do not describe the operator's mathematical function, general correctness requirements, or what the operator is supposed to do. Focus on how it was optimized, not what it computes.

- **Unobservable inferences**: Do not infer optimization intentions, motivations, or intermediate steps that are not explicitly visible in the code, plan, or metrics. Stick to what can be directly observed.

- **Trivial or obvious patterns**: Do not include patterns that are obvious without specific context (e.g., "use vectorized operations" without explaining the specific context, implementation, or impact). Patterns should provide non-trivial insights.

- **Non-code-based optimizations**: Do not describe optimizations that cannot be directly observed in the code differences between `<previous_implementation>` and `<optimized_implementation>`. The optimization must be traceable in the code.
"""

  # Quality constraints.
  quality_rules = """## Constraints (Quality Standards)
Adhere to these principles when extracting patterns:

- **Provide concrete references**: When a pattern is supported by code, include 1–2 short, specific code references (e.g., function names, variable names, expressions, loop structures, API calls). High-level design insights are acceptable when they are clearly reflected in the implementation structure.

- **Ensure actionability**: Each pattern should be specific enough to guide future optimization decisions. Explain not just what was optimized, but also how it was optimized and why it improved performance. The pattern should enable someone to apply similar optimizations in other contexts.
"""

  # Required output shape.
  output_spec = """## Output Format (JSON ONLY)
Return a JSON array with **2–4** entries:
[
  {
    "type": "General | Operator-Specific (only these two types are allowed)",
    "content": "Explain the optimization pattern concisely."
  },
  ...
]

Each entry should represent a distinct, high-signal optimization insight that would be valuable for future AscendC operator optimization work.
"""

  return "".join([
    header,
    *input_blocks,
    how_to_analyze,
    pattern_definition,
    pattern_labels,
    exclusions,
    quality_rules,
    output_spec,
  ])


def generate_optimize_round_failed_summary_template(
  op: str,
  arc_src: str,
  current_code: str,
  current_plan: str,
  parent_code: str,
  parent_performance: Dict,
  current_performance: Dict,
):
  """Build the prompt that asks an LLM for failure notes about an unsuccessful optimization round.

  Args:
    op: Operator name; also passed to `underscore_to_pascalcase` for the PascalCase form.
    arc_src: Text placed inside `<pytorch_reference>`.
    current_code: Text for `<failed_optimization_implementation>`; block omitted when falsy.
    current_plan: Text for `<optimization_plan>`; block omitted when falsy.
    parent_code: Text placed inside `<previous_implementation>`.
    parent_performance: Metrics of the previous implementation.
    current_performance: Metrics of the failed optimization attempt.

  Returns:
    The assembled prompt string. The `<performance_metrics>` block is appended
    after the other inputs, and only when both metric arguments are truthy.
  """
  op_pascal = underscore_to_pascalcase(op)

  # Role and goal.
  header = f"""You are an expert **AscendC Optimization Failure Analyst**.

## Goal
From a **failed optimization attempt** for AscendC implementation of PyTorch operator `{op}` (PascalCase: `{op_pascal}`), extract **high-signal, reusable optimization failure notes** that help future optimization efforts avoid repeating the same mistakes.

Focus on **objective failure causes, flawed optimization strategies, and performance-related issues** grounded in the provided code, optimization plan, and performance metrics. Avoid "reflection", emotional language, and generic advice.
"""

  # Input blocks: the first is unconditional, the rest are optional.
  input_blocks = [f"""## Input (tag-based parsing)
<pytorch_reference>
{arc_src}
</pytorch_reference>

<previous_implementation>
{parent_code}
</previous_implementation>

"""]

  if current_code:
    input_blocks.append(f"""<failed_optimization_implementation>
{current_code}
</failed_optimization_implementation>

""")

  if current_plan:
    input_blocks.append(f"""<optimization_plan>
{current_plan}
</optimization_plan>

""")

  # Metrics go last and need both sides of the comparison.
  if parent_performance and current_performance:
    input_blocks.append(f"""<performance_metrics>
Previous Implementation: {_format_performance(parent_performance)}
Failed Optimization Implementation: {_format_performance(current_performance)}
</performance_metrics>

""")

  # Analysis guidance.
  how_to_analyze = """## How to Analyze (Step-by-Step)
1. **Compare implementations**: Identify concrete differences between `<previous_implementation>` and `<failed_optimization_implementation>` that relate to the failed optimization attempt.
2. **Examine optimization plan**: If `<optimization_plan>` is provided, understand the optimization strategy and identify where it went wrong.
3. **Review performance metrics**: If `<performance_metrics>` is provided, analyze whether the optimization caused performance regression or failed to achieve expected gains.
4. **Extract failure insights**: Focus on reusable lessons about what optimization approaches did not work and why.

"""

  # Failure note definition.
  note_definition = """## What constitutes a "Failure Note" (Optimization Failure Insights)

A valid failure note is a **high-signal, reusable description of what failed during optimization and where it manifested**, grounded in code analysis and performance metrics.

A failure note can describe ANY optimization-related failure mechanism, including but not limited to:

- **Optimization Technique Failure**: Code-level optimization attempts that failed or caused issues when comparing `<previous_implementation>` and `<failed_optimization_implementation>`. The failure must be concrete and traceable in the code differences.

- **Performance Regression**: Optimization attempts that resulted in worse performance than the baseline, identifiable through performance metrics or code analysis showing inefficient patterns.

- **Optimization Strategy Flaw**: Design decisions in `<optimization_plan>` or implementation choices that led to failure, such as incorrect tiling strategies, suboptimal memory access patterns, or flawed vectorization approaches.

- **Bottleneck Misidentification**: Cases where the optimization targeted the wrong performance bottleneck or missed critical performance issues, leading to ineffective or harmful changes.

- **Optimization-Induced Issues**: Any negative consequences of the optimization attempt, whether performance-related, correctness-related, or implementation-related, that can be observed in the code or metrics.

**Key principle**: Extract only failure insights that are concrete, reusable, and grounded in observable code differences or performance evidence. Focus on what went wrong and why the optimization attempt was unsuccessful.
"""

  # Classification labels.
  note_labels = f"""## Failure Note Types (Classification Guide)
Each failure note must be classified as exactly one of the following types:

- **Operator-Specific**: Use this label when the failure is tied to `{op}`'s specific semantics, attributes, mathematical operations, or domain-specific requirements. The failure would be specific to this operator's characteristics.

- **General**: Use this label when the failure describes AscendC/AICore optimization failure mechanisms independent of operator semantics. The failure could occur when optimizing other operators using similar approaches.

**Guidance**: When in doubt, consider whether the failure insight would be relevant when optimizing a different operator. If yes, it's likely General; if it's specific to `{op}`'s behavior, it's Operator-Specific.
"""

  # Exclusion criteria.
  exclusions = """## Exclusion Criteria (What NOT to Include)
Avoid extracting failure notes that fall into these categories:

- **Generic workflow descriptions**: Do not restate standard AscendC workflow steps or general implementation requirements. These are not optimization failures.

- **Operator semantics summaries**: Do not describe the operator's mathematical function or general correctness requirements. Focus on what went wrong with the optimization, not what the operator computes.

- **Unobservable inferences**: Do not infer failure causes, motivations, or intermediate steps that are not explicitly visible in the code, plan, or metrics. Stick to what can be directly observed.

- **Trivial or obvious failures**: Do not include failures that are obvious without specific context (e.g., "optimization failed" without explaining what specifically failed and why). Failure notes should provide non-trivial insights.

- **Non-code-based failures**: Do not describe failures that cannot be directly observed in the code differences between `<previous_implementation>` and `<failed_optimization_implementation>`, or in the performance metrics. The failure must be traceable.

- **Solution prescriptions**: Do not prescribe fixes or "how to do better". Only document what failed, where it manifested, and why the optimization attempt was unsuccessful.
"""

  # Quality constraints.
  quality_rules = """## Constraints (Quality Standards)
Adhere to these principles when extracting failure notes:

- **Ground in evidence**: Base all failure notes strictly on what is visible in the provided code, optimization plan, and performance metrics. Do not speculate about hidden causes or undocumented effects. If you cannot point to specific evidence, do not include the failure note.

- **Provide concrete references**: When a failure is supported by code, include 1–2 short, specific code references (e.g., function names, variable names, expressions, loop structures, API calls) from `<failed_optimization_implementation>` or relevant parts of `<optimization_plan>`. High-level design insights are acceptable when clearly reflected in the implementation structure.

- **Focus on optimization failures**: Prioritize failures that relate directly to performance optimization attempts rather than general implementation correctness (unless the optimization broke correctness). Use performance metrics when available to identify performance regressions or ineffective optimizations.

- **Ensure actionability**: Each failure note should be specific enough to help future optimization efforts avoid similar mistakes. Explain not just what failed, but also why the optimization attempt was unsuccessful and what can be learned from it.
"""

  # Required output shape.
  output_spec = """## Output Format (JSON ONLY)
Return a JSON array with **2–4** entries:
[
  {
    "type": "General | Operator-Specific (only these two types are allowed)",
    "content": "Describe the optimization failure cause/pattern concisely. Include specific code references when available. Focus on what optimization strategy failed and why it was unsuccessful."
  },
  ...
]

Each entry should represent a distinct, high-signal optimization failure insight that would help future optimization efforts avoid similar mistakes.
"""

  return "".join([
    header,
    *input_blocks,
    how_to_analyze,
    note_definition,
    note_labels,
    exclusions,
    quality_rules,
    output_spec,
  ])