# System prompt for the ActionSummarizer agent.
# Rendered with str.format() in get_summarizer_prompt; the only placeholder is
# {marking_note}. Because the text goes through str.format(), any literal brace
# added to it must be doubled ({{ / }}).
SYSTEM_PROMPT_TEMPLATE = """
You are an ActionSummarizer agent. Evaluate the success of actions executed on an Android device by comparing 'before' and 'after' screenshots.

[Output Format]
Your output should consist of the following things:
### Screen Changes:
Summarize the primary differences between the before and after screens (max 30 words). If none, respond with "Nothing Happens."
### Execution Summary:
In one concise line (max 50 words), state whether the page changed as expected, including the intent, outcome, and key insights.

[Your Workflow]
1. Compare Screenshots: Focus on differences related to the target element in the 'before' screenshot and the executed code.
2. Verify Purpose: Check if the executed code aligns with its intended purpose (reason for code) and if the highlighted element meets expectations.
3. Compare Code: Confirm that the expected code matches the executed code; if not, identify discrepancies.
4. Assess Outcome: Determine if the executed code met the intended goal.
5. Highlight Findings: Note key insights for future actions.

[Guidelines]
- If actions like `answer` or `wait` do not change the screen, assume success.
- If no change occurs, clearly state the failure and possible reasons.
- Rely primarily on screenshot analysis.
- Focus on actionable insights; avoid redundant details.
- For file-related operations, make sure to operate only on the exact target file; do not interact with similar files. You must locate and use the precise file required.
- When naming files, ensure proper file extensions.

Note:
- The 'before' screenshot has a "before" label at the bottom right.
- The 'after' screenshot has an "after" label at the bottom right. Screenshots are taken 1 second after each action; the “after” image may capture an intermediate loading state.
{marking_note}
"""

# User-turn prompt for the ActionSummarizer agent.
# Rendered with str.format() in get_summarizer_prompt, which supplies all five
# placeholders: {goal}, {execution_info}, {reason}, {before_ui_content} and
# {after_ui_content}.
USER_PROMPT_TEMPLATE = """
- User goal: {goal}

- Executed code and execution result:
{execution_info}

- Reason for Code:
{reason}

- Before screenshot details (UI elements):
{before_ui_content}

- After screenshot details (UI elements):
{after_ui_content}
"""

def get_summarizer_prompt(
  reason: str,
  goal: str,
  execution_info: str = "",
  # Annotations are quoted so the `|` union is never evaluated at definition
  # time and the signature stays importable on Python 3.9.
  before_ui_content_full_dict: "list[dict] | None" = None,
  after_ui_content_full_dict: "list[dict] | None" = None,
  action_space_mode: str = 'index',
) -> dict[str, str]:
  """Build the system and user prompts for the ActionSummarizer agent.

  Args:
    reason: Why the code was executed; fills the "Reason for Code" section.
    goal: The user goal; fills the "User goal" section.
    execution_info: Executed code and its execution result, as text.
    before_ui_content_full_dict: UI elements of the 'before' screen. None or
      an empty list is forwarded to the description helper as an empty list.
    after_ui_content_full_dict: Same as above, for the 'after' screen.
    action_space_mode: 'coordinate' selects the note describing coordinate
      markers; any other value selects the bounding-box/index note.

  Returns:
    A dict with the keys 'system' and 'user' holding the formatted prompts.
  """
  # Imported at call time, as in the original (presumably to avoid an import
  # cycle with autorpa.utils -- TODO confirm).
  from autorpa.utils import agent_utils

  # Pick the note that explains how the 'before' screenshot is annotated.
  if action_space_mode == 'coordinate':
    marking_note = """- The 'before' screenshot marking rules for coordinate mode:
  * Single-point actions (click/long_press/input_text): Large yellow circle with white outline and cyan center dot at the target coordinate
  * Swipe with coordinates (start_x, start_y, end_x, end_y): Yellow circle at start point → Cyan circle at end point, connected by a thick colored arrow. Both points have clear "START" and "END" labels in large bold black text on white background box for maximum visibility.
  * Direction-based swipe (up/down/left/right): Large bright multi-colored arrow (white outline with yellow/cyan colors) at screen center pointing in the swipe direction, with a prominent bold black text label (e.g., "SWIPE UP") on white background box for high visibility.
  * Actions without coordinate info: No visual marking (rely on text description)"""
  else:
    marking_note = "- The 'before' screenshot highlights the target elements mentioned in the code with bounding boxes and index numbers."

  system_prompt = SYSTEM_PROMPT_TEMPLATE.format(
    marking_note=marking_note,
  )

  # `or []` normalises both None (the default) and any other falsy value to a
  # fresh empty list before handing it to the helper.
  user_prompt = USER_PROMPT_TEMPLATE.format(
    goal=goal,
    before_ui_content=agent_utils._generate_ui_elements_description_str(before_ui_content_full_dict or []),
    after_ui_content=agent_utils._generate_ui_elements_description_str(after_ui_content_full_dict or []),
    execution_info=execution_info,
    reason=reason,
  )

  return {
    'system': system_prompt,
    'user': user_prompt
  }