from .general_prompt import FIND_ELEMENT_PROMPT, ASK_MLLM_PROMPT


# Few-shot output examples; _build_prompt_template appends this text to the end
# of the system prompt.
# NOTE(review): braces are doubled as if for str.format, but within this file
# the system prompt is concatenated and returned without .format(), so the
# "{{" / "}}" pairs are passed on verbatim -- confirm whether a caller formats it.
# NOTE(review): this is not a raw string, so the \" and \n sequences below are
# resolved by Python; the rendered examples contain bare quotes and real line
# breaks inside the "soft_action" values.
OUTPUT_EXAMPLE = """
[Output Example]
Example 1 (Index-based action):
{{
  "thought": "The hardcoded input is targeting a field to name a note. The UI list shows a field with the placeholder "my_note", described as part of a dialog.",
  "soft_action": "kwargs = {{\"text\": \"my_note\", \"actions\": [\"input_text\"], \"target_description\": \"Input field for the file name in the note creation dialog\"}}\nindex = env_op.find_element(**kwargs)\nenv_op.input_text('ordered_items', index, True)"
}}

Example 2 (Coordinate-based action):
{{
  "thought": "The coordinate (230, 930) corresponds to the OK button. From the UI element at this coordinate, I extract its text and description to create matching criteria.",
  "soft_action": "kwargs = {{\"text\": \"OK\", \"target_description\": \"OK button in the dialog\"}}\nindex = env_op.find_element(**kwargs)\nenv_op.click(index)"
}}

Example 3 (Action without find_element):
{{
  "thought": "...",
  "soft_action": "env_op.swipe('up')"
}}
"""

# Prompt section telling the model to swap a hardcoded `index` for the value
# returned by `find_element()`; the imported FIND_ELEMENT_PROMPT text is
# appended directly after the instruction sentence.
INDEX_REPLACEMENT_PROMPT = (
  "\n[Index Replacement]\nYou need to use this function to replace the hardcoded `index` value with the index variable generated by the `find_element()`."
  + FIND_ELEMENT_PROMPT
)

# Prompt section with guidance for coordinate-based actions: how to rewrite
# coordinate swipes (element-based vs. direction-based / ask_mllm) and which
# element to prefer when several overlap at the same coordinate.
COORDINATE_REPLACEMENT_PROMPT = """
[Coordinate Replacement]
For coordinate-based actions (e.g. swipe(start_x, start_y, end_x, end_y)):

1. **For swipe with coordinates:**
   - If the start/end coordinates clearly correspond to specific UI elements (e.g., drag from one card to another): conceptually convert them to element-based actions by describing the start and end elements and using `find_element` for both.
   - If the swipe is used to scroll a region (e.g., swipe up the list) rather than operate on specific elements: you may convert it to a direction-based swipe (e.g., `env_op.swipe("up")`) or use `ask_mllm("According to the description 'swipe up the list', output the corresponding coordinates in the format: env_op.swipe(start_x, start_y, end_x, end_y)")` to generate reasonable coordinates.

2. **Priority for element selection when multiple elements overlap at a coordinate:**
   - Prefer clickable elements
   - Prefer smaller elements (more precise)
   - Prefer elements that appear on top (higher index in UI list)
"""

# Prompt section describing where to source parameters for shell actions that
# carry hardcoded values, with a priority order and examples.
# NOTE(review): within this file the constant is only referenced from
# commented-out code in _build_prompt_template, so it is not currently part of
# the prompt built here.
SHELL_REPLACEMENT_PROMPT = """
[Shell Command Replacement]
For shell actions with hardcoded parameters (e.g., values, file names):

**Parameter Sources (Priority Order):**
1. Task parameters/goal (e.g., brightness value from task description)
2. UI elements from current/previous observations (e.g., file name displayed in list)
3. Task context (e.g., app name mentioned in task goal)
4. Ask MLLM (last resort - use only when above methods fail)

**Examples:**
- Settings value: shell("settings put system screen_brightness 50") → Extract "50" from task goal/description
- File name: shell("cat /sdcard/Download/report.txt") → Extract "report.txt" or the complete file path from previous UI observations where it was listed
- App-specific: shell("pm path com.android.calendar") → Extract "calendar" or the complete package name from task context or use task parameter
- Query only (NO translation): shell("dumpsys battery"), shell("wm size"), shell("pm list packages")
"""

def _build_prompt_template(include_shell_prompt: bool = False) -> tuple[str, str]:
  """Build the action-translator prompt pieces.

  Args:
    include_shell_prompt: Accepted for interface compatibility. The shell
      replacement section is currently disabled, so this flag has no effect
      on the returned text.

  Returns:
    A ``(system_prompt, input_template)`` pair. ``system_prompt`` is the fully
    concatenated instruction text and is returned as-is (it is not passed
    through ``str.format`` here). ``input_template`` is a ``str.format``
    template with the placeholders ``goal``, ``obs_analysis``,
    ``action_reason``, ``action``, ``related_element`` and ``ui_info_str``.
  """
  # Lazy import to avoid circular dependency
  from gui_agents.react_star.prompts.action_space import ACTIONS_PROMPT

  base_prompt = """
Generate a Soft-coded Action by dynamically replacing the hardcoded index or coordinates in the given action with an element-matching strategy.
Before that, determine from the observation and action justification whether the original action can be decided by mechanical element matching; if not, consider using `ask_mllm()` for combined visual and language reasoning.
MLLMs are inherently random, so `ask_mllm` may not always return expected results. To ensure stability, use strict prompt constraints or minimize their usage.

If using `find_element()`, ensure that:
1. The revised logic maintains the same intended behavior as the original hardcoded action;
2. If indexing is not required, do not use the find_element method.
"""

  # Index and coordinate guidance are always included.
  replacement_prompts = INDEX_REPLACEMENT_PROMPT + COORDINATE_REPLACEMENT_PROMPT

  # NOTE: the shell section is disabled. `include_shell_prompt` is ignored and
  # SHELL_REPLACEMENT_PROMPT is not appended.
  # TODO: either re-enable appending SHELL_REPLACEMENT_PROMPT when the flag is
  # set, or drop the flag from the callers.

  action_prompts = (
    "\n[ADMISSIBLE ACTIONS]"
    + "\n# Use these only when appropriate based on the original action and UI context:" + ACTIONS_PROMPT
    + "\n# You may additionally consider the following smart action patterns if relevant:" + ASK_MLLM_PROMPT
  )

  # NOTE(review): the doubled braces in `kwargs = {{...}}` below are emitted
  # verbatim because the system prompt is never formatted in this function.
  output_format = """
[Output Format]
Your output must include two sections:
### Thought:
Provide a brief explanation (under 30 words) for how you constructed the dynamic UI element matching parameters (i.e. the `kwargs`).
### Soft-coded Action:
- **All actions must be prefixed with `env_op.`** (e.g., `env_op.click()`, `env_op.input_text()`)
- If not using `find_element()`: write new code with `env_op.` prefix.
- If the original action uses a hardcoded index:
Replace the hardcoded index with a dynamic lookup using `env_op.find_element`. You must output *exactly* these three lines of code:
```python
kwargs = {{...}}  # A dictionary describing the target UI element
index = env_op.find_element(**kwargs)  # Use env_op to locate the element dynamically
env_op.xxx(...)  # Replace xxx with the correct action using the index
```
- If the original action uses coordinates (x, y) for click/long_press/input_text:
  - If the action clearly targets a specific UI element: convert it to the same `find_element + index` pattern as above, using a well-constructed `kwargs`.
  - If the target is ambiguous or not a concrete element: use `ask_mllm` to get a textual description of the target, then build `kwargs` from that description, with the coordinate as a fallback.
- If the original action uses coordinates for swipe or drag_and_drop:
  - If targeting specific elements: convert to element-based action (describe start/end elements and use `find_element` for both, then perform the appropriate action).
  - If used purely for scrolling or dragging in blank areas: you may use direction-based swipes (e.g., `env_op.swipe("up")`) or relative coordinates, possibly assisted by `ask_mllm`.
- If the original action does NOT require an index or coordinates: Simply output the soft-coded action without calling env_op.find_element().
"""

  # Per-request template; the single-brace fields are filled in by the caller
  # via str.format.
  input_template = """
**User goal/request**: {goal}

[Input Provided]
- Observations Analysis:
{obs_analysis}

- Action Justification:
{action_reason}
   
- Hard Coded Action:
{action}

- Action Related element:
{related_element}

- Current screenshot, annotated screenshot, and UI elements list:
{ui_info_str}
"""

  system_prompt = base_prompt + replacement_prompts + action_prompts + output_format + OUTPUT_EXAMPLE
  return system_prompt, input_template


def get_action_translator_prompt(
  goal: str,
  obs_analysis: str,
  action_reason: str,
  action: str,
  related_element: str,
  ui_info_str: str = "",
) -> dict[str, str]:
  """Assemble the system and user prompts for translating a hardcoded action.

  Args:
    goal: Text substituted for the ``{goal}`` field of the user prompt.
    obs_analysis: Text substituted for ``{obs_analysis}``.
    action_reason: Text substituted for ``{action_reason}``.
    action: The hardcoded action string; substituted for ``{action}`` and
      also scanned for a shell call marker.
    related_element: Text substituted for ``{related_element}``.
    ui_info_str: Text substituted for ``{ui_info_str}``; an empty value is
      replaced with ``'Not available'``.

  Returns:
    A dict with the keys ``'system'`` and ``'user'`` holding the two prompts.
  """
  # Detect shell-style actions; the result is forwarded to the template builder.
  shell_markers = ('shell(', 'env_op.shell(')
  is_shell_action = any(marker in action for marker in shell_markers)

  system_text, user_template = _build_prompt_template(include_shell_prompt=is_shell_action)

  # Values for the placeholders of the user template.
  fields = {
    'goal': goal,
    'obs_analysis': obs_analysis,
    'action_reason': action_reason,
    'action': action,
    'related_element': related_element,
    'ui_info_str': ui_info_str or 'Not available',
  }

  return {'system': system_text, 'user': user_template.format(**fields)}