# Prompt section with general rules for choosing and validating a page
# operation.  Assembled line by line; the empty first and last entries give
# the leading and trailing newline of the rendered text.
OPERATION_GUIDANCE_PROMPT = "\n".join(
    (
        "",
        "[OPERATION_GUIDANCE]",
        "Follow these guidelines:",
        "- Choose the simplest method.",
        "- Ensure the xpath for click_xpath and click_option is visible in both the screenshot and html.",
        "- Confirm that a xpath supports the intended action before interacting.",
        "",
    )
)

# Header of the admissible-actions section: tells the reader to address
# elements by xpath, to use the namespace-safe //*[name()='tag' ...] form,
# and to write Python-style boolean literals.
ACTIONS_PREFIX = "\n".join(
    (
        "",
        "[ADMISSIBLE ACTIONS]",
        "Use xpath to refer to elements.",
        "Only use the actions listed below. Do not use any other actions.",
        "When writing XPath expressions in this environment, always use the form //*[name()='tag' and ...] instead of //tag[...], to avoid namespace issues with SVG or mixed documents.",
        "IMPORTANT: Use Python syntax. Boolean values must be True or False (capitalized), NOT true or false.",
        "",
    )
)

# Catalogue of the basic page-operation calls (click, option click, hover,
# typing, key press), one signature-plus-description per line.
ACTIONS_PROMPT = "\n".join(
    (
        "",
        "### Page Operation Actions:",
        '''env_op.click_xpath(xpath: str) # Click an HTML element with an XPath, e.g., env_op.click_xpath("//*[name()='button' and @id='button1']")''',
        "env_op.click_option(xpath: str) # Click an option HTML element in a list with an XPath",
        "env_op.move_mouse_on(xpath: str) # Move the mouse cursor on an HTML element with an XPath",
        '''env_op.type(xpath: str, characters: str, clear_existing: bool = True, press_enter_after: bool = True) # Type text into a typeable field. By default, the existing content in the field will be cleared unless clear_existing is set to False, and the "Enter" key is pressed after typing unless press_enter_after is set to False. Example: env_op.type("//*[@id='input1']", "text", clear_existing=True, press_enter_after=False)''',
        'env_op.press_key(key: str) # Press a key on the keyboard to the selected element. The input is chosen from ["enter", "space", "arrow_left", "arrow_right", "arrow_up", "arrow_down", "backspace"]',
        "",
    )
)

# Reference text for env_op.find_element: signature, parameter notes, XPath
# best practices and usage examples.
#
# Every XPath sample is written in the XPath 1.0 compatible
# //*[name()='tag' and ...] form that the text itself recommends.  In
# particular the "CORRECT" multi-tag example must not place a parenthesised
# expression after `//` -- XPath 1.0 only allows a step there.
#
# NOTE(review): the "special keys (class, exclude_classes, attr, or nested
# dicts)" bullet names things that are not XPath concepts -- presumably a
# leftover from an earlier locator API; confirm before rewording.
FIND_ELEMENT_PROMPT = """
### Get Element xpath
env_op.find_element(xpath: str, target_description: str = "") -> str | None
# Locate an element using XPath. If the xpath matches 0 or multiple elements, uses MLLM to find the best match.
# Returns the xpath of the matched element, or None if no match is found.

- Parameters:
  - xpath: XPath expression to locate the target element. Can use full XPath syntax for precise matching.
  - target_description: (Always include) A brief note on the element's role, appearance, screen position, and any dynamic/contextual details. Helps MLLM when xpath is ambiguous.

- XPath Best Practices:
  - Use //*[name()='tag' and ...] instead of //tag[...] to avoid namespace issues
  - Combine multiple attributes for robustness: //*[name()='button' and @id='submit' and contains(text(), 'OK')]
  - Use ancestor/descendant relationships: //*[name()='div' and @class='email-thread' and .//*[name()='div' and text()='Evy']]//*[name()='span' and @class='star']
  - Prefer specific attributes (tag, text, id, data-color, aria-label attributes) over special keys (class, exclude_classes, attr, or nested dicts) when available
  - For matching multiple element types, use "or" condition instead of union operator:
    * CORRECT: //*[name()='div']//*[(name()='span' or name()='a') and text()='OK']
    * WRONG: //*[name()='div'](//*[name()='span'] | //*[name()='a'])  # Invalid syntax!

- Examples:
  - xpath = env_op.find_element("//*[name()='button' and text()='Submit']", "Submit button in the form")
  - xpath = env_op.find_element("//*[name()='div' and @class='email-thread' and .//*[name()='div' and contains(text(), 'Evy')]]//*[name()='span' and @class='star']", "Star icon in Evy's email thread")
  - xpath = env_op.find_element("//*[name()='option' and text()='California']", "Dropdown option California")
  - xpath = env_op.find_element("//*[@id='input-email']", "Email input field")
  - xpath = env_op.find_element("//*[@aria-hidden='false']//*[(name()='span' or name()='a') and normalize-space(.)='Click']", "Link or span with text 'Click' in visible panel")
"""

# "[Xpath Replacement]" instruction asking for a more robust xpath via
# `find_element()`, immediately followed by the find_element reference text.
INDEX_REPLACEMENT_PROMPT = "".join(
    [
        "\n[Xpath Replacement]\n",
        "You need to use `find_element()` to generate a more robust xpath that can reliably locate the target element across different task instances.",
        FIND_ELEMENT_PROMPT,
    ]
)

# Reference text for env_op.one_step_actor, which delegates one atomic UI
# action to the MLLM based on a natural-language goal.
# The description names the web page as the observed surface, in line with
# the HTML/XPath operations described by the other prompt sections in this
# file (the earlier wording said "Android UI").
ONE_STEP_ACTOR_PROMPT = """
### One-Step-Actor (must be issued alone)
Don’t rely too much on one_step_actor.
env_op.one_step_actor(goal: str) -> int # Trigger the MLLM to observe the current web page UI and take one atomic action toward fulfilling the user’s goal.
Executes a single UI operation (e.g., click_xpath, type, move_mouse_on) based on the goal and current screen.
- Use When:
   - The target element is unclear or requires semantic understanding of the UI.
   - A single, goal-driven UI step is needed. (e.g., "Open the settings")
"""

# Reference text for env_op.ask_mllm, including a usage example that asks
# for a JSON object and parses it with json.loads.
# NOTE(review): the doubled braces in the final example line are reproduced
# verbatim; presumably the text is later passed through str.format -- confirm.
ASK_MLLM_PROMPT = "\n".join(
    (
        "",
        "### Ask MLLM to answer one question",
        "env_op.ask_mllm(question: str) -> str # Will return the answer in plain text.",
        "# For advanced parsing, you can ask mllm to return a json object, and then use json.loads(answer) to get targets.",
        "# Example:",
        '# answer = env_op.ask_mllm(question="...Return a json object with the name of the reviews as keys and the number of stars as values.")',
        "# json.loads(answer) # {{'name1': '...', 'name2': '...'}}",
        "",
    )
)

# Reference text for env_op.get_ui_content together with a sample of the
# HTML it returns.  The sample's indentation (4 spaces on the inner rows,
# 2 spaces before the closing tag) is part of the text and kept as-is.
GET_UI_CONTENT_PROMPT = "\n".join(
    (
        "",
        "### Get Current page's html body content",
        "env_op.get_ui_content() -> str # return the origin html content",
        "- Example return:",
        '<div id="wrap" data-wob_ref="2" data-wob_eps="e0">',
        '    <div id="query">Click on the "Ok" button.</div>',
        '    <div id="area" data-wob_ref="3" data-wob_eps="e0"><div data-wob_ref="4" data-wob_eps="e0"> cursus dis justo</div><div data-wob_ref="5" data-wob_eps="e0"> facilisis proin aliquam</div><button data-wob_ref="6" data-wob_eps="e0">Ok</button><div data-wob_ref="7" data-wob_eps="e0"> pharetra turpis scelerisque</div><div data-wob_ref="8" data-wob_eps="e0"> rutrum lectus adipiscing</div><div data-wob_ref="9" data-wob_eps="e0"> pretium, aliquet egestas</div></div>',
        "  </div>",
        "",
    )
)

# Minimal action prompt: the admissible-actions header followed by the basic
# page-operation catalogue.
FUNCTION_ACTIONS_PROMPT = "".join((ACTIONS_PREFIX, ACTIONS_PROMPT))

# Action prompt extended with the find_element, get_ui_content and ask_mllm
# reference sections, concatenated in exactly this order.
FUNCTION_ACTIONS_FIND_ELEMENT_PROMPT = "".join(
    [
        ACTIONS_PREFIX,
        ACTIONS_PROMPT,
        FIND_ELEMENT_PROMPT,
        GET_UI_CONTENT_PROMPT,
        ASK_MLLM_PROMPT,
    ]
)

# Action prompt that additionally offers one_step_actor.  The one-step-actor
# section comes right after the header, ahead of the basic actions, and the
# ask_mllm section precedes get_ui_content; the order is kept exactly.
FUNCTION_ACTIONS_ONE_STEP_ACTOR_PROMPT = "".join(
    [
        ACTIONS_PREFIX,
        ONE_STEP_ACTOR_PROMPT,
        ACTIONS_PROMPT,
        FIND_ELEMENT_PROMPT,
        ASK_MLLM_PROMPT,
        GET_UI_CONTENT_PROMPT,
    ]
)
