# Task Description & Output Format
# Instruction text for the grounding judgment: is the agent's ACTION supported
# by its THOUGHT and by the current webpage state? Written as adjacent string
# literals (one per prompt line) so that every newline is spelled out.
GROUNDING_TASK = (
    "Your task is to evaluate whether the agent's ACTION is properly grounded in its THOUGHT, considering the current state of the webpage.\n"
    "Use the user instruction, the current webpage state, and the agent's thought and action as evidence for your judgment. Your evaluation should assess whether the ACTION logically follows from the THOUGHT and is feasible and appropriate in the given environment.\n"
    "Mark the action as 'Yes' only if it is clearly and fully grounded in the thought and current webpage state. If there is any inconsistency, ambiguity, irrelevance, or if the action is not supported by the current page state, mark it as 'No'."
)

# Response template for the grounding judgment: a REASON line followed by a
# binary JUDGE line.
GROUNDING_FORMAT = (
    "Please return your response in the following format:\n"
    "REASON: [Your explanation for whether the action is properly grounded]\n"
    "JUDGE: [Yes / No]"
)


# Instruction text for rating helpfulness on a 1-5 scale. The rubric lines use
# Markdown bold markers; line 5 of the rubric contains a typographic
# apostrophe (U+2019) that is part of the prompt text.
PROGRESS_LIKERT_SCALE_TASK = (
    "Evaluate how helpful the given thought and action is for achieving the goal. Use the following scale:\n"
    "**Scoring Criteria (1 to 5):**\n"
    "- **5 (Very Helpful)**: The action directly and effectively moves toward fulfilling a key part of the goal.\n"
    "- **4 (Helpful)**: The action contributes meaningfully to progress, though it may require follow-up actions.\n"
    "- **3 (Somewhat Helpful)**: The action is partially relevant or a preparatory step, but doesn’t make immediate progress.\n"
    "- **2 (Slightly Helpful)**: The action is weakly related to the goal or might only indirectly help.\n"
    "- **1 (Not Helpful)**: The action is unrelated, redundant, or distracts from the goal."
)

# Response template for the 1-5 rating: a REASON line followed by a SCORE line.
PROGRESS_LIKERT_SCALE_FORMAT = (
    "Please return your response in the following format:\n"
    "REASON: [Your explanation for the score]\n"
    "SCORE: [1-5]"
)


# Instruction text for rating helpfulness with three classes: 1, 0 or -1.
PROGRESS_THREE_CLASS_TASK = (
    "Evaluate how helpful the given thought and action is for achieving the goal. Use the following scale:\n"
    "**Scoring Criteria:**\n"
    "- **1 (Helpful)**: The action clearly contributes to achieving the goal. It takes a necessary or productive step toward completing the task.\n"
    "- **0 (Neutral)**: The action is neither helpful nor harmful. It may be a placeholder, irrelevant at the current step, or too ambiguous to evaluate.\n"
    "- **-1 (Not Helpful)**: The action works against the goal, causes confusion, repeats a previous step unnecessarily, or leads the agent off track."
)

# Response template for the three-class rating: a REASON line followed by a
# SCORE line.
PROGRESS_THREE_CLASS_FORMAT = (
    "Please return your response in the following format:\n"
    "REASON: [Your explanation for the score]\n"
    "SCORE: [-1 / 0 / 1]"
)


# Instruction text for judging each checklist item as 'Yes' or 'No'.
PROGRESS_WITH_CHECKLIST_TASK = (
    "Your task is to evaluate how well the agent's THOUGHT and ACTION satisfy each item in the checklist.\n"
    "Use the task instruction, trajectory (including previously completed steps from history), current webpage state, and the agent's current response as evidence for your evaluation.\n"
    "For each checklist item:\n"
    "- Mark it as 'Yes' if it is clearly and fully satisfied either in the current response or already completed in the history.\n"
    "- Mark it as 'No' if there is ambiguity, insufficient evidence, or the step is incomplete or not yet started."
)

# Response template for the Yes/No checklist judgment. The last two lines each
# end with two spaces, and the text ends with a newline; both are part of the
# string value and are written explicitly here so an editor cannot strip them.
PROGRESS_WITH_CHECKLIST_FORMAT = (
    "Please return your response in the following format:\n"
    "REASON: [Write a single, coherent paragraph explaining how well the agent's response satisfies the checklist overall. Use both the history and the agent's current thought/action as evidence. Mention specific strengths or missing elements that influence your decision.]\n"
    "CHECKLIST EVALUATION:  \n"
    "Checklist X: [Yes / No]  \n"
)

# Instruction text for judging each checklist item as 'Yes', 'In Progress' or
# 'No'.
PROGRESS_WITH_CHECKLIST_IN_PROGRESS_TASK = (
    "Your task is to evaluate how well the agent's THOUGHT and ACTION satisfy each item in the checklist.\n"
    "Use the task instruction, trajectory (including previously completed steps from history), current webpage state, and the agent's current response as evidence for your evaluation. Clearly consider any items already successfully completed or currently in progress according to the provided trajectory.\n"
    "For each checklist item:\n"
    "- Mark it as 'Yes' if it is clearly and fully satisfied either in the current response or already completed in the history.\n"
    "- Mark it as 'In Progress' if the agent has made partial but meaningful progress toward completing the item.\n"
    "- Mark it as 'No' if there is ambiguity, insufficient evidence, or the step is incomplete or not yet started."
)

# Response template for the three-way checklist judgment. The last two lines
# each end with two spaces, and the text ends with a newline; both are part of
# the string value and are written explicitly here so an editor cannot strip
# them.
PROGRESS_WITH_CHECKLIST_IN_PROGRESS_FORMAT = (
    "Please return your response in the following format:\n"
    "REASON: [Write a single, coherent paragraph explaining how well the agent's response satisfies the checklist overall. Use both the history and the agent's current thought/action as evidence. Mention specific strengths or missing elements that influence your decision.]\n"
    "CHECKLIST EVALUATION:  \n"
    "Checklist X: [Yes / In Progress / No]  \n"
)


# The four "OURS" prompts currently hold no instructions: each value is a
# single newline character.
# NOTE(review): these look like unfilled placeholders; confirm before relying
# on them as prompt text.
GROUNDING_OURS_TASK = "\n"

GROUNDING_OURS_FORMAT = "\n"

PROGRESS_OURS_TASK = "\n"

PROGRESS_OURS_FORMAT = "\n"

# Evaluation types: each dict pairs a task description with its output format.
# Grounding evaluation: task description paired with its response template.
GROUNDING = dict(
    task_description=GROUNDING_TASK,
    output_format=GROUNDING_FORMAT,
)

# "Ours" grounding evaluation: pairs the GROUNDING_OURS_* task and format strings.
GROUNDING_OURS = dict(
    task_description=GROUNDING_OURS_TASK,
    output_format=GROUNDING_OURS_FORMAT,
)

# Progress evaluation on the 1-5 scale: task description plus response template.
PROGRESS_LIKERT_SCALE = dict(
    task_description=PROGRESS_LIKERT_SCALE_TASK,
    output_format=PROGRESS_LIKERT_SCALE_FORMAT,
)

# Progress evaluation with three classes: task description plus response template.
PROGRESS_THREE_CLASS = dict(
    task_description=PROGRESS_THREE_CLASS_TASK,
    output_format=PROGRESS_THREE_CLASS_FORMAT,
)

# Checklist-based progress evaluation (Yes / No per item): task description
# plus response template.
PROGRESS_WITH_CHECKLIST = dict(
    task_description=PROGRESS_WITH_CHECKLIST_TASK,
    output_format=PROGRESS_WITH_CHECKLIST_FORMAT,
)

# Checklist-based progress evaluation (Yes / In Progress / No per item): task
# description plus response template.
PROGRESS_WITH_CHECKLIST_IN_PROGRESS = dict(
    task_description=PROGRESS_WITH_CHECKLIST_IN_PROGRESS_TASK,
    output_format=PROGRESS_WITH_CHECKLIST_IN_PROGRESS_FORMAT,
)

# "Ours" progress evaluation: pairs the PROGRESS_OURS_* task and format strings.
PROGRESS_OURS = dict(
    task_description=PROGRESS_OURS_TASK,
    output_format=PROGRESS_OURS_FORMAT,
)