import ast
import json
import math
import os
import random
import re
from collections import defaultdict
from pathlib import Path
from typing import Any

# Names of the browser-action functions that extract_arguments() recognises.
valid_actions = [
    "click",
    "hover",
    "click_and_type",
    "key_press",
    "goto",
    "go_back",
    "go_forward",
    "new_tab",
    "close_tab",
    "switch_tab",
    "type",
]


def extract_arguments(code_str: str) -> "dict[str, Any] | None":
    """Extract the literal arguments of the first action call in *code_str*.

    The source is parsed with ``ast`` and walked until a call to a plain
    name listed in ``valid_actions`` is found.

    Args:
        code_str: Python source text containing the action call.

    Returns:
        A dict describing the call's arguments, or ``None`` when no call to
        a known action is present.  Positional arguments are stored under
        ``"arg0"``, ``"arg1"``, ...; keyword arguments under their own name.
        String and numeric literals are stored as their value; every other
        expression (names, booleans, ``None``, nested calls, ...) is stored
        as ``None``.

    Raises:
        SyntaxError: If *code_str* is not valid Python source.
    """

    def literal_value(node):
        # ast.Str / ast.Num were removed in Python 3.12; every literal is an
        # ast.Constant now.  Only str and real numeric payloads are accepted
        # so the result matches what those legacy node classes matched
        # (bool is an int subclass but was never treated as a number).
        if isinstance(node, ast.Constant):
            value = node.value
            if isinstance(value, str):
                return value
            if isinstance(value, (int, float, complex)) and not isinstance(
                value, bool
            ):
                return value
        return None

    def extract_keys_values(call_node):
        # Positional arguments get synthetic "arg<i>" keys.
        keys_values = {
            f"arg{i}": literal_value(arg)
            for i, arg in enumerate(call_node.args)
        }
        # Keyword arguments keep their own name and may override the above.
        for keyword in call_node.keywords:
            keys_values[keyword.arg] = literal_value(keyword.value)
        return keys_values

    # Traverse the AST and stop at the first call to a known action.
    for node in ast.walk(ast.parse(code_str)):
        if (
            isinstance(node, ast.Call)
            and isinstance(node.func, ast.Name)
            and node.func.id in valid_actions
        ):
            return extract_keys_values(node)
    return None


# Input is a JSONL file with one recorded step per line; the converted
# records are written next to it with a ".parsed.jsonl" suffix.
file_name = "data/clueweb/popular_tmp.jsonl"
save_file = file_name.replace(".jsonl", ".parsed.jsonl")
print(save_file)
# Remove output left over from an earlier run before doing anything else.
if os.path.exists(save_file):
    os.remove(save_file)

# Number of skipped records, keyed by the stage marker (actn_type) that was
# active when processing of the record failed.
err_dict = defaultdict(int)

# The output is opened once for the whole run instead of once per record.
with open(file_name, "r", encoding="utf-8") as in_file, open(
    save_file, "w", encoding="utf-8"
) as out_file:
    for raw_line in in_file:
        # Assigned before the try so that a line which is not valid JSON is
        # tallied under a defined stage instead of a stale or unbound one.
        actn_type = "0"
        try:
            orig_data = json.loads(raw_line)
            data = {}
            data["task"] = orig_data["task_title"]
            data["axt"] = orig_data["ax_tree"]
            prev_actn_lines = [
                x for x in orig_data["prev_actions"].split("\n") if len(x) > 1
            ]

            actn_type = "2"
            for prev_line in prev_actn_lines:
                if (
                    "#" in prev_line
                    and "step" in prev_line
                    and ":" in prev_line
                    and "(" not in prev_line
                ):
                    # The step number itself is not used; the check is kept
                    # because a step-header line without "step <n>" has
                    # always caused the record to be skipped.
                    if re.search(r"step\s+(\d+)", prev_line) is None:
                        raise ValueError(
                            f"step header without a number: {prev_line!r}"
                        )
            data["prev_actions"] = "\n".join(prev_actn_lines)

            actn = orig_data["next_action"]
            axt_nodeid = 0
            # "click" also matches "click_and_type", so two tests suffice.
            if "click" in actn or "hover" in actn:
                actn_type = "3"
                # The element id is the first run of digits directly
                # followed by a closing parenthesis.
                node_match = re.search(r"(\d+)\)", actn)
                if node_match is None:
                    raise ValueError("no element id found in next_action")
                axt_nodeid = node_match.group(1)
            actn_type = "none"
            data["axt_nodeid"] = axt_nodeid

            subtask_line = ""
            actn_comment = ""
            actn_only_line = ""
            actn_lines = actn.split("\n")
            actn_summary = actn_lines[-1]
            # When several lines qualify, the last one of each kind wins.
            for actn_line in actn_lines:
                if (
                    "click_and_type(" in actn_line
                    or "click(" in actn_line
                    or "hover(" in actn_line
                ):
                    actn_only_line = actn_line
                if (
                    "# step" in actn_line
                    and ":" in actn_line
                    and "# step summary:" not in actn_line
                ):
                    actn_comment = actn_line
                if "sub-task" in actn_line:
                    subtask_line = actn_line

            # Call that replaces the original action line; stays None when
            # the action is not one of the three rewritten kinds, in which
            # case the original next_action text is kept.
            new_call = None
            if "click_and_type" in actn_only_line:
                actn_type = "click_and_type"
                arguments = extract_arguments(actn_only_line) or {}
                for key in ("content", "arg1"):
                    if key in arguments:
                        type_content = arguments[key]
                        break
                else:
                    # Fail the record rather than reuse text left over from
                    # a previously processed record.
                    raise KeyError("click_and_type call has no text argument")
                new_call = (
                    f'type(element_id="{str(axt_nodeid)}",string="{type_content}")'
                )
            elif "click" in actn_only_line:
                actn_type = "click"
                new_call = f'click(element_id="{str(axt_nodeid)}")'
            elif "hover" in actn_only_line:
                actn_type = "hover"
                new_call = f'hover(element_id="{str(axt_nodeid)}")'

            if new_call is not None:
                prefix = subtask_line + "\n" if subtask_line else ""
                actn = f"{prefix}{actn_comment}\n{new_call}\n{actn_summary}"

            data["next_action"] = actn.replace("# next action\n", "")
        except Exception:
            # Best effort: a record that cannot be converted is skipped and
            # only counted.  KeyboardInterrupt/SystemExit still propagate.
            err_dict[actn_type] += 1
        else:
            # Outside the try so that output I/O errors are not swallowed.
            out_file.write(json.dumps(data) + "\n")

print(err_dict)
