import re
import ast
import json
from typing import Dict, Any, Tuple

"""
最终版转换代码
注意: 
- 此版本根据要求，【不会】对坐标进行任何尺寸缩放/转换。
- 此版本【已修复】对 click/long_press 动作的解析，使其能同时兼容 point 和 start_box 两种参数名。
"""

def parse_action_string(action_str: str) -> Tuple[str, Dict[str, str]]:
    r"""Split an ``action(key='value', ...)`` string into its name and parameters.

    Three value forms are recognised: single-quoted, double-quoted and
    parenthesised (``key=(1,2)``). Quoted values are returned without their
    quotes; parenthesised values keep their parentheses. Anything else
    (for example a bare, unquoted value) is silently skipped.

    Inside a quoted value a backslash-escaped delimiter (``\'`` within single
    quotes, ``\"`` within double quotes) is a literal quote character and does
    not terminate the value. Every other backslash sequence is returned
    verbatim, exactly as it appears in the input.

    Args:
        action_str: Text such as ``click(start_box='(425,810)')``.

    Returns:
        A ``(name, params)`` tuple; ``params`` maps each key to its string value.

    Raises:
        ValueError: If ``action_str`` is not of the form ``name(...)``.
    """
    match = re.match(r'^\s*(\w+)\s*\((.*)\)\s*$', action_str, re.DOTALL)
    if not match:
        raise ValueError(f"Invalid action format: {action_str}")

    name, params_str = match.groups()
    params: Dict[str, str] = {}
    if not params_str.strip():
        return name, params

    # For each quote style the escape-aware alternative is tried first; the
    # naive "up to the next quote" alternative is kept as a fallback so that
    # inputs such as 'C:\' (a lone trailing backslash) still parse as before.
    param_pattern = re.compile(
        r"(\w+)\s*=\s*(?:"
        r"'(?P<sq>(?:[^'\\]|\\[\s\S])*)'"
        r"|'(?P<sq_raw>[^']*)'"
        r'|"(?P<dq>(?:[^"\\]|\\[\s\S])*)"'
        r'|"(?P<dq_raw>[^"]*)"'
        r"|\((?P<paren>.*?)\)"
        r")"
    )
    for p_match in param_pattern.finditer(params_str):
        found = p_match.groupdict()
        if found["sq"] is not None:
            # Drop only the backslash that escapes the delimiter quote.
            value = found["sq"].replace("\\'", "'")
        elif found["dq"] is not None:
            value = found["dq"].replace('\\"', '"')
        elif found["paren"] is not None:
            # The regex captures the inside of the parentheses; put them back.
            value = f"({found['paren']})"
        elif found["sq_raw"] is not None:
            value = found["sq_raw"]
        else:
            value = found["dq_raw"]
        params[p_match.group(1)] = value

    return name, params

def _parse_coordinates(coord_str: str) -> Tuple[int, int]:
    """辅助函数, 解析多种格式的坐标字符串。"""
    coord_str = coord_str.strip()
    point_match = re.search(r'<point>\s*(\d+)\s+(\d+)\s*</point>', coord_str)
    if point_match:
        x, y = point_match.groups()
        return int(x), int(y)
    try:
        # 使用 ast.literal_eval 来安全地解析元组/列表字符串, e.g., "(425,810)"
        coords = ast.literal_eval(coord_str)
        if isinstance(coords, (list, tuple)) and len(coords) == 2:
            return int(coords[0]), int(coords[1])
    except (ValueError, SyntaxError):
        pass
    raise ValueError(f"Unknown coordinate format: {coord_str}")

def _get_coordinate_param(
    params: Dict[str, str],
    keys: Tuple[str, ...],
    action_name: str,
) -> str:
    """Return the value of the first of *keys* present in *params*.

    Raises:
        ValueError: If none of *keys* is present.
    """
    for key in keys:
        if key in params:
            return params[key]
    expected = " or ".join(f"'{key}'" for key in keys)
    raise ValueError(
        f"Action '{action_name}' is missing a coordinate parameter ({expected})."
    )


def map_new_to_old_action(
    new_action_str: str,
    screen_width: int = 1080,
    screen_height: int = 2400
) -> Dict[str, Any]:
    """Map a new-format action string to an old-format action dictionary.

    Supported actions and their old-format equivalents:

    * ``click`` / ``long_press`` -> same name, ``coordinate``
    * ``type`` -> ``type`` with ``text``
    * ``open_app`` -> ``open`` with ``text``
    * ``drag`` -> ``swipe`` with ``coordinate`` and ``coordinate2``
    * ``scroll`` -> ``swipe`` of one third of the screen in the given direction
    * ``press_home`` / ``press_back`` -> ``system_button``
    * ``finished`` -> ``terminate`` with status ``success``

    Coordinate parameters may be named either ``point``/``start_point``/
    ``end_point`` or ``start_box``/``end_box``. Coordinates are passed through
    without any scaling.

    Args:
        new_action_str: Action text such as ``click(point='(1,2)')``.
        screen_width: Screen width; only used to size horizontal scrolls.
        screen_height: Screen height; only used to size vertical scrolls.

    Returns:
        ``{"name": <old action name>, "parameters": {...}}``. Coordinates are
        ``(x, y)`` tuples.

    Raises:
        ValueError: If the action string is malformed, the action name or
            scroll direction is unknown, a required parameter is missing, or
            a coordinate cannot be parsed.
    """
    name, params = parse_action_string(new_action_str)
    old_action: Dict[str, Any] = {"name": "", "parameters": {}}
    parameters = old_action['parameters']

    if name == 'click' or name == 'long_press':
        old_action['name'] = name
        coord_str = _get_coordinate_param(params, ('point', 'start_box'), name)
        parameters['coordinate'] = _parse_coordinates(coord_str)
    elif name == 'type':
        old_action['name'] = 'type'
        parameters['text'] = params.get('content', '')
    elif name == 'open_app':
        old_action['name'] = 'open'
        parameters['text'] = params.get('app_name', '')
    elif name == 'drag':
        old_action['name'] = 'swipe'
        start_str = _get_coordinate_param(params, ('start_point', 'start_box'), name)
        end_str = _get_coordinate_param(params, ('end_point', 'end_box'), name)
        parameters['coordinate'] = _parse_coordinates(start_str)
        parameters['coordinate2'] = _parse_coordinates(end_str)
    elif name == 'scroll':
        old_action['name'] = 'swipe'
        # Accept the same key aliases as click/long_press.
        start_str = _get_coordinate_param(params, ('point', 'start_box'), name)
        start_point = _parse_coordinates(start_str)
        if 'direction' not in params:
            raise ValueError("Action 'scroll' is missing the 'direction' parameter.")
        direction = params['direction'].strip().lower()
        scroll_dist_y = screen_height // 3
        scroll_dist_x = screen_width // 3
        # The swipe gesture moves opposite to the scroll direction:
        # scrolling "down" drags the finger upwards (decreasing y), etc.
        offsets = {
            'down': (0, -scroll_dist_y),
            'up': (0, scroll_dist_y),
            'left': (scroll_dist_x, 0),
            'right': (-scroll_dist_x, 0),
        }
        if direction not in offsets:
            raise ValueError(f"Unknown scroll direction: '{params['direction']}'")
        dx, dy = offsets[direction]
        x1, y1 = start_point
        # Only the lower bound is clamped; the end point may exceed the
        # given screen_width/screen_height.
        end_point = (max(0, x1 + dx), max(0, y1 + dy))
        parameters['coordinate'] = start_point
        parameters['coordinate2'] = end_point
    elif name == 'press_home':
        old_action['name'] = 'system_button'
        parameters['button'] = 'Home'
    elif name == 'press_back':
        old_action['name'] = 'system_button'
        parameters['button'] = 'Back'
    elif name == 'finished':
        old_action['name'] = 'terminate'
        parameters['status'] = 'success'
        parameters['answer'] = params.get('content', '')
    else:
        raise ValueError(f"Unknown action name: '{name}'")
    return old_action

def format_as_tool_call_json(old_action_dict: Dict[str, Any]) -> str:
    """Serialise an old-format action dictionary as a ``mobile_use`` tool-call JSON string.

    The action name is stored under ``"action"`` and the action's parameters
    are merged alongside it inside ``"arguments"``. Tuple values (such as
    coordinates) are emitted as JSON arrays.

    Args:
        old_action_dict: Mapping with a ``'name'`` entry and a ``'parameters'``
            entry.

    Returns:
        The JSON text ``{"name": "mobile_use", "arguments": {...}}``.
    """
    merged = {"action": old_action_dict['name']}
    merged.update(old_action_dict['parameters'])
    # Build a fresh mapping with every tuple converted to a list.
    normalized = {
        arg_name: list(arg_value) if isinstance(arg_value, tuple) else arg_value
        for arg_name, arg_value in merged.items()
    }
    return json.dumps({"name": "mobile_use", "arguments": normalized})

def transform_to_json_tool_call_output(
    raw_content: str,
    screen_width: int = 1080,
    screen_height: int = 2400
) -> str:
    """Rewrite raw model output so its action is an old-format tool-call JSON.

    The input must contain ``Thought:`` followed later by ``Action:``. The
    thought text is kept (whitespace-trimmed) and the action is converted via
    ``map_new_to_old_action`` and ``format_as_tool_call_json``.

    Args:
        raw_content: The model's raw output text.
        screen_width: Forwarded to ``map_new_to_old_action``.
        screen_height: Forwarded to ``map_new_to_old_action``.

    Returns:
        ``"Thought: <thought>\\nAction: <tool-call JSON>"``.

    Raises:
        ValueError: If the ``Thought:``/``Action:`` markers are not found, or
            if the action itself cannot be converted.
    """
    sections = re.search(r"Thought:(.*?)Action:(.*)", raw_content, re.DOTALL)
    if sections is None:
        raise ValueError("Could not find 'Thought:' and 'Action:' in the content.")

    thought_text = sections.group(1).strip()
    action_text = sections.group(2).strip()

    legacy_action = map_new_to_old_action(action_text, screen_width, screen_height)
    tool_call_json = format_as_tool_call_json(legacy_action)
    return f"Thought: {thought_text}\nAction: {tool_call_json}"

# --- Usage example ---
if __name__ == '__main__':
    # Sample agent output that originally caused the conversion error.
    sample_output = """Thought: I found the Audio Recorder app on the home screen, and its icon is quite recognizable—it's the one with the red background and a white "REC" symbol. To start recording audio, I need to open this app first, so I'm going to click on its icon now.
Action: click(start_box='(425,810)')"""

    # Convert it with the corrected function.
    converted = transform_to_json_tool_call_output(sample_output)
    divider = "\n" + "=" * 30 + "\n"

    print("--- Original Agent Output ---")
    print(sample_output)
    print(divider)
    print("--- Transformed Content (Readable by your old system) ---")
    print(converted)

    # Verification: the text after "Action:" must load as JSON.
    action_json = converted.split("Action:")[1].strip()
    print("\nParsed final action JSON object:")
    print(json.loads(action_json))