import argparse
import os
import json
import ast
from prompt.androidControlPrompt import ANDROIDCONTROLLOWACTIONPREDICTPROMPT_FOROSATLAS, ANDROIDCONTROLHIGHACTIONPREDICTPROMPT_FOROSATLAS, ANDROIDCONTROLLOWACTIONPREDICTPROMPT_FORUITARAS, ANDROIDCONTROLHIGHACTIONPREDICTPROMPT_FORUITARAS, ANDROIDCONTROLLOWACTIONPREDICTPROMPT_FORGUIR1, ANDROIDCONTROLHIGHACTIONPREDICTPROMPT_FORGUIR1
import re
from tqdm import tqdm
import sys
sys.path.append("./")
from utils.logging_utils import setup_logger_to_stdout
from utils.schema.GUI_OWL.common import pil_to_base64, message_translate
from preprocess_base import BasePreProcess
logger = setup_logger_to_stdout()

def parse_args(args=None, namespace=None):
    """Parse the command-line options of the dataset-to-JSON converter.

    Args:
        args: Optional list of argument strings. When ``None`` argparse falls
            back to ``sys.argv[1:]``.
        namespace: Optional object to receive the parsed attributes. When
            ``None`` argparse creates a fresh ``argparse.Namespace``.

    Returns:
        The namespace holding ``dataset_name``, ``dataset_type``,
        ``dataset_path``, ``model_name`` and ``save_path``.
    """
    parser = argparse.ArgumentParser(description='Origin Dataset To Json')
    parser.add_argument('--dataset_name', type=str, default="AndroidControl",
                        help='dataset name')
    parser.add_argument('--dataset_type', type=str, default='low', help='dataset type')
    parser.add_argument('--dataset_path', type=str, default="/data3/cpz/datasets/android_control_parsed",
                        help='dataset path')
    parser.add_argument('--model_name', type=str, default="GUI_OWL",
                        help='model name')
    parser.add_argument('--save_path', type=str, default="/home/chengpengzhou/GUI_VISION/GUI-Speaker/datasets/json",
                        help='save path')
    # Forward the caller-supplied values; previously they were silently ignored
    # and sys.argv was always parsed instead.
    return parser.parse_args(args, namespace)


      
class AndroidControlPreProcess(BasePreProcess):
    def __init__(self, dataset_type, dataset_path, dataset_name, save_path, model_name):
        """Set up the converter and load the episode splits when they exist.

        ``dataset_path``, ``dataset_name``, ``save_path`` and ``model_name`` are
        forwarded to the base-class initialiser; ``dataset_type`` is stored on
        the instance.  ``self.split_json_data`` is filled from
        ``self._merge_train_validation()`` only if the split file is found,
        otherwise it is ``None``.
        """
        super().__init__(dataset_path, dataset_name, save_path, model_name)
        self.dataset_type = dataset_type
        self.split_json_name = "android_control_splits.json"
        # Only load the split data when the split file exists under dataset_path.
        split_file_found = dataset_path and os.path.exists(
            os.path.join(dataset_path, self.split_json_name)
        )
        self.split_json_data = self._merge_train_validation() if split_file_found else None
    
    def OS_ATLAS(self):
        """Export every split of the dataset as OS-ATLAS style step records.

        For each split key in ``self.split_json_data`` the episode metadata
        files are read, a terminal ``complete`` action is appended to every
        episode, and one record per step is built from a deep copy of the
        template returned by ``super().OS_ATLAS()``.  The records of a split are
        written to ``<save_path>/<dataset_type>_<split>_<model_name.lower()>.json``.

        Steps whose action type is not supported are logged and skipped.
        """
        from copy import deepcopy

        sample = super().OS_ATLAS()

        def actionMapping(action, image_size):
            """Translate one raw action dict into its OS-ATLAS action string.

            Handled types: click, open_app, long_press, navigate_home, scroll,
            navigate_back, wait, input_text and the synthetic complete.

            Args:
                action: Raw action dict carrying at least ``action_type``.
                image_size: ``[width, height]`` of the matching screenshot.

            Returns:
                The action string, or ``None`` (after logging an error) when the
                action type is not supported.
            """
            action_type = action['action_type']
            if action_type in ('click', 'long_press'):
                # Both pointer actions share one coordinate convention: pixel
                # coordinates rescaled by the screenshot size to 0-1000.
                # long_press used to emit raw pixels, unlike click.
                x = action['x'] / image_size[0] * 1000
                y = action['y'] / image_size[1] * 1000
                verb = "CLICK" if action_type == 'click' else "LONG_CLICK"
                return f"{verb} <point>[[{x}, {y}]]</point>"
            elif action_type == "wait":
                return "WAIT"
            elif action_type == "open_app":
                return f"OPENAPP [{action['app_name']}]"
            elif action_type == "navigate_home":
                return "PRESS_HOME"
            elif action_type == "navigate_back":
                return "PRESS_BACK"
            elif action_type == "scroll":
                return f"SCROLL [{action['direction'].upper()}]"
            elif action_type == "input_text":
                return f"TYPE [{action['text']}]"
            elif action_type == "complete":
                return "COMPLETE"
            logger.error(f"Action mapping error: {action}")
            return None

        for key in self.split_json_data.keys():
            data = []
            logger.info(f"Processing the {key} samples in the {self.dataset_name}")
            for item in tqdm(self.split_json_data[key]):
                logger.info(f"Processing the episode: {item}")
                episod_path = os.path.join(self.path, "episode_"+str(item))
                metadata = self.readJson(os.path.join(episod_path, "metadata_episode_"+str(item)+".json"))
                # Synthetic terminal step so the finished state is also exported.
                metadata['actions'] += [{"action_type":"complete"}]
                metadata['image_size'] = [[width, height] for width, height in zip(metadata['screenshot_widths'], metadata['screenshot_heights'])]
                action_traslate = [actionMapping(action, image_size) for (action, image_size) in zip(metadata['actions'], metadata['image_size'])]
                metadata['step_instructions'] += ["task is finished"]

                for i, action_text in enumerate(action_traslate):
                    if action_text is None:
                        # Unsupported action (already logged): skip the step
                        # instead of failing on ``"action:\n" + None`` below.
                        continue
                    record = deepcopy(sample)
                    record["episode_id"] = metadata["episode_id"]
                    record["step_id"] = i+1
                    record["images"] = [metadata['screenshots'][i].replace("android_control_parsed_data_fixed/", "/data3/cpz/datasets/android_control_parsed/")]
                    record['accessibility_trees'] = metadata["accessibility_trees"][i].replace("android_control_parsed_data_fixed/", "/data3/cpz/datasets/android_control_parsed/")
                    record['goal'] = metadata['goal']
                    record['messages'][1]['content'] = action_text
                    record['label'] = "action:\n"+action_text
                    record['image_size'] = [(metadata['screenshot_widths'][i], metadata['screenshot_heights'][i])]
                    if self.dataset_type == 'low':
                        # Low-level prompts also carry the instruction of the current step.
                        record['messages'][0]['content'] = ANDROIDCONTROLLOWACTIONPREDICTPROMPT_FOROSATLAS.replace("{finalGoal}", metadata['goal'])
                        record['messages'][0]['content'] = record['messages'][0]['content'].replace("{actionDesc}", metadata['step_instructions'][i])
                    else:
                        record['messages'][0]['content'] = ANDROIDCONTROLHIGHACTIONPREDICTPROMPT_FOROSATLAS.replace("{finalGoal}", metadata['goal'])
                    # History is the list of step instructions before the current step.
                    record['messages'][0]['content'] = record['messages'][0]['content'].replace("{previousActions}", str(metadata['step_instructions'][:i]))
                    data.append(record)

            # exist_ok avoids the check-then-create race of a separate exists() test.
            os.makedirs(self.save_path, exist_ok=True)
            self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_"+str(key)+"_"+self.model_name.lower()+'.json'))
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")

    def UI_TARS(self):
        """Export every split of the dataset as UI-TARS style chat records.

        Each step record starts from a deep copy of the template returned by
        ``super().UI_TARS()``, receives the prompt for the configured
        ``dataset_type``, up to four previous (screenshot, thought/action)
        turns, and the current screenshot.  In ``low`` mode an assistant turn
        holding the current step instruction is appended as well.  One JSON
        file per split is written under ``self.save_path``.
        """
        from copy import deepcopy

        sample = super().UI_TARS()
        old_root = "android_control_parsed_data_fixed/"
        new_root = "/data3/cpz/datasets/android_control_parsed/"

        def actionMapping(action, image_size):
            """Translate one raw action dict into a UI-TARS action call string.

            Handled types: click, open_app, long_press, navigate_home, scroll,
            navigate_back, wait, input_text and the synthetic complete.
            Unsupported types are logged and yield ``None``.
            """
            kind = action['action_type']
            if kind == 'click' or kind == 'long_press':
                if "1.5" in self.model_name:
                    # Model names containing "1.5" keep the raw coordinates.
                    x, y = action['x'], action['y']
                else:
                    x = int(action['x']/image_size[0]*1000)
                    y = int(action['y']/image_size[1]*1000)
                if kind == 'click':
                    return f"click(start_box='({x},{y})')"
                return f"long_press(start_box='({x},{y})', time='')"
            if kind == "wait":
                return "wait()"
            if kind == "open_app":
                return f"open_app(app_name='{action['app_name']}')"
            if kind == "navigate_home":
                return "press_home()"
            if kind == "navigate_back":
                return "press_back()"
            if kind == "scroll":
                return f"scroll(direction='{action['direction']}')"
            if kind == "input_text":
                return f"type(content='{action['text']}')"
            if kind == "complete":
                return "finished()"
            logger.error(f"Action mapping error: {action}")

        def build_history(index, metadata, image_size_list):
            """Return the chat turns of at most four steps preceding ``index``."""
            window = range(0, index) if index <= 4 else range(index - 4, index)
            turns = []
            for pos in range(len(metadata['screenshots'])):
                if pos not in window:
                    continue
                turns.append({
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "image": metadata['screenshots'][pos].replace(old_root, new_root)
                        }
                    ]
                })
                action = actionMapping(metadata['actions'][pos], image_size_list[pos])
                thought = metadata['step_instructions'][pos]
                turns.append({
                    "role": "assistant",
                    "content": [
                        {"type": "text", "text": f"Thought: {thought}\nAction: {action}"}
                    ]
                })
            return turns

        for key in self.split_json_data.keys():
            data = []
            logger.info(f"Processing the {key} samples in the {self.dataset_name}")
            for item in tqdm(self.split_json_data[key]):
                logger.info(f"Processing the episode: {item}")
                episod_path = os.path.join(self.path, "episode_"+str(item))
                metadata = self.readJson(os.path.join(episod_path, "metadata_episode_"+str(item)+".json"))
                # Synthetic terminal step so the finished state is exported too.
                metadata['actions'] += [{"action_type":"complete"}]
                metadata['image_size'] = [
                    [width, height]
                    for width, height in zip(metadata['screenshot_widths'], metadata['screenshot_heights'])
                ]
                action_traslate = [
                    actionMapping(action, image_size)
                    for (action, image_size) in zip(metadata['actions'], metadata['image_size'])
                ]
                metadata['step_instructions'] += ["task is finished"]

                for i, mapped_action in enumerate(action_traslate):
                    record = deepcopy(sample)
                    is_low = self.dataset_type == 'low'
                    if is_low:
                        prompt_template = ANDROIDCONTROLLOWACTIONPREDICTPROMPT_FORUITARAS
                    else:
                        prompt_template = ANDROIDCONTROLHIGHACTIONPREDICTPROMPT_FORUITARAS
                    record['messages'][1]['content'][0]['text'] = prompt_template.replace("{instruction}", metadata['goal'])
                    if i != 0:
                        record['messages'].extend(build_history(i, metadata, metadata['image_size']))

                    current_turns = [
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "image",
                                    "image": metadata['screenshots'][i].replace(old_root, new_root)
                                }
                            ]
                        }
                    ]
                    if is_low:
                        # Low-level mode supplies the step instruction as the
                        # start of the assistant turn.
                        current_turns.append({
                            "role": "assistant",
                            "content": [
                                {
                                    "type": "text",
                                    "text": f"Thought: {metadata['step_instructions'][i]}\n"
                                }
                            ]
                        })
                    record['messages'].extend(current_turns)

                    record['label'] = f"Thought: {metadata['step_instructions'][i]}\nAction: {mapped_action}"
                    record["episode_id"] = metadata["episode_id"]
                    record["step_id"] = i+1
                    record["images"] = [metadata['screenshots'][i].replace(old_root, new_root)]
                    record['accessibility_trees'] = metadata["accessibility_trees"][i].replace(old_root, new_root)
                    record['goal'] = metadata['goal']
                    record['image_size'] = [(metadata['screenshot_widths'][i], metadata['screenshot_heights'][i])]

                    data.append(record)

            if not os.path.exists(self.save_path):
                os.makedirs(self.save_path)
            output_name = self.dataset_type+"_"+str(key)+"_"+self.model_name.lower()+'.json'
            self.saveJson(data, os.path.join(self.save_path, output_name))
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")
        
    def GUI_R1(self):
        """Export every split of the dataset as GUI-R1 style step records.

        Each step record is a deep copy of the template returned by
        ``super().GUI_R1()`` filled with the screenshot path, the prompt for the
        configured ``dataset_type`` and a ``<think></think><answer>...</answer>``
        label.  One JSON file per split is written under ``self.save_path``.
        """
        from copy import deepcopy

        sample = super().GUI_R1()
        old_root = "android_control_parsed_data_fixed/"
        new_root = "/data3/cpz/datasets/android_control_parsed/"

        def actionMapping(action):
            """Return ``str([{'action', 'point', 'input_text'}])`` for one raw action.

            Handled types: click, open_app, long_press, navigate_home, scroll,
            navigate_back, wait, input_text and the synthetic complete.  Any
            other type keeps its own name with the placeholder point and text.
            """
            kind = action['action_type']
            # Defaults shared by every action that has no point / text payload.
            action_name = kind
            point = [-100, -100]
            input_text = 'no input text'

            if kind == 'navigate_home':
                action_name = 'press_home'
            elif kind == 'navigate_back':
                action_name = 'press_back'
            elif kind == 'input_text':
                action_name = 'type'
                input_text = action.get('text', '')
            elif kind == 'open_app':
                input_text = action.get('app_name', '')
            elif kind == 'scroll':
                # The exported direction is the opposite of the recorded one.
                opposite = {'left': 'right', 'right': 'left', 'up': 'down', 'down': 'up'}
                input_text = opposite[action['direction']]
            elif kind in ('click', 'long_press'):
                point = [action.get('x', -100), action.get('y', -100)]

            return str([{
                'action': action_name,
                'point': point,
                'input_text': input_text
            }])

        for key in self.split_json_data.keys():
            data = []
            logger.info(f"Processing the {key} samples in the {self.dataset_name}")
            for item in tqdm(self.split_json_data[key]):
                logger.info(f"Processing the episode: {item}")
                episod_path = os.path.join(self.path, "episode_"+str(item))
                metadata = self.readJson(os.path.join(episod_path, "metadata_episode_"+str(item)+".json"))
                # Synthetic terminal step so the finished state is exported too.
                metadata['actions'] += [{"action_type":"complete"}]
                action_traslate = [actionMapping(action) for action in metadata['actions']]
                metadata['step_instructions'] += ["task is finished"]

                for i, mapped_action in enumerate(action_traslate):
                    record = deepcopy(sample)
                    record["episode_id"] = metadata["episode_id"]
                    record["step_id"] = i+1
                    record["images"] = [metadata['screenshots'][i].replace(old_root, new_root)]
                    record['accessibility_trees'] = metadata["accessibility_trees"][i].replace(old_root, new_root)
                    record['goal'] = metadata['goal']
                    user_content = record['messages'][0]['content']
                    user_content[0]['image'] = record['images'][0]
                    record['label'] = "<think></think><answer>"+mapped_action+"</answer>"
                    if self.dataset_type == 'low':
                        # Low-level mode asks about the current step instruction.
                        prompt = ANDROIDCONTROLLOWACTIONPREDICTPROMPT_FORGUIR1.replace("{goal}", metadata['step_instructions'][i])
                    else:
                        prompt = ANDROIDCONTROLHIGHACTIONPREDICTPROMPT_FORGUIR1.replace("{goal}", metadata['goal'])
                    user_content[1]['text'] = '<image>\n' + prompt
                    user_content[1]['text'] = user_content[1]['text'].replace("{history}", str(metadata['step_instructions'][:i]))
                    record['image_size'] = [(metadata['screenshot_widths'][i], metadata['screenshot_heights'][i])]
                    data.append(record)
            if not os.path.exists(self.save_path):
                os.makedirs(self.save_path)
            output_name = self.dataset_type+"_"+str(key)+"_"+self.model_name.lower()+'.json'
            self.saveJson(data, os.path.join(self.save_path, output_name))
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")

    def Agent_CPM(self):
        """Export every split of the dataset as Agent-CPM style step records.

        Each step record is a deep copy of the template returned by
        ``super().Agent_CPM()`` with the ``text_prompt`` placeholder replaced by
        the step instruction (``low``) or the episode goal (otherwise), the
        action label, and a system prompt that embeds the action schema.  Steps
        whose action type has no mapping are skipped.  One JSON file per split
        is written under ``self.save_path``.
        """
        from copy import deepcopy
        from prompt.androidControlPrompt import AGENT_CPM_SYSTEM_PROMPT

        sample = super().Agent_CPM()

        def actionMapping(action, image_size):
            """Translate one raw action dict into the Agent-CPM action string.

            Pointer coordinates are rescaled by the screenshot size to 0-1000.
            Returns the literal ``"error"`` for action types without a mapping
            (``open_app`` is one of them).
            """
            t = action['action_type']

            if t == 'click':
                return str({"thought":"", "POINT": [action['x']/image_size[0]*1000, action['y']/image_size[1]*1000]})
            elif t == 'long_press':
                return str({"thought":"", "POINT": [action['x']/image_size[0]*1000, action['y']/image_size[1]*1000], "duration": 1000})
            elif t == 'navigate_home':
                return str({"thought":"", "PRESS": "HOME"})
            elif t == 'navigate_back':
                return str({"thought":"", "PRESS": "BACK"})
            elif t == 'scroll':
                # The exported direction is the opposite of the recorded one.
                map_direction = {'left': 'right', 'right': 'left', 'up': 'down', 'down': 'up'}
                direction = map_direction[action['direction']]
                return str({"thought":"", "POINT": [-100, -100], "to": direction})
            elif t == 'input_text':
                return str({"thought":"", "TYPE": action['text']})
            elif t == "wait":
                return str({"thought":"", 'duration': -100})
            elif t == "complete":
                return str({"thought":"", 'STATUS': 'finish'})
            else:
                return "error"

        # Read the schema through a context manager so the file handle is closed
        # (the previous json.load(open(...)) never closed it).
        with open('/data1/home/chengpengzhou/GUI_VISION/GUI-Speaker/utils/schema/agentCPMSchema.json', encoding="utf-8") as schema_file:
            ACTION_SCHEMA = json.load(schema_file)
        # Insert the "required" entry as the fourth key of the schema.
        items = list(ACTION_SCHEMA.items())
        insert_index = 3
        items.insert(insert_index, ("required", ["thought"]))
        ACTION_SCHEMA = dict(items)
        system_prompt = AGENT_CPM_SYSTEM_PROMPT.replace("ACTION_SCHEMA", str(ACTION_SCHEMA))

        for key in self.split_json_data.keys():
            data = []
            logger.info(f"Processing the {key} samples in the {self.dataset_name}")
            for item in tqdm(self.split_json_data[key]):
                logger.info(f"Processing the episode: {item}")
                episod_path = os.path.join(self.path, "episode_"+str(item))
                metadata = self.readJson(os.path.join(episod_path, "metadata_episode_"+str(item)+".json"))
                # Synthetic terminal step so the finished state is exported too.
                metadata['actions'] += [{"action_type":"complete"}]
                metadata['image_size'] = [[width, height] for width, height in zip(metadata['screenshot_widths'], metadata['screenshot_heights'])]
                action_traslate = [actionMapping(action, image_size) for (action, image_size) in zip(metadata['actions'], metadata['image_size'])]
                metadata['step_instructions'] += ["task is finished"]
                for i, mapped_action in enumerate(action_traslate):
                    if mapped_action == 'error':
                        # No Agent-CPM equivalent for this action: drop the step.
                        continue
                    record = deepcopy(sample)
                    record["episode_id"] = metadata["episode_id"]
                    record["step_id"] = i+1
                    record["images"] = [metadata['screenshots'][i].replace("android_control_parsed_data_fixed/", "/data3/cpz/datasets/android_control_parsed/")]
                    record['accessibility_trees'] = metadata["accessibility_trees"][i].replace("android_control_parsed_data_fixed/", "/data3/cpz/datasets/android_control_parsed/")
                    record['goal'] = metadata['goal']
                    record['label'] = mapped_action
                    if self.dataset_type == 'low':
                        record['messages'][0]['content'][0] = record['messages'][0]['content'][0].replace("text_prompt", metadata['step_instructions'][i])
                    else:
                        record['messages'][0]['content'][0] = record['messages'][0]['content'][0].replace("text_prompt", metadata['goal'])
                    record['image_size'] = [(metadata['screenshot_widths'][i], metadata['screenshot_heights'][i])]
                    record['system_prompt'] = system_prompt
                    data.append(record)

            if not os.path.exists(self.save_path):
                os.makedirs(self.save_path)
            self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_"+str(key)+"_"+self.model_name.lower()+'.json'))
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")

    def OS_Genesis(self):
        """Export every split of the dataset as OS-Genesis style step records.

        Each step record is a deep copy of the template returned by
        ``super().OS_Genesis()`` with the action label and a ``question`` built
        from the OS-Genesis prompt for the configured ``dataset_type``, the
        previous step instructions and the text nodes of the step's
        accessibility tree.  One JSON file per split is written under
        ``self.save_path``.
        """
        from copy import deepcopy
        from prompt.androidControlPrompt import OS_GENESIS_HIGH_PROMPT, OS_GENESIS_LOW_PROMPT

        sample = super().OS_Genesis()

        def actionMapping(action, thought, dataset_type):
            """Build the ``Low-level thought: ... action: ...`` label of one step.

            Args:
                action: Raw action dict carrying at least ``action_type``.
                thought: Step instruction placed in front of the action.
                dataset_type: ``'low'`` reverses the scroll direction.

            Returns:
                The label string, or the literal ``"error"`` for an action type
                without a mapping.
            """
            t = action['action_type']
            if t == 'click':
                return f'Low-level thought: {thought} action: {{"action_type": "click", "x": {action["x"]}, "y": {action["y"]}}}'
            elif t == 'long_press':
                return f'Low-level thought: {thought} action: {{"action_type": "long_press", "x": {action["x"]}, "y": {action["y"]}}}'
            elif t == 'navigate_home':
                return f'Low-level thought: {thought} action: {{"action_type": "navigate_home"}}'
            elif t == 'navigate_back':
                return f'Low-level thought: {thought} action: {{"action_type": "navigate_back"}}'
            elif t == 'scroll':
                direction = action['direction']
                if dataset_type =='low':
                    map_direction = {"left": "right", "right": "left", "up": "down", "down": "up"}
                    direction = map_direction[action['direction']]
                return f'Low-level thought: {thought} action: {{"action_type": "scroll", "direction": "{direction}"}}'
            elif t == 'input_text':
                action_dict = {
                    "action_type": "type",
                    "text": action["text"],
                    "x": -100,
                    "y": -100
                }
                json_action = json.dumps(action_dict)
                return f"Low-level thought: {thought} action: {json_action}"
            elif t == "wait":
                # NOTE(review): wait/stop use single quotes while the other
                # labels use JSON double quotes - confirm this is intended.
                return f"Low-level thought: {thought} action: {{'action_type': 'wait'}}"
            elif t == "open_app":
                return f'Low-level thought: {thought} action: {{"action_type": "open_app", "app_name": "{action["app_name"]}"}}'
            elif t in ('complete', 'stop'):
                # Accept both spellings of the terminal step.  The sentinel used
                # to be appended as "stop" while only "complete" was handled, so
                # every episode's last label came out as "error".
                return f"Low-level thought: {thought} action: {{'action_type': 'stop'}}"
            else:
                return "error"

        def get_a11_tree(a11_tree_path):
            """Map node text to its bbox centre for a JSON-lines accessibility tree.

            Nodes without text/content description or without ``bbox_pixels``,
            and lines that fail to parse, are ignored.
            """
            clickable_nodes = {}
            with open(a11_tree_path, 'r', encoding='utf-8') as f:
                for line in f:
                    try:
                        node = json.loads(line)
                        text = node.get("text") or node.get("content_description")
                        if not text:
                            continue
                        bbox = node.get("bbox_pixels")
                        if not bbox:
                            continue
                        center_x = (bbox["x_min"] + bbox["x_max"]) / 2
                        center_y = (bbox["y_min"] + bbox["y_max"]) / 2
                        clickable_nodes[text.strip()] = (center_x, center_y)
                    except (json.JSONDecodeError, KeyError, TypeError):
                        continue
            return clickable_nodes

        for key in self.split_json_data.keys():
            data = []
            logger.info(f"Processing the {key} samples in the {self.dataset_name}")
            for item in tqdm(self.split_json_data[key]):
                logger.info(f"Processing the episode: {item}")
                episod_path = os.path.join(self.path, "episode_"+str(item))
                metadata = self.readJson(os.path.join(episod_path, "metadata_episode_"+str(item)+".json"))
                # Same terminal sentinel as the other converters in this class.
                metadata['actions'] += [{"action_type":"complete"}]
                metadata['step_instructions'] += ["task is finished"]
                action_traslate = [actionMapping(action, low_level_instruction, self.dataset_type) for (action, low_level_instruction) in zip(metadata['actions'], metadata['step_instructions'])]

                for i, mapped_action in enumerate(action_traslate):
                    record = deepcopy(sample)
                    record["episode_id"] = metadata["episode_id"]
                    record["step_id"] = i+1
                    record["images"] = [metadata['screenshots'][i].replace("android_control_parsed_data_fixed/", "/data3/cpz/datasets/android_control_parsed/")]
                    record['accessibility_trees'] = metadata["accessibility_trees"][i].replace("android_control_parsed_data_fixed/", "/data3/cpz/datasets/android_control_parsed/")
                    record['goal'] = metadata['goal']
                    record['label'] = mapped_action
                    record['image_size'] = [(metadata['screenshot_widths'][i], metadata['screenshot_heights'][i])]
                    previous_actions = [f"Step {step}:{low_level}" for step, low_level in enumerate(metadata['step_instructions'][:i])]
                    if self.dataset_type == 'low':
                        record['question'] = OS_GENESIS_LOW_PROMPT.format(instruction=record['goal'], history='\n'.join(previous_actions), a11y_tree=str(get_a11_tree(record['accessibility_trees'])),low_level_thought=metadata['step_instructions'][i])
                    else:
                        # NOTE(review): the high-level prompt receives the history
                        # as a list, the low-level one as joined text - confirm.
                        record['question'] = OS_GENESIS_HIGH_PROMPT.format(instruction=record['goal'], history=previous_actions, a11y_tree=str(get_a11_tree(record['accessibility_trees'])))
                    data.append(record)

            if not os.path.exists(self.save_path):
                os.makedirs(self.save_path)
            self.saveJson(data, os.path.join(self.save_path, self.dataset_type+"_"+str(key)+"_"+self.model_name.lower()+'.json'))
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")

    def Aguvis(self):
        """Export every split of the dataset as Aguvis style step records.

        Each step record is a deep copy of the template returned by
        ``super().Aguvis()`` with the action label, the formatted user
        instruction and the low-level flags for the configured ``dataset_type``.
        The number of records is printed and one JSON file per split is written
        under ``self.save_path``.
        """
        from copy import deepcopy
        from utils.schema.aguvisConstants import user_instruction

        sample = super().Aguvis()
        old_root = "android_control_parsed_data_fixed/"
        new_root = "/data3/cpz/datasets/android_control_parsed/"

        def actionMapping(action, image_size):
            """Translate one raw action dict into the Aguvis action string.

            Handled types: click, open_app, long_press, navigate_home, scroll,
            navigate_back, wait, input_text and the synthetic complete.
            Pointer coordinates are divided by the screenshot size.  Returns the
            literal ``"error"`` for any other action type.
            """
            kind = action['action_type']

            if kind == 'click':
                return f"assistantos\npyautogui.click(x={action['x']/image_size[0]}, y={action['y']/image_size[1]})"
            if kind == 'long_press':
                return f"assistantos\nmobile.long_press(x={action['x']/image_size[0]}, y={action['y']/image_size[1]})"
            if kind == 'navigate_home':
                return "assistantos\nmobile.home()"
            if kind == 'navigate_back':
                return "assistantos\nmobile.back()"
            if kind == 'scroll':
                direction = action['direction']
                if direction == 'left':
                    return "assistantos\npyautogui.hscroll(page=-0.1)"
                if direction == 'right':
                    return "assistantos\npyautogui.hscroll(page=0.1)"
                if direction == 'up':
                    return "assistantos\npyautogui.scroll(page=0.1)"
                # Every remaining direction value is exported as a downward scroll.
                return "assistantos\npyautogui.scroll(page=-0.1)"
            if kind == 'input_text':
                return f"assistantos\npyautogui.write(message='{action['text']}')"
            if kind == "wait":
                return "assistantos\nmobile.wait(seconds=3)"
            if kind == "complete":
                return "assistantos\nmobile.terminate(status='success')"
            if kind == "open_app":
                return f"assistantos\nmobile.open_app(app_name='{action['app_name']}')"
            return "error"

        for key in self.split_json_data.keys():
            data = []
            logger.info(f"Processing the {key} samples in the {self.dataset_name}")
            for item in tqdm(self.split_json_data[key]):
                logger.info(f"Processing the episode: {item}")
                episod_path = os.path.join(self.path, "episode_"+str(item))
                metadata = self.readJson(os.path.join(episod_path, "metadata_episode_"+str(item)+".json"))
                # Synthetic terminal step so the finished state is exported too.
                metadata['actions'] += [{"action_type":"complete"}]
                metadata['image_size'] = [
                    [width, height]
                    for width, height in zip(metadata['screenshot_widths'], metadata['screenshot_heights'])
                ]
                action_traslate = [
                    actionMapping(action, image_size)
                    for (action, image_size) in zip(metadata['actions'], metadata['image_size'])
                ]
                metadata['step_instructions'] += ["task is finished"]

                for i, mapped_action in enumerate(action_traslate):
                    record = deepcopy(sample)
                    record["episode_id"] = metadata["episode_id"]
                    record["step_id"] = i+1
                    record["images"] = [metadata['screenshots'][i].replace(old_root, new_root)]
                    record['accessibility_trees'] = metadata["accessibility_trees"][i].replace(old_root, new_root)
                    record['goal'] = metadata['goal']
                    record['label'] = mapped_action
                    record['image_size'] = [(metadata['screenshot_widths'][i], metadata['screenshot_heights'][i])]
                    previous_actions = [f"Step{step}:{low_level}" for step, low_level in enumerate(metadata['step_instructions'][:i])]

                    is_low = self.dataset_type == 'low'
                    # Only low-level mode passes the current step instruction on.
                    low_level_instruction = metadata['step_instructions'][i] if is_low else ""
                    record['messages']['content'][1]['text'] = user_instruction.format(
                        overall_goal=record['goal'],
                        previous_actions=previous_actions,
                        low_level_instruction=low_level_instruction,
                    )
                    record['is_low_level_instruction'] = is_low
                    if is_low:
                        record['low_level_instruction'] = metadata['step_instructions'][i]
                    record['mode'] = 'force-plan'
                    data.append(record)

            if not os.path.exists(self.save_path):
                os.makedirs(self.save_path)
            print(len(data))
            output_name = self.dataset_type+"_"+str(key)+"_"+self.model_name.lower()+'.json'
            self.saveJson(data, os.path.join(self.save_path, output_name))
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")
      
    def GUI_Odyssey(self):
        """Export every non-train split as GUI-Odyssey style samples.

        For each episode the metadata JSON is read, a trailing ``complete``
        action and a matching "task is finished" step instruction are appended,
        and one record per step is built from a deep copy of the sample
        template returned by the base class. The question of each record holds
        the instruction (the step instruction for the ``low`` dataset type, the
        overall goal otherwise) followed by a history section that lists at
        most the four most recent translated actions.

        Two files are written per split: the sample list under ``save_path``
        and ``his_index.json``, which maps each screenshot path to the
        screenshots that preceded it within its episode.
        """
        from copy import deepcopy

        template = super().GUI_Odyssey()

        def actionMapping(action, image_size):
            """Translate one action dict into its GUI-Odyssey command string.

            Tap coordinates are divided by ``image_size`` and multiplied by
            1000. Unknown action types are logged and yield ``None``.
            """
            kind = action['action_type']

            def scaled_point():
                px = int(action['x'] / image_size[0] * 1000)
                py = int(action['y'] / image_size[1] * 1000)
                return f"({px}, {py})"

            if kind == 'click':
                return f"CLICK: {scaled_point()}"
            if kind == "wait":
                return "WAIT"
            if kind == "long_press":
                return f"LONG_PRESS: {scaled_point()}"
            if kind == "open_app":
                return f"OPENAPP: {action['app_name']}"
            if kind == "navigate_home":
                return "PRESS_HOME"
            if kind == "navigate_back":
                return "PRESS_BACK"
            if kind == "scroll":
                return f"SCROLL: {action['direction'].upper()}"
            if kind == "input_text":
                return f"TYPE: {action['text']}"
            if kind == "complete":
                return "COMPLETE"
            logger.error(f"Action mapping error: {action}")
            return None

        for split_name, episode_ids in self.split_json_data.items():
            if split_name == 'train':
                continue
            records = []
            history_index = {}
            logger.info(f"Processing the {split_name} samples in the {self.dataset_name}")
            for episode in episode_ids:
                logger.info(f"Processing the episode: {episode}")
                episode_dir = os.path.join(self.path, "episode_" + str(episode))
                metadata = self.readJson(
                    os.path.join(episode_dir, "metadata_episode_" + str(episode) + ".json")
                )
                # Every episode ends with a synthetic "complete" step.
                metadata['actions'] += [{"action_type": "complete"}]
                metadata['image_size'] = [
                    [metadata['screenshot_widths'][idx], metadata['screenshot_heights'][idx]]
                    for idx, _ in enumerate(metadata['screenshots'])
                ]
                labels = [
                    actionMapping(act, size)
                    for act, size in zip(metadata['actions'], metadata['image_size'])
                ]
                metadata['step_instructions'] += ["task is finished"]
                action_history = []
                screenshot_history = []
                for i, label in enumerate(labels):
                    record = deepcopy(template)
                    record["episode_id"] = metadata["episode_id"]
                    record["step_id"] = i + 1
                    img = metadata['screenshots'][i].replace(
                        "android_control_parsed_data_fixed/",
                        "/data3/cpz/datasets/android_control_parsed/",
                    )
                    record["images"] = [img]
                    # The prompt references history by the current screenshot
                    # path; this index resolves it to the earlier screenshots.
                    history_index[f"{img}"] = screenshot_history[:i]
                    record['accessibility_trees'] = metadata["accessibility_trees"][i].replace(
                        "android_control_parsed_data_fixed/",
                        "/data3/cpz/datasets/android_control_parsed/",
                    )
                    record['goal'] = metadata['goal']
                    record['label'] = label
                    record['image_size'] = [
                        (metadata['screenshot_widths'][i], metadata['screenshot_heights'][i])
                    ]

                    question_template = record['question']
                    if self.dataset_type == 'low':
                        instruction = metadata['step_instructions'][i]
                    else:
                        instruction = record['goal']
                    question = question_template.format(
                        instruction=instruction,
                        image_path=record['images'][0],
                    )

                    if i > 0:
                        his_img = f'\nPrevious screenshots: <img>image-history: {img}</img>'
                        numbered = "".join(
                            f"{pos}. {past}\n"
                            for pos, past in enumerate(action_history[-4:], start=1)
                        )
                        question = f"{question}{his_img}" + '\nPrevious Actions: ' + numbered
                    else:
                        question = question + '\nPrevious screenshots: None' + '\nPrevious Actions: None'
                    question = question + '\nProvide the command-style action directly.'

                    record['question'] = question
                    record['low_level_instruction'] = metadata['step_instructions'][i]
                    action_history.append(label)
                    screenshot_history.append(img)
                    records.append(record)

            if not os.path.exists(self.save_path):
                os.makedirs(self.save_path)
            sample_file = os.path.join(
                self.save_path,
                f"{self.dataset_type}_{split_name}_{self.model_name.lower()}.json",
            )
            self.saveJson(records, sample_file)
            self.saveJson(
                history_index,
                os.path.join(
                    "/data1/home/chengpengzhou/GUI_VISION/GUI-Speaker/utils/utils_odyssey",
                    "his_index.json",
                ),
            )
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")

    def GUI_OWL(self):
        """Export every non-train split as GUI-OWL ``mobile_use`` samples.

        For each episode the metadata JSON is read, a trailing ``complete``
        action and a matching "task is finished" step instruction are appended,
        and one record per step is built from a deep copy of the sample
        template returned by the base class. The label of each record is a
        tool-call string with empty thinking/conclusion sections; the messages
        are built from the resized screenshot dimensions, the instruction (the
        step instruction for the ``low`` dataset type, the overall goal
        otherwise) and the preceding step instructions as history.

        One JSON file per split is written under ``save_path``.
        """
        from copy import deepcopy

        build_system_messages, getResizedImage, build_user_messages, sample = super().GUI_OWL()
        old_root = "android_control_parsed_data_fixed/"
        new_root = "/data3/cpz/datasets/android_control_parsed/"

        def toolCall(arguments):
            """Wrap a JSON ``arguments`` object in the tool-call envelope."""
            return (
                '<thinking>\n""\n</thinking>\n<tool_call>\n'
                '{"name": "mobile_use", "arguments": ' + arguments + '}'
                '\n</tool_call>\n<conclusion>\n""\n</conclusion>'
            )

        def quoted(value):
            """Serialize free text as a JSON string literal.

            Plain interpolation produced invalid JSON whenever the text held a
            double quote, a backslash or a newline.
            """
            return json.dumps(str(value), ensure_ascii=False)

        def actionMapping(action):
            """Translate one action dict into a GUI-OWL tool-call label.

            Unknown action types are logged and yield ``None``.
            """
            kind = action['action_type']
            if kind == 'click':
                return toolCall(
                    f'{{"action": "click", "coordinate": [{action["x"]}, {action["y"]}]}}'
                )
            if kind == "wait":
                return toolCall('{"action": "wait", "time": 2}')
            if kind == "long_press":
                return toolCall(
                    f'{{"action": "long_press", "coordinate": [{action["x"]}, {action["y"]}], "time": 2}}'
                )
            if kind == "open_app":
                return toolCall(f'{{"action": "open", "text": {quoted(action["app_name"])}}}')
            if kind == "navigate_home":
                return toolCall('{"action": "system_button", "button": "Home"}')
            if kind == "navigate_back":
                return toolCall('{"action": "system_button", "button": "Back"}')
            if kind == "scroll":
                # Each direction maps to a unit swipe along a single axis.
                swipes = {
                    'UP': ((0, 0), (0, 1)),
                    'DOWN': ((0, 1), (0, 0)),
                    'LEFT': ((0, 0), (1, 0)),
                }
                # Any other direction is treated as RIGHT, the mirror of LEFT.
                # It previously ended at (0, 1), which made the swipe diagonal.
                (x1, y1), (x2, y2) = swipes.get(
                    action['direction'].upper(), ((1, 0), (0, 0))
                )
                return toolCall(
                    f'{{"action": "swipe", "coordinate": [{x1}, {y1}], "coordinate2": [{x2}, {y2}]}}'
                )
            if kind == "input_text":
                return toolCall(f'{{"action": "type", "text": {quoted(action["text"])}}}')
            if kind == "complete":
                return toolCall('{"action": "terminate", "status": "success"}')
            logger.error(f"Action mapping error: {action}")
            return None

        for key in self.split_json_data.keys():
            if key == 'train':
                continue
            data = []
            logger.info(f"Processing the {key} samples in the {self.dataset_name}")
            for item in self.split_json_data[key]:
                logger.info(f"Processing the episode: {item}")
                episod_path = os.path.join(self.path, "episode_" + str(item))
                metadata = self.readJson(
                    os.path.join(episod_path, "metadata_episode_" + str(item) + ".json")
                )
                # Every episode ends with a synthetic "complete" step.
                metadata['actions'] += [{"action_type": "complete"}]
                action_traslate = [actionMapping(action) for action in metadata['actions']]
                metadata['step_instructions'] += ["task is finished"]

                for i, label in enumerate(action_traslate):
                    record = deepcopy(sample)
                    record["episode_id"] = metadata["episode_id"]
                    record["step_id"] = i + 1
                    img = metadata['screenshots'][i].replace(old_root, new_root)
                    record["images"] = [img]
                    record['accessibility_trees'] = metadata["accessibility_trees"][i].replace(
                        old_root, new_root
                    )
                    record['goal'] = metadata['goal']
                    record['label'] = label
                    record['image_size'] = [
                        (metadata['screenshot_widths'][i], metadata['screenshot_heights'][i])
                    ]
                    # The system prompt is parameterized by the resized image dimensions.
                    dummy_image = getResizedImage(record['images'][0])
                    system_messages = build_system_messages(dummy_image.height, dummy_image.width)

                    if self.dataset_type == 'low':
                        instruction = metadata['step_instructions'][i]
                    else:
                        instruction = record['goal']
                    user_messages = build_user_messages(
                        instruction,
                        enable_think=True,
                        history=metadata['step_instructions'][:i],
                    )

                    user_messages['content'].append({"image": record['images'][0]})
                    messages = [system_messages, user_messages]
                    record['messages'] = message_translate(messages, to_format='qwen')
                    data.append(record)

            os.makedirs(self.save_path, exist_ok=True)
            self.saveJson(
                data,
                os.path.join(
                    self.save_path,
                    self.dataset_type + "_" + str(key) + "_" + self.model_name.lower() + '.json',
                ),
            )
            logger.info(f"transform {self.dataset_type} of {self.dataset_name} dataset to json succesfuully")
        logger.info("Finished")

    

    def _merge_train_validation(self):
        split_json_path = os.path.join(self.path, self.split_json_name)
        split_json_data = self.readJson(split_json_path)
        split_json_data['train'] += split_json_data['validation']
        del split_json_data['validation']
        return split_json_data

    def process_from_json(self, json_file_path, model_name):
        """Read records from a JSON file and convert them for the given model.

        Args:
            json_file_path: Path of the JSON file to load.
            model_name: Target format; one of "OS_ATLAS", "UI_TARS",
                "UI_TARS_1.5" or "GUI_OWL". Any other value is logged as an
                error and nothing is converted.
        """
        logger.info(f"正在从JSON文件读取数据: {json_file_path}")
        records = self.readJson(json_file_path)
        logger.info(f"读取到 {len(records)} 条记录")

        if model_name == "OS_ATLAS":
            self._process_os_atlas_from_json(records)
            return
        if model_name in ("UI_TARS", "UI_TARS_1.5"):
            # Both UI-TARS variants share one converter; it receives the name
            # so it can distinguish them.
            self._process_ui_tars_from_json(records, model_name)
            return
        if model_name == "GUI_OWL":
            self._process_gui_owl_from_json(records)
            return
        logger.error(f"不支持的模型名称: {model_name}")

    def _gt_action_to_os_atlas(self, gt_action, image_size=None):
        """将gt_action转换为OS_ATLAS格式"""
        action_type = gt_action.get('action', '')
        if action_type == 'click':
            coord = gt_action.get('coordinate', [0, 0])
            if image_size:
                x = int(coord[0] / image_size[0] * 1000)
                y = int(coord[1] / image_size[1] * 1000)
            else:
                x, y = coord[0], coord[1]
            return f"CLICK <point>[[{x}, {y}]]</point>"
        elif action_type == 'system_button':
            button = gt_action.get('button', '')
            if button == 'Back':
                return "PRESS_BACK"
            elif button == 'Home':
                return "PRESS_HOME"
            elif button == 'Enter':
                return "ENTER"
        elif action_type == 'type':
            text = gt_action.get('text', '')
            return f"TYPE [{text}]"
        elif action_type == 'swipe':
            direction = gt_action.get('direction', 'UP')
            return f"SCROLL [{direction.upper()}]"
        elif action_type == 'terminate':
            return "COMPLETE"
        elif action_type == 'wait':
            return "WAIT"
        elif action_type == 'long_press':
            coord = gt_action.get('coordinate', [0, 0])
            if image_size:
                x = int(coord[0] / image_size[0] * 1000)
                y = int(coord[1] / image_size[1] * 1000)
            else:
                x, y = coord[0], coord[1]
            return f"LONG_CLICK <point>[[{x}, {y}]]</point>"
        else:
            logger.warning(f"未知的action类型: {action_type}")
            return "WAIT"

    def _gt_action_to_ui_tars(self, gt_action, image_size=None, model_name="UI_TARS"):
        """将gt_action转换为UI_TARS格式"""
        action_type = gt_action.get('action', '')
        if action_type == 'click':
            coord = gt_action.get('coordinate', [0, 0])
            if "1.5" in model_name:
                x, y = coord[0], coord[1]
            else:
                if image_size:
                    x = int(coord[0] / image_size[0] * 1000)
                    y = int(coord[1] / image_size[1] * 1000)
                else:
                    x, y = coord[0], coord[1]
            return f"click(start_box='({x},{y})')"
        elif action_type == 'system_button':
            button = gt_action.get('button', '')
            if button == 'Back':
                return "press_back()"
            elif button == 'Home':
                return "press_home()"
            elif button == 'Enter':
                return "enter()"
        elif action_type == 'type':
            text = gt_action.get('text', '')
            return f"type(content='{text}')"
        elif action_type == 'swipe':
            direction = gt_action.get('direction', 'down')
            return f"scroll(direction='{direction}')"
        elif action_type == 'terminate':
            return "finished()"
        elif action_type == 'wait':
            return "wait()"
        elif action_type == 'long_press':
            coord = gt_action.get('coordinate', [0, 0])
            if "1.5" in model_name:
                x, y = coord[0], coord[1]
            else:
                if image_size:
                    x = int(coord[0] / image_size[0] * 1000)
                    y = int(coord[1] / image_size[1] * 1000)
                else:
                    x, y = coord[0], coord[1]
            return f"long_press(start_box='({x},{y})', time='')"
        else:
            logger.warning(f"未知的action类型: {action_type}")
            return "wait()"

    def _gt_action_to_gui_owl(self, gt_action):
        """将gt_action转换为GUI_OWL格式"""
        action_type = gt_action.get('action', '')
        if action_type == 'click':
            coord = gt_action.get('coordinate', [0, 0])
            x, y = coord[0], coord[1]
            return f"""<thinking>\n""\n</thinking>\n<tool_call>\n{{"name": "mobile_use", "arguments": {{"action": "click", "coordinate": [{x}, {y}]}}}}\n</tool_call>\n<conclusion>\n""\n</conclusion>"""
        elif action_type == 'system_button':
            button = gt_action.get('button', '')
            if button == 'Back':
                return f"""<thinking>\n""\n</thinking>\n<tool_call>\n{{"name": "mobile_use", "arguments": {{"action": "system_button", "button": "Back"}}}}\n</tool_call>\n<conclusion>\n""\n</conclusion>"""
            elif button == 'Home':
                return f"""<thinking>\n""\n</thinking>\n<tool_call>\n{{"name": "mobile_use", "arguments": {{"action": "system_button", "button": "Home"}}}}\n</tool_call>\n<conclusion>\n""\n</conclusion>"""
            elif button == 'Enter':
                return f"""<thinking>\n""\n</thinking>\n<tool_call>\n{{"name": "mobile_use", "arguments": {{"action": "system_button", "button": "Enter"}}}}\n</tool_call>\n<conclusion>\n""\n</conclusion>"""
        elif action_type == 'type':
            text = gt_action.get('text', '')
            return f"""<thinking>\n""\n</thinking>\n<tool_call>\n{{"name": "mobile_use", "arguments": {{"action": "type", "text": "{text}"}}}}\n</tool_call>\n<conclusion>\n""\n</conclusion>"""
        elif action_type == 'swipe':
            direction = gt_action.get('direction', 'UP')
            # 简化处理，使用固定坐标
            if direction.upper() == 'UP':
                return f"""<thinking>\n""\n</thinking>\n<tool_call>\n{{"name": "mobile_use", "arguments": {{"action": "swipe", "coordinate": [0, 1], "coordinate2": [0, 0]}}}}\n</tool_call>\n<conclusion>\n""\n</conclusion>"""
            elif direction.upper() == 'DOWN':
                return f"""<thinking>\n""\n</thinking>\n<tool_call>\n{{"name": "mobile_use", "arguments": {{"action": "swipe", "coordinate": [0, 0], "coordinate2": [0, 1]}}}}\n</tool_call>\n<conclusion>\n""\n</conclusion>"""
            elif direction.upper() == 'LEFT':
                return f"""<thinking>\n""\n</thinking>\n<tool_call>\n{{"name": "mobile_use", "arguments": {{"action": "swipe", "coordinate": [1, 0], "coordinate2": [0, 0]}}}}\n</tool_call>\n<conclusion>\n""\n</conclusion>"""
            else:  # RIGHT
                return f"""<thinking>\n""\n</thinking>\n<tool_call>\n{{"name": "mobile_use", "arguments": {{"action": "swipe", "coordinate": [0, 0], "coordinate2": [1, 0]}}}}\n</tool_call>\n<conclusion>\n""\n</conclusion>"""
        elif action_type == 'terminate':
            status = gt_action.get('status', 'success')
            return f"""<thinking>\n""\n</thinking>\n<tool_call>\n{{"name": "mobile_use", "arguments": {{"action": "terminate", "status": "{status}"}}}}\n</tool_call>\n<conclusion>\n""\n</conclusion>"""
        elif action_type == 'wait':
            time = gt_action.get('time', 2)
            return f"""<thinking>\n""\n</thinking>\n<tool_call>\n{{"name": "mobile_use", "arguments": {{"action": "wait", "time": {time}}}}}\n</tool_call>\n<conclusion>\n""\n</conclusion>"""
        elif action_type == 'long_press':
            coord = gt_action.get('coordinate', [0, 0])
            time = gt_action.get('time', 2)
            x, y = coord[0], coord[1]
            return f"""<thinking>\n""\n</thinking>\n<tool_call>\n{{"name": "mobile_use", "arguments": {{"action": "long_press", "coordinate": [{x}, {y}], "time": {time}}}}}\n</tool_call>\n<conclusion>\n""\n</conclusion>"""
        else:
            logger.warning(f"未知的action类型: {action_type}")
            return f"""<thinking>\n""\n</thinking>\n<tool_call>\n{{"name": "mobile_use", "arguments": {{"action": "wait", "time": 2}}}}\n</tool_call>\n<conclusion>\n""\n</conclusion>"""

    def _process_os_atlas_from_json(self, data):
        """Convert already-loaded JSON records into OS-ATLAS samples and save them.

        Each input item contributes one record built from a deep copy of the
        sample template returned by the base class: ids, images and the
        optional ``data_type`` are copied over, ``gt_action`` is converted to
        an OS-ATLAS action string, and the prompt is filled from
        ``instruction`` and the ``;``-separated ``action_history``.

        Args:
            data: Iterable of dicts as read from the input JSON file.

        The result is written to ``test_<model_name>.json`` under ``save_path``.
        """
        from copy import deepcopy

        sample = super().OS_ATLAS()
        processed_data = []

        for item in tqdm(data, desc="处理OS_ATLAS数据"):
            record = deepcopy(sample)

            record["episode_id"] = item.get("episode_id", "")
            record["step_id"] = item.get("step_id", 0)
            record["images"] = item.get("images", [])
            # Keep the data_type field when the input provides it.
            if "data_type" in item:
                record["data_type"] = item["data_type"]

            # Read the image size; presumably readImage returns a
            # (width, height)-like pair -- TODO confirm against the base class.
            image_size = None
            if record["images"]:
                try:
                    image_size = self.readImage(record["images"][0])
                except Exception as e:
                    # Best effort: without a size the coordinates are emitted
                    # unscaled, so make the failure visible instead of hiding it.
                    logger.warning(f"读取图片尺寸失败: {e}")

            # Convert the action.
            gt_action = item.get("gt_action", {})
            action_str = self._gt_action_to_os_atlas(gt_action, image_size)
            record['messages'][1]['content'] = action_str
            record['label'] = "action:\n" + action_str

            # Build the prompt.
            instruction = item.get("instruction", "")
            action_history = item.get("action_history", "")

            # Simple parse, assuming the format "Step 1: ... ; Step 2: ...".
            previous_actions = []
            if action_history:
                previous_actions = [
                    step.strip() for step in action_history.split(';') if step.strip()
                ]

            if self.dataset_type == 'low':
                # The low-level prompt needs an action description, which this
                # data format lacks, so the instruction is used for it as well.
                prompt = ANDROIDCONTROLLOWACTIONPREDICTPROMPT_FOROSATLAS.replace("{finalGoal}", instruction)
                prompt = prompt.replace("{actionDesc}", instruction)
            else:
                prompt = ANDROIDCONTROLHIGHACTIONPREDICTPROMPT_FOROSATLAS.replace("{finalGoal}", instruction)
            prompt = prompt.replace("{previousActions}", str(previous_actions))

            record['messages'][0]['content'] = prompt

            processed_data.append(record)

        # Save the converted records.
        os.makedirs(self.save_path, exist_ok=True)
        output_file = os.path.join(self.save_path, f"test_{self.model_name.lower()}.json")
        self.saveJson(processed_data, output_file)
        logger.info(f"已保存 {len(processed_data)} 条OS_ATLAS数据到 {output_file}")

    def _process_ui_tars_from_json(self, data, model_name):
        """Convert already-loaded JSON records into UI-TARS samples and save them.

        Each input item contributes one record built from a deep copy of the
        sample template returned by the base class: ids, images and the
        optional ``data_type`` are copied over, the prompt is filled from
        ``instruction``, and a user turn with the first image plus an assistant
        turn with the expected "Thought/Action" answer are appended.

        Args:
            data: Iterable of dicts as read from the input JSON file.
            model_name: UI-TARS variant name, forwarded to the action converter.

        The result is written to ``test_<model_name>.json`` under ``save_path``,
        where the file name uses ``self.model_name``.
        """
        from copy import deepcopy

        sample = super().UI_TARS()
        processed_data = []

        for item in tqdm(data, desc="处理UI_TARS数据"):
            record = deepcopy(sample)

            record["episode_id"] = item.get("episode_id", "")
            record["step_id"] = item.get("step_id", 0)
            record["images"] = item.get("images", [])
            # Keep the data_type field when the input provides it.
            if "data_type" in item:
                record["data_type"] = item["data_type"]

            # Read the image size; presumably readImage returns a
            # (width, height)-like pair -- TODO confirm against the base class.
            image_size = None
            if record["images"]:
                try:
                    image_size = self.readImage(record["images"][0])
                except Exception as e:
                    # Best effort: without a size the coordinates are emitted
                    # unscaled, so make the failure visible instead of hiding it.
                    logger.warning(f"读取图片尺寸失败: {e}")

            # Convert the action.
            gt_action = item.get("gt_action", {})
            action_str = self._gt_action_to_ui_tars(gt_action, image_size, model_name)

            instruction = item.get("instruction", "")

            # Build the prompt.
            if self.dataset_type == 'low':
                prompt = ANDROIDCONTROLLOWACTIONPREDICTPROMPT_FORUITARAS.replace("{instruction}", instruction)
            else:
                prompt = ANDROIDCONTROLHIGHACTIONPREDICTPROMPT_FORUITARAS.replace("{instruction}", instruction)

            record['messages'][1]['content'][0]['text'] = prompt

            # The expected answer doubles as the assistant turn and the label.
            answer = f"Thought: {instruction}\nAction: {action_str}"

            # Append the image turn and the expected answer turn.
            record['messages'].extend([
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "image": record["images"][0] if record["images"] else ""
                        }
                    ]
                },
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "text",
                            "text": answer
                        }
                    ]
                }
            ])

            record['label'] = answer

            processed_data.append(record)

        # Save the converted records.
        os.makedirs(self.save_path, exist_ok=True)
        output_file = os.path.join(self.save_path, f"test_{self.model_name.lower()}.json")
        self.saveJson(processed_data, output_file)
        logger.info(f"已保存 {len(processed_data)} 条UI_TARS数据到 {output_file}")

    def _process_gui_owl_from_json(self, data):
        """Convert already-loaded JSON records into GUI-OWL samples and save them.

        Each input item contributes one record built from a deep copy of the
        sample template returned by the base class. When the item has images,
        the messages are built from the first image, the instruction and the
        ``;``-separated action history; an item whose message construction
        raises is logged and left out. Items without images keep the
        template's messages.

        Args:
            data: Iterable of dicts as read from the input JSON file.

        The result is written to ``test_<model_name>.json`` under ``save_path``.
        """
        from copy import deepcopy

        build_system_messages, getResizedImage, build_user_messages, sample = super().GUI_OWL()
        converted = []

        for entry in tqdm(data, desc="处理GUI_OWL数据"):
            record = deepcopy(sample)

            record["episode_id"] = entry.get("episode_id", "")
            record["step_id"] = entry.get("step_id", 0)
            record["images"] = entry.get("images", [])
            # Keep the data_type field when the input provides it.
            if "data_type" in entry:
                record["data_type"] = entry["data_type"]

            # Convert the action into the label.
            record['label'] = self._gt_action_to_gui_owl(entry.get("gt_action", {}))

            instruction = entry.get("instruction", "")
            raw_history = entry.get("action_history", "")

            # Split the history into its non-empty, trimmed steps.
            history = []
            if raw_history:
                history = [part.strip() for part in raw_history.split(';') if part.strip()]

            # Build the messages.
            if record["images"]:
                try:
                    resized = getResizedImage(record["images"][0])
                    system_messages = build_system_messages(resized.height, resized.width)
                    # Both dataset types use the same user-message call here.
                    user_messages = build_user_messages(instruction, enable_think=True, history=history)
                    user_messages['content'].append({"image": record["images"][0]})
                    record['messages'] = message_translate(
                        [system_messages, user_messages], to_format='qwen'
                    )
                except Exception as err:
                    logger.warning(f"处理图片失败: {err}")
                    continue

            converted.append(record)

        # Save the converted records.
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)
        output_file = os.path.join(self.save_path, f"test_{self.model_name.lower()}.json")
        self.saveJson(converted, output_file)
        logger.info(f"已保存 {len(converted)} 条GUI_OWL数据到 {output_file}")
            
    
if __name__ == '__main__':
    # Script entry point: parse the CLI options and run the converter that
    # matches the requested model.
    args = parse_args()
    logger.info(args)
    if args.dataset_name == 'AndroidControl':
        process = AndroidControlPreProcess(
            args.dataset_type, args.dataset_path, args.dataset_name, args.save_path, args.model_name)
        # Model name -> name of the converter method; both UI-TARS variants
        # share one method.
        converters = {
            "OS_ATLAS": "OS_ATLAS",
            "UI_TARS": "UI_TARS",
            "UI_TARS_1.5": "UI_TARS",
            "GUI_R1": "GUI_R1",
            "Agent_CPM": "Agent_CPM",
            "OS_Genesis": "OS_Genesis",
            "Aguvis": "Aguvis",
            "GUI_Odyssey": "GUI_Odyssey",
            "GUI_OWL": "GUI_OWL",
        }
        converter_name = converters.get(args.model_name)
        if converter_name is None:
            logger.info("error processing")
        else:
            getattr(process, converter_name)()
   