#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
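Run the preprocessing scripts to generate model-specific data from a JSON file.

The intended target formats are OS-Atlas, GUI-Owl and UI-TARS; only the formats
listed in ``models`` in ``main()`` are actually generated (currently UI_TARS).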
"""
import os
import sys
# Add this file's directory to the module search path before the imports below
# run; presumably the preprocess_* modules and the utils package live next to
# this script -- TODO confirm.
sys.path.append(os.path.dirname(__file__))

from preprocess_ac import AndroidControlPreProcess
from preprocess_aitz import AITZPreProcess
from utils.logging_utils import setup_logger_to_stdout

# Module-level logger used by main(); going by the helper's name it writes to
# stdout -- verify in utils.logging_utils.
logger = setup_logger_to_stdout()

def _run_preprocess(process_cls, dataset_type, dataset_name, json_file,
                    save_path, models):
    """Run one dataset's preprocessing for every requested model format.

    Each model is handled independently: a failure is logged together with its
    traceback and the remaining models are still attempted.

    Args:
        process_cls: Preprocessor class to instantiate. It is called with the
            keyword arguments ``dataset_type``, ``dataset_path``,
            ``dataset_name``, ``save_path`` and ``model_name``, and the
            resulting object must provide ``process_from_json``.
        dataset_type: Value forwarded as ``dataset_type`` to ``process_cls``.
        dataset_name: Value forwarded as ``dataset_name``; also used to label
            failures.
        json_file: Path of the input JSON file passed to ``process_from_json``.
        save_path: Output directory forwarded to ``process_cls``.
        models: Iterable of model format names to generate.

    Returns:
        A list of ``"<dataset_name>/<model_name>"`` labels, one per failed run.
    """
    import traceback

    failed = []
    for model_name in models:
        logger.info(f"\n正在处理 {model_name} 格式...")
        try:
            process = process_cls(
                dataset_type=dataset_type,
                dataset_path="",  # unused: the data is read from the JSON file
                dataset_name=dataset_name,
                save_path=save_path,
                model_name=model_name,
            )
            process.process_from_json(json_file, model_name)
        except Exception as e:
            # Best effort: record the failure and carry on with the next model.
            logger.error(f"✗ {model_name} 处理失败: {e}")
            traceback.print_exc()
            failed.append(f"{dataset_name}/{model_name}")
        else:
            logger.info(f"✓ {model_name} 处理完成")
    return failed


def main(json_file="/home/chengpengzhou/hhw/rft_data/baseline/test.json",
         save_path="/home/chengpengzhou/hhw/rft_data/baseline/data",
         dataset_type="low",
         models=("UI_TARS",)):
    """Generate model-specific data for AndroidControl and AITZ from a JSON file.

    Calling ``main()`` with no arguments uses the original hard-coded
    configuration.

    Args:
        json_file: Path of the input JSON file. The same file is handed to both
            the AndroidControl and the AITZ preprocessor.
        save_path: Output directory; created if it does not exist.
        dataset_type: Dataset type for AndroidControl, ``"low"`` or ``"high"``.
            AITZ always uses ``"all_low"``.
        models: Model format names to generate. Only ``UI_TARS`` is enabled by
            default; the original note says GUI_OWL needs extra dependencies
            and should be listed last.

    Returns:
        ``0`` if every run succeeded, ``1`` if at least one run failed.
    """
    # Make sure the output directory exists.
    os.makedirs(save_path, exist_ok=True)

    failures = []

    # AndroidControl data
    logger.info("=" * 50)
    logger.info("处理AndroidControl数据")
    logger.info("=" * 50)
    failures += _run_preprocess(
        AndroidControlPreProcess, dataset_type, "AndroidControl",
        json_file, save_path, models,
    )

    # AITZ data
    # NOTE(review): AITZ reads the same json_file and writes to the same
    # save_path as AndroidControl -- confirm the outputs cannot overwrite
    # each other.
    logger.info("\n" + "=" * 50)
    logger.info("处理AITZ数据")
    logger.info("=" * 50)
    failures += _run_preprocess(
        AITZPreProcess, "all_low", "AITZ", json_file, save_path, models,
    )

    logger.info("\n" + "=" * 50)
    if failures:
        # Do not claim overall success when some runs failed.
        logger.error(f"以下任务处理失败: {', '.join(failures)}")
    else:
        logger.info("所有处理完成！")
    logger.info("=" * 50)

    return 1 if failures else 0

if __name__ == "__main__":
    # Hand main()'s result to sys.exit so the shell sees it as the exit status;
    # a None result is reported as success (status 0).
    sys.exit(main())
