import os
import json
from collections import defaultdict
import os
import json
import argparse
from subprocess import Popen
import subprocess
from threading import Timer
import shutil
import time


def read_json_file(file_path):
    """Parse the UTF-8 encoded JSON file at ``file_path`` and return its content."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed

def count_total(root_dir, num_tasks=812, config_dir='config_files'):
    """Print how many single-site tasks exist per site.

    Reads ``<config_dir>/<tid>.json`` for every task id in
    ``range(num_tasks)``. Tasks whose ``sites`` list does not contain exactly
    one entry are ignored. The resulting ``{site: count}`` dict is printed.

    While scanning, the id of any single-site task whose ``intent`` contains
    one specific refund question is also printed (apparently a lookup aid for
    locating that task).

    Args:
        root_dir: Unused by the counting logic; kept so existing callers
            keep working.
        num_tasks: Number of consecutive task ids (starting at 0) to scan.
        config_dir: Directory holding the per-task JSON config files.

    Raises:
        FileNotFoundError: If a config file in the scanned range is missing.
        KeyError: If a config lacks ``sites`` (or ``intent`` for a
            single-site task).
    """
    tasks_count = {}
    for tid in range(num_tasks):
        config_path = os.path.join(config_dir, f"{tid}.json")
        with open(config_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        sites = data['sites']
        if len(sites) != 1:
            continue

        if "How much refund I should expect from my order canlled in 2022, including shipping fee" in data["intent"]:
            print(tid)

        site = sites[0]
        tasks_count[site] = tasks_count.get(site, 0) + 1

    print(tasks_count)

def _remove_tree_with_retry(path, attempts=3, delay=1):
    """Delete the directory tree at ``path``, retrying on OS errors; return True once removed."""
    for attempt in range(1, attempts + 1):
        try:
            shutil.rmtree(path)
        except OSError as exc:
            print(f"Failed to remove {path} (attempt {attempt}/{attempts}): {exc}")
            if attempt < attempts:
                time.sleep(delay)
        else:
            # Stop as soon as the tree is gone; retrying a removed path would
            # only raise FileNotFoundError and waste time sleeping.
            return True
    return False


def main(root_dir, config_dir='config_files'):
    """Print per-site success counts for the auto-evaluated runs under ``root_dir``.

    Every sub-folder of ``root_dir`` whose name contains ``webarena.`` and not
    ``Generic`` is inspected for a ``gpt-4o_autoeval.json`` file:

    * If the file holds a list, the ``rm`` value of its first element is
      counted as a success (truthy) or failure (falsy) for the site named in
      ``<config_dir>/<task id>.json``. Only configs listing exactly one site
      are counted. A missing (``None``) ``rm`` value, or an empty list, is
      reported and not counted.
    * If the file does not hold a list, the folder is reported and then
      DELETED from disk (up to three attempts).

    Finally one ``site:successes/total,ratio`` line is printed per site.

    Args:
        root_dir: Directory containing the ``webarena.<task id>`` run folders.
        config_dir: Directory holding the per-task JSON config files.

    Raises:
        ValueError: If the text after the first ``.`` in a run folder name is
            not an integer.
        KeyError: If the first autoeval entry has no ``rm`` key or a config
            has no ``sites`` key.
    """
    # Success/failure counters keyed by site name.
    stats = defaultdict(lambda: {'true': 0, 'false': 0})
    folder_names = os.listdir(root_dir)
    print(f"Total dir:{len(folder_names)}")

    for folder_name in folder_names:
        folder_path = os.path.join(root_dir, folder_name)
        is_run_folder = (
            os.path.isdir(folder_path)
            and "webarena." in folder_name
            and "Generic" not in folder_name
        )
        if not is_run_folder:
            continue

        autoeval_json_path = os.path.join(folder_path, 'gpt-4o_autoeval.json')
        if not os.path.exists(autoeval_json_path):
            continue
        data = read_json_file(autoeval_json_path)

        if not isinstance(data, list):
            print(f"Here is not list:{folder_name}")
            target = f'{root_dir}/{folder_name}'
            if os.path.exists(target):
                print(target)
                _remove_tree_with_retry(target)
            continue

        # An empty list carries no verdict; treat it like a missing value.
        rm_value = data[0]['rm'] if data else None

        # The task id is the part of the folder name after the first dot.
        folder_number = int(folder_name.split('.')[1])
        config_json_path = os.path.join(config_dir, f'{folder_number}.json')
        if not os.path.exists(config_json_path):
            continue

        config_data = read_json_file(config_json_path)
        if len(config_data['sites']) != 1:
            continue
        site_type = config_data['sites'][0]

        if rm_value is None:
            print(f"Here is not value:{folder_name}")
        elif rm_value:
            stats[site_type]['true'] += 1
        else:
            stats[site_type]['false'] += 1

    # Report the aggregated results. ``total`` is never zero because a site
    # only gets an entry when one of its counters is incremented.
    for site_type, counts in stats.items():
        total = counts['true'] + counts['false']
        print(f"{site_type}:{counts['true']}/{total},{counts['true'] / total}")

def find(root_dir1, root_dir2, site='reddit', num_tasks=812, config_dir='config_files'):
    """Print ids of ``site`` tasks that failed under ``root_dir1`` but passed under ``root_dir2``.

    For every task id in ``range(num_tasks)`` whose config lists exactly one
    site equal to ``site``, the files
    ``<root>/webarena.<tid>/gpt-4o_autoeval.json`` from both result roots are
    compared. The id is printed when the ``rm`` value of the first entry is
    falsy in ``root_dir1`` and truthy in ``root_dir2``. Tasks for which either
    file is missing, is not a list, or is an empty list are skipped.

    Args:
        root_dir1: Result root expected to contain the failing run.
        root_dir2: Result root expected to contain the passing run.
        site: Site name to restrict the comparison to.
        num_tasks: Number of consecutive task ids (starting at 0) to scan.
        config_dir: Directory holding the per-task JSON config files.

    Raises:
        FileNotFoundError: If a config file in the scanned range is missing.
        KeyError: If a config has no ``sites`` key or an autoeval entry has
            no ``rm`` key.
    """
    for tid in range(num_tasks):
        config_path = os.path.join(config_dir, f"{tid}.json")
        with open(config_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        sites = data['sites']
        if len(sites) != 1 or sites[0] != site:
            continue

        result_path1 = f'{root_dir1}/webarena.{tid}/gpt-4o_autoeval.json'
        result_path2 = f'{root_dir2}/webarena.{tid}/gpt-4o_autoeval.json'
        if not (os.path.exists(result_path1) and os.path.exists(result_path2)):
            continue

        data1 = read_json_file(result_path1)
        data2 = read_json_file(result_path2)
        if not (isinstance(data1, list) and isinstance(data2, list)):
            continue
        if not data1 or not data2:
            # Nothing to compare when either evaluation list is empty.
            continue

        if not data1[0]['rm'] and data2[0]['rm']:
            print(tid)

def mv(root_dir1, root_dir2, site='map', num_tasks=812, config_dir='config_files'):
    """Move the run folders of all single-site ``site`` tasks from ``root_dir1`` to ``root_dir2``.

    For every task id in ``range(num_tasks)`` whose config lists exactly one
    site equal to ``site``, the folder ``<root_dir1>/webarena.<tid>`` is moved
    to ``<root_dir2>/webarena.<tid>`` if it exists; a ``Moving <tid>`` line is
    printed for each move.

    Args:
        root_dir1: Source result root.
        root_dir2: Destination result root.
        site: Site name selecting which tasks to move.
        num_tasks: Number of consecutive task ids (starting at 0) to scan.
        config_dir: Directory holding the per-task JSON config files.

    Raises:
        FileNotFoundError: If a config file in the scanned range is missing.
        KeyError: If a config has no ``sites`` key.
    """
    for tid in range(num_tasks):
        config_path = os.path.join(config_dir, f"{tid}.json")
        with open(config_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        sites = data['sites']
        if len(sites) != 1 or sites[0] != site:
            continue

        source = f"{root_dir1}/webarena.{tid}"
        if not os.path.exists(source):
            continue

        print(f"Moving {tid}")
        # NOTE: per the shutil.move documentation, if the destination already
        # exists as a directory the source is moved *inside* it.
        shutil.move(source, f"{root_dir2}/webarena.{tid}")

def pure(path):
    """De-duplicate the JSON list stored at ``path`` by ``task_id`` and rewrite it in place.

    The file must contain a JSON list of objects that each have a ``task_id``
    convertible with ``int()``. For every distinct integer id only the first
    item is kept. The survivors are written back to the same file, sorted by
    ascending integer ``task_id`` and indented by four spaces.

    Args:
        path: Path of the JSON file to clean; it is overwritten.

    Raises:
        KeyError: If an item has no ``task_id`` key.
        ValueError: If a ``task_id`` cannot be converted to ``int``.
    """
    # Read with an explicit encoding and a context manager so the handle is
    # closed before the same path is reopened for writing.
    with open(path, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Keep only the first item seen for each integer task id.
    first_by_id = {}
    for item in json_data:
        first_by_id.setdefault(int(item['task_id']), item)

    # Order by task id, smallest first.
    sorted_unique_data = [first_by_id[task_id] for task_id in sorted(first_by_id)]

    # Persist the cleaned list back to the original file.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(sorted_unique_data, f, indent=4)

def clean(dir):
    """Strip every immediate sub-folder of ``dir`` down to a fixed set of result files.

    Inside each direct sub-directory, regular files whose name is not one of
    ``experiment.log``, ``gpt-4o_autoeval.json`` or ``summary_info.json`` are
    deleted. Nested directories and files located directly in ``dir`` are left
    untouched. Progress is reported on stdout.
    """
    # File names that survive the cleanup.
    keep_names = {'experiment.log', 'gpt-4o_autoeval.json', 'summary_info.json'}

    for entry in os.listdir(dir):
        subdir = os.path.join(dir, entry)
        if not os.path.isdir(subdir):
            # Only directories are processed; loose files stay where they are.
            continue

        print(f'Processing directory: {subdir}')

        for name in os.listdir(subdir):
            if name in keep_names:
                continue
            candidate = os.path.join(subdir, name)
            if not os.path.isfile(candidate):
                continue
            print(f'Removing file: {candidate}')
            os.remove(candidate)

        # Sub-folders left empty by the cleanup are intentionally kept.

    print("Cleanup completed.")

if __name__ == "__main__":
    # Result tree to summarise. Other trees that have been inspected this way:
    #   'results/Qwen3-32B/nobank'
    #   'results/Qwen3-32B/insights/irrelevant'
    #   'results/Qwen3-32B/insights/filler_tokens'
    root_dir = 'results/Qwen3-32B'

    # First the per-site task totals, then the per-site success statistics.
    for report in (count_total, main):
        report(root_dir)

    # Ad-hoc maintenance snippets, kept for reference:
    # root_dir1 = 'results'
    # root_dir2 = 'results/insights/wo'
    # find(root_dir1, root_dir2, site='map')
    # pure("bank/reddit_.json")
    # clean("../awm/results/insights/wo")

    # Sanity check that every entry of the reddit bank belongs to a
    # reddit-only task:
    # json_data = json.load(open("bank/reddit_.json"))
    # for item in json_data:
    #     task_id = int(item['task_id'])
    #     dir = 'config_files/' + f"{task_id}" + '.json'
    #     with open(dir, 'r', encoding='utf-8') as f:
    #         data = json.load(f)
    #     task = data['sites']
    #     if not task == ['reddit']:
    #         print(task_id)