import os
import json
import re
from collections import defaultdict

def get_max_round_files(directory):
    """
    Pick, for every batch number found in ``directory``, the file with the highest round.

    A file is considered when its name ends with
    ``_batch_<num1>_round_<num2>.json``; any prefix before ``_batch_`` is
    allowed (for example ``math_batch_3_round_1.json``). ``num1`` is the batch
    number and ``num2`` the round number. The scan is not recursive.

    Args:
        directory: Path of the directory whose entries are scanned.

    Returns:
        A list of dicts ``{"max_round": int, "file_path": str}``, one per
        distinct batch number, ordered by ascending batch number.
        ``file_path`` is ``directory`` joined with the file name. The list is
        empty when no file name matches.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``directory`` cannot be
            listed.
    """
    # ``search`` (not ``match``) so that names carrying a prefix before
    # ``_batch_`` are recognised; ``$`` rejects names such as ``*.json.bak``.
    pattern = re.compile(r'_batch_(\d+)_round_(\d+)\.json$')
    best_by_batch = {}

    # A sorted listing makes the choice deterministic when two files share
    # the same batch and round numbers (the first one in name order wins).
    for filename in sorted(os.listdir(directory)):
        match = pattern.search(filename)
        if match is None:
            continue

        batch, round_no = int(match.group(1)), int(match.group(2))
        current = best_by_batch.get(batch)
        if current is None or round_no > current["max_round"]:
            best_by_batch[batch] = {
                "max_round": round_no,
                "file_path": os.path.join(directory, filename),
            }

    # Order by the parsed batch number itself rather than re-parsing the full
    # path, which could pick up a ``_batch_<n>`` fragment from a directory name.
    return [best_by_batch[batch] for batch in sorted(best_by_batch)]

def merge_json_files(file_list, output_file):
    """
    Concatenate several JSON files into one flat JSON array on disk.

    Args:
        file_list: Iterable of dicts; each dict's ``"file_path"`` entry names
            a UTF-8 encoded JSON file to read.
        output_file: Destination path. It is opened for writing (truncating
            any existing content) and receives the merged array, indented by
            four spaces with non-ASCII characters written unescaped.

    A source file whose top-level value is a list contributes its elements
    one by one; any other top-level value is added as a single element, so
    the output never contains a list nested merely because of the merge.
    """
    combined = []

    for entry in file_list:
        with open(entry["file_path"], 'r', encoding='utf-8') as source:
            payload = json.load(source)
            # Normalise a non-list document to a one-element list so both
            # cases can be appended the same way.
            items = payload if isinstance(payload, list) else [payload]
            combined.extend(items)

    with open(output_file, 'w', encoding='utf-8') as target:
        json.dump(combined, target, indent=4, ensure_ascii=False)

def process_folders(base_path):
    """
    Build one merged JSON file per subject under ``base_path`` plus ``all.json``.

    For each of the sub-folders ``math``, ``biology``, ``chemistry`` and
    ``physics`` (processed in that order), the files chosen by
    ``get_max_round_files`` are merged into ``<base_path>/<folder>.json``.
    Sub-folders that are missing, are not directories, or yield no files are
    skipped. Afterwards the subject files that were written are merged, in
    the same order, into ``<base_path>/all.json``.

    Args:
        base_path: Directory that contains the subject sub-folders and
            receives the output files.

    Note:
        ``all.json`` is written on every call, even when no subject file was
        produced; ``merge_json_files`` is then called with an empty list.
    """
    category_files = []

    for folder in ["math", "biology", "chemistry", "physics"]:
        folder_path = os.path.join(base_path, folder)
        # isdir rather than exists: a regular file that happens to be named
        # like a subject would otherwise reach os.listdir and raise.
        if not os.path.isdir(folder_path):
            continue

        max_round_files = get_max_round_files(folder_path)
        if not max_round_files:
            continue

        output_file = os.path.join(base_path, f"{folder}.json")
        merge_json_files(max_round_files, output_file)
        category_files.append(output_file)

    # Merge every subject file produced above into a single all.json.
    all_json_path = os.path.join(base_path, "all.json")
    merge_json_files([{"file_path": f} for f in category_files], all_json_path)

if __name__ == "__main__":
    # Root directories to process; each one is handled independently by
    # process_folders. The list is empty here, so running the script as-is
    # performs no work until paths are added.
    base_paths = []

    for root_dir in base_paths:
        process_folders(root_dir)
