import os
import json

# NOTE(review): the original path literals were lost -- this section had
# collapsed into `src = " = " = os.path.join(...)`, which is a SyntaxError and
# left `dst_dir` and `dst` undefined even though the rest of the script uses
# them. The two values below are PLACEHOLDERS; set them before running.
src = "input.jsonl"  # placeholder: input file, parsed as one JSON document per line
dst_dir = "."  # placeholder: directory that receives the output file
dst = os.path.join(dst_dir, "tools_unique_by_tool_error.json")

# To process a single tool only, put its name here; None means every tool.
ONLY_TOOL_NAME = None  # e.g. "get_hotel_detail_with_products"

# Make sure the output directory exists before anything is written to it.
os.makedirs(dst_dir, exist_ok=True)

def find_tool_items(obj, out):
    """Recursively collect every dict whose 'tool' value is a string.

    Matching dicts are appended to ``out`` in pre-order: a dict is recorded
    before anything nested inside it. Values that are neither dicts nor lists
    are ignored.
    """
    if isinstance(obj, dict):
        # A missing key yields None from .get(), which is not a str, so this
        # single check also covers the "key absent" case.
        if isinstance(obj.get("tool"), str):
            out.append(obj)
        children = obj.values()
    elif isinstance(obj, list):
        children = obj
    else:
        return

    for child in children:
        find_tool_items(child, out)

def get_error(tool_item: dict):
    """Return the error of a tool item as a string, or None if there is none.

    Two layouts are supported:
      1) {"tool": ..., "result": {"error": "..."}}
      2) {"tool": ..., "error": "..."}

    The nested form is consulted first; the top-level "error" is used only
    when the nested lookup yields None.
    """
    result = tool_item.get("result")
    err = result.get("error") if isinstance(result, dict) else None
    if err is None:
        err = tool_item.get("error")
    # Normalise to str/None so the value is always hashable (a raw dict or
    # list could not be used inside a dictionary key).
    return None if err is None else str(err)

# Number of input lines that could not be parsed as JSON.
bad_lines = 0
# (tool, error) -> sample output record for that pair.
seen = {}

with open(src, "r", encoding="utf-8") as f:
    for line_no, raw_line in enumerate(f, 1):
        text = raw_line.strip()
        if not text:
            # Blank lines carry no record.
            continue

        try:
            rec = json.loads(text)
        except json.JSONDecodeError:
            # Malformed lines are counted and skipped rather than aborting.
            bad_lines += 1
            continue

        matches = []
        find_tool_items(rec, matches)

        for entry in matches:
            name = entry.get("tool")
            if ONLY_TOOL_NAME is not None and name != ONLY_TOOL_NAME:
                continue

            message = get_error(entry)
            pair = (name, message)

            sample = seen.get(pair)
            if sample is not None:
                # Already have a sample for this pair: just bump the tally.
                # (Delete this branch's increment if no count is wanted.)
                sample["count"] += 1
                continue

            # First occurrence: keep one sample record with tool/args/result.
            # (Storing `entry` itself instead would keep the item unchanged.)
            seen[pair] = {
                "tool": name,
                "error": message,
                "args": entry.get("args"),
                "result": entry.get("result"),
                "count": 1,  # drop this key and the increment above if no count is wanted
            }

out = [*seen.values()]

with open(dst, "w", encoding="utf-8") as f:
    json.dump(out, f, ensure_ascii=False, indent=2)

print(f"Saved: {dst}")
print(f"Unique (tool,error): {len(out)}")
print(f"Bad json lines skipped: {bad_lines}")
