# import os
# import json
# from collections import Counter, defaultdict

# base_dir = "interact/traces/test_easy_single_turn/LongCat-Flash-Chat_"

# # rubric_key -> 总次数
# rubric_counter = Counter()

# # rubric_key -> validate_func -> 次数
# rubric_validate_counter = defaultdict(Counter)

# # 遍历所有 json 文件
# for root, _, files in os.walk(base_dir):
#     for filename in files:
#         if not filename.endswith(".json"):
#             continue

#         file_path = os.path.join(root, filename)

#         try:
#             with open(file_path, "r", encoding="utf-8") as f:
#                 data = json.load(f)
#         except Exception as e:
#             print(f"[跳过] 读取失败 {file_path}: {e}")
#             continue

#         rounds = data.get("rounds", [])
#         if not isinstance(rounds, list):
#             continue

#         for r in rounds:
#             eval_result = r.get("eval_result", {})
#             id_false_results = eval_result.get("id_false_results", [])

#             if not isinstance(id_false_results, list):
#                 continue

#             for item in id_false_results:
#                 rubric_key = item.get("rubric_key")
#                 validate_func = item.get("validate_func")

#                 if not rubric_key:
#                     continue

#                 rubric_counter[rubric_key] += 1

#                 if validate_func:
#                     rubric_validate_counter[rubric_key][validate_func] += 1

# # ===== 打印所有统计结果 =====
# if not rubric_counter:
#     print("没有找到任何 id_false_results")
# else:
#     print("\n========== id_false_results 全量统计 ==========\n")

#     # 按 rubric_key 出现次数从多到少排序
#     for rubric_key, total_cnt in rubric_counter.most_common():
#         print(f"rubric_key: {rubric_key}")
#         print(f"  total_count: {total_cnt}")

#         validate_map = rubric_validate_counter.get(rubric_key, {})
#         if validate_map:
#             print("  validate_func breakdown:")
#             for validate_func, cnt in validate_map.most_common():
#                 print(f"    - {validate_func}: {cnt}")
#         else:
#             print("  validate_func breakdown: None")

#         print("-" * 50)


import os
import json
from collections import Counter, defaultdict

base_dir = "interact/traces/test_easy_single_turn/LongCat-Flash-Chat"

# ===== Rubric statistics =====
# rubric_key -> number of id_false_results entries carrying that key
rubric_counter = Counter()
# rubric_key -> validate_func -> number of entries
rubric_validate_counter = defaultdict(Counter)

# ===== File-level and global statistics =====
# Number of JSON files that were read and parsed successfully.
total_files = 0

# (1) Files containing at least one general_results entry named
#     "validate_trip_plan_json" whose "ok" is True.
file_has_validate_trip_plan_ok = 0

# (2) Files in which every general_results entry has ok=True.
file_all_general_ok = 0

# (3) Entry-level totals accumulated over every general_results list.
general_total = 0
general_ok_total = 0

# (4) Files in which every id_false_results list is empty.
file_id_false_empty = 0


def summarize_rounds(rounds):
    """Summarise the evaluation results stored in one trace file.

    Args:
        rounds: The list found under the trace's "rounds" key. Each element
            is expected to be a dict with an optional "eval_result" dict.

    Returns:
        A dict with the keys:
            "has_validate_trip_plan_ok": True if any general_results entry is
                named "validate_trip_plan_json" and has ok=True.
            "all_general_ok": True if no general_results entry was found with
                an "ok" other than True and nothing was malformed.
            "all_id_false_empty": True if every id_false_results list was
                empty and nothing was malformed.
            "general_total": number of general_results entries seen.
            "general_ok_total": number of those entries with ok=True.
            "failures": list of (rubric_key, validate_func) tuples, one per
                id_false_results entry that has a truthy rubric_key.

    A value that is present but has the wrong type (a round that is not a
    dict, an "eval_result" that is not a dict, a results field that is not a
    list) is counted as a failure for the affected flag(s) instead of raising.
    A missing key is treated as an empty container.

    NOTE(review): both "all_*" flags start out True, so a file without any
    rounds or results is reported as passing (2) and (4) -- confirm that this
    vacuous pass is intended.
    """
    has_validate_trip_plan_ok = False
    all_general_ok = True
    all_id_false_empty = True
    general_seen = 0
    general_ok_seen = 0
    failures = []

    for round_entry in rounds:
        if isinstance(round_entry, dict):
            eval_result = round_entry.get("eval_result", {})
        else:
            eval_result = None

        if not isinstance(eval_result, dict):
            # e.g. "eval_result": null -- nothing can be verified for this
            # round, so neither "all ..." property holds for the file.
            all_general_ok = False
            all_id_false_empty = False
            continue

        # ---------- general_results ----------
        general_results = eval_result.get("general_results", [])
        if isinstance(general_results, list):
            for entry in general_results:
                if isinstance(entry, dict):
                    ok = entry.get("ok")
                    name = entry.get("name")
                else:
                    # A non-dict entry is counted as an entry that is not ok.
                    ok = None
                    name = None

                general_seen += 1
                # Only the literal True counts; truthy values such as 1 or
                # "true" do not.
                if ok is True:
                    general_ok_seen += 1
                    if name == "validate_trip_plan_json":
                        has_validate_trip_plan_ok = True
                else:
                    all_general_ok = False
        else:
            all_general_ok = False

        # ---------- id_false_results ----------
        id_false_results = eval_result.get("id_false_results", [])
        if isinstance(id_false_results, list):
            if id_false_results:
                all_id_false_empty = False

            for item in id_false_results:
                if not isinstance(item, dict):
                    continue
                rubric_key = item.get("rubric_key")
                if not rubric_key:
                    continue
                failures.append((rubric_key, item.get("validate_func")))
        else:
            all_id_false_empty = False

    return {
        "has_validate_trip_plan_ok": has_validate_trip_plan_ok,
        "all_general_ok": all_general_ok,
        "all_id_false_empty": all_id_false_empty,
        "general_total": general_seen,
        "general_ok_total": general_ok_seen,
        "failures": failures,
    }


for root, dirs, files in os.walk(base_dir):
    # Sorting in place makes os.walk visit sub-directories in a stable order,
    # so the tie order of Counter.most_common() is reproducible across runs.
    dirs.sort()
    for filename in sorted(files):
        if not filename.endswith(".json"):
            continue

        file_path = os.path.join(root, filename)

        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"[跳过] 读取失败 {file_path}: {e}")
            continue

        total_files += 1

        # A file whose top level is not an object, or whose "rounds" is not a
        # list, still counts towards total_files but towards no numerator.
        rounds = data.get("rounds", []) if isinstance(data, dict) else None
        if not isinstance(rounds, list):
            continue

        summary = summarize_rounds(rounds)

        # ===== Fold the per-file summary into the global counters =====
        general_total += summary["general_total"]
        general_ok_total += summary["general_ok_total"]

        for rubric_key, validate_func in summary["failures"]:
            rubric_counter[rubric_key] += 1
            if validate_func:
                rubric_validate_counter[rubric_key][validate_func] += 1

        if summary["has_validate_trip_plan_ok"]:
            file_has_validate_trip_plan_ok += 1

        if summary["all_general_ok"]:
            file_all_general_ok += 1

        if summary["all_id_false_empty"]:
            file_id_false_empty += 1


# ================= Report: file-level and global ratios =================
print("\n========== 文件级 & 全局统计 ==========\n")

if not total_files:
    print("未找到任何 JSON 文件")
else:
    print(f"总文件数: {total_files}\n")

    # One row per reported ratio: (heading, numerator, denominator, text to
    # print instead when the denominator is zero). total_files is non-zero in
    # this branch, so only the entry-level row (3) can hit its fallback.
    ratio_rows = (
        (
            "① 含 validate_trip_plan_json 且 ok=true 的文件比例:",
            file_has_validate_trip_plan_ok,
            total_files,
            None,
        ),
        (
            "② general_results 全部 ok=true 的文件比例:",
            file_all_general_ok,
            total_files,
            None,
        ),
        (
            "③ general_results 中 ok=true 的条目比例:",
            general_ok_total,
            general_total,
            "③ general_results 为空，无法计算比例\n",
        ),
        (
            "④ id_false_results 为空的文件比例:",
            file_id_false_empty,
            total_files,
            None,
        ),
    )

    for heading, hits, denom, fallback in ratio_rows:
        if denom > 0:
            print(heading)
            print(f"   {hits} / {denom} = {hits / denom:.2%}\n")
        else:
            print(fallback)


# ================= Report: per-rubric failure breakdown =================
if rubric_counter:
    print("\n========== id_false_results 全量统计 ==========\n")

    divider = "-" * 50

    # Most frequently failing rubric keys first.
    for key, occurrences in rubric_counter.most_common():
        print(f"rubric_key: {key}")
        print(f"  total_count: {occurrences}")

        # .get() avoids inserting an empty Counter into the defaultdict for
        # keys that never had a validate_func recorded.
        per_func = rubric_validate_counter.get(key, {})
        if not per_func:
            print("  validate_func breakdown: None")
        else:
            print("  validate_func breakdown:")
            for func_name, func_hits in per_func.most_common():
                print(f"    - {func_name}: {func_hits}")

        print(divider)
else:
    print("没有找到任何 id_false_results")
