# /mnt/petrelfs/zhengzhijie/mutiAgent4Fraud/utils/dataset_make/export_bad_pairs_jsonl.py
# -*- coding: utf-8 -*-

import os
import sys
import json

# Hard-coded parameters (edit as needed).
# DB_DIR is the directory main() scans for *.db files.
DB_DIR = "/mnt/petrelfs/zhengzhijie/mutiAgent4Fraud/data/simu_db/dataset"
# Inclusive range of user ids; main() enumerates every pair (uid1 < uid2) in it.
BAD_ID_START = 100
BAD_ID_END = 109
# Forwarded to build_pair_trajectory as its include_events argument.
INCLUDE_EVENTS = True
MIN_DEPTH = 3  # Depth threshold: trajectories shallower than this are not exported.

# Output file naming (a single merged JSONL file).
# The file name encodes the source directory name, the id range and the depth threshold.
OUTPUT_DIR = "/mnt/petrelfs/zhengzhijie/mutiAgent4Fraud/data/simu_db/dataset"
DIR_BASENAME = os.path.basename(DB_DIR.rstrip("/"))
OUTPUT_JSONL = os.path.join(
    OUTPUT_DIR, f"combined_{DIR_BASENAME}_bad_{BAD_ID_START}_{BAD_ID_END}_min{MIN_DEPTH}.jsonl"
)

# Convenience import of build_pair_trajectory (get_history lives in the same
# directory as this file, so that directory is put first on sys.path).
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, THIS_DIR)
from get_history import build_pair_trajectory  # noqa: E402


def list_db_files(db_dir: str):
    """Return the full paths of all entries in *db_dir* whose name ends in ``.db``.

    Entries are ordered by sorted entry name, and each path is *db_dir* joined
    with that name. Only the name suffix is checked, so any directory entry
    ending in ``.db`` is included.

    If *db_dir* is not an existing directory, an error message is printed and
    an empty list is returned.
    """
    if os.path.isdir(db_dir):
        return [
            os.path.join(db_dir, entry)
            for entry in sorted(os.listdir(db_dir))
            if entry.endswith(".db")
        ]

    print(f"[错误] 目录不存在或不可访问: {db_dir}")
    return []


def main():
    """Export qualifying user-pair trajectories from every database into one JSONL file.

    Creates OUTPUT_DIR if needed, then lists the ``.db`` files in DB_DIR; when
    there are none, a message is printed and nothing is written. Otherwise
    OUTPUT_JSONL is (re)written from scratch. For each database, every pair
    ``uid1 < uid2`` from the inclusive range BAD_ID_START..BAD_ID_END is passed
    to ``build_pair_trajectory``:

    * a pair whose build raises is reported as a warning and skipped;
    * a ``None`` result, or one whose depth is below MIN_DEPTH, is skipped;
    * anything else becomes one JSON line holding a sequential id (starting at
      1 and shared across all databases), the database file name, the depth
      and the conversation content.

    Per-database and overall counts are printed along the way.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    databases = list_db_files(DB_DIR)
    if not databases:
        print(f"[信息] 在目录中未找到 .db 文件: {DB_DIR}")
        return

    # The id pairs are identical for every database, so enumerate them once.
    upper_bound = BAD_ID_END + 1
    id_pairs = [
        (first, second)
        for first in range(BAD_ID_START, upper_bound)
        for second in range(first + 1, upper_bound)
    ]

    total_scanned = 0
    total_written = 0

    with open(OUTPUT_JSONL, "w", encoding="utf-8") as sink:
        for database in databases:
            label = os.path.basename(database)
            written_here = 0
            print(f"[处理] {label}")

            for first, second in id_pairs:
                try:
                    trajectory = build_pair_trajectory(
                        db_path=database,
                        uid1=first,
                        uid2=second,
                        include_events=INCLUDE_EVENTS,
                    )
                except Exception as err:
                    # Best effort: one failing pair must not stop the export.
                    print(f"[警告] 构建轨迹失败 {label} ({first},{second}): {err}")
                    continue

                if trajectory is None:
                    continue

                depth = int(trajectory.get("对话的深度", 0))
                if depth < MIN_DEPTH:
                    continue

                record = {
                    # Ids are 1-based and run continuously across databases.
                    "id": total_written + 1,
                    "模型种类": label,
                    "对话的深度": depth,
                    "对话的内容": trajectory["对话的内容"],
                }
                line = json.dumps(record, ensure_ascii=False)
                sink.write(line + "\n")
                total_written += 1
                written_here += 1

            # Every pair is attempted once per database.
            scanned_here = len(id_pairs)
            total_scanned += scanned_here
            print(f"[统计] {label} 扫描对数: {scanned_here}, 导出(深度≥{MIN_DEPTH}): {written_here}")

    print(f"[完成] 目录: {DB_DIR}")
    print(f"[完成] 总扫描对数: {total_scanned}, 总导出(深度≥{MIN_DEPTH}): {total_written}")
    print(f"[完成] 输出文件: {OUTPUT_JSONL}")


# Run the export only when this file is executed directly as a script.
if __name__ == "__main__":
    main()