#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import argparse
import json
import sys
from pathlib import Path
from typing import Dict, Any


def iter_json_items(path: Path):
    """
    Yield ``(index, item)`` pairs from a JSON array, JSON object, or JSONL file.

    The file is first parsed as a single JSON document:

    * a top-level array yields ``(i, element)`` with a 0-based position;
    * a top-level object yields one ``(0, obj)`` pair.

    If the whole text is not valid JSON, or its top-level value is neither an
    array nor an object, the text is treated as JSONL: every non-blank line
    that parses as JSON yields ``(line_number, value)`` with a 1-based line
    number, and lines that fail to parse are skipped.

    The file is decoded as UTF-8; a leading byte-order mark is stripped and
    undecodable bytes are dropped.
    """
    # "utf-8-sig" removes a leading BOM when present and otherwise behaves
    # like "utf-8". Without it, json.loads rejects the text and the first
    # JSONL line can never parse, so BOM-prefixed files produced no items.
    text = path.read_text(encoding="utf-8-sig", errors="ignore")

    # Only the parse itself is guarded, so the yields below never run inside
    # an exception handler. RecursionError covers pathologically nested input.
    try:
        data = json.loads(text)
    except (ValueError, RecursionError):
        data = None

    if isinstance(data, list):
        yield from enumerate(data)
        return
    if isinstance(data, dict):
        yield 0, data
        return

    # Not a JSON array/object as a whole: fall back to one JSON value per line.
    for line_no, raw in enumerate(text.splitlines(), start=1):
        line = raw.strip()
        if not line:
            continue
        try:
            item = json.loads(line)
        except (ValueError, RecursionError):
            # Best-effort: a malformed line is skipped, not fatal.
            continue
        yield line_no, item


def process_file(file_path: Path) -> Dict[str, Any]:
    """
    Build a ``{id_or_index: input}`` mapping from a JSON/JSONL file.

    Only records that are dicts containing both the ``"id_or_index"`` and
    ``"input"`` keys contribute an entry; everything else is ignored. The
    key is the string form of ``id_or_index``. When several records share
    a key, the value of the last one is kept.
    """
    return {
        str(record["id_or_index"]): record["input"]
        for _, record in iter_json_items(file_path)
        if isinstance(record, dict)
        and "id_or_index" in record
        and "input" in record
    }


def main():
    """
    Command-line entry point.

    Reads the file given by ``-i/--input``, extracts the
    ``{id_or_index: input}`` mapping with ``process_file`` and writes it as
    indented UTF-8 JSON to the path given by ``-o/--output``, creating the
    output's parent directories if needed. Exits with status 1 when the
    input path is not an existing file.
    """
    cli = argparse.ArgumentParser(
        description="将单个 JSON/JSONL 文件的 input 字段提取为 {id_or_index: input} 格式。"
    )
    cli.add_argument("-i", "--input", default="dict_questions.json", help="输入 JSON 文件路径")
    cli.add_argument(
        "-o",
        "--output",
        default="question_inputs.json",
        help="输出文件名（默认 question_inputs.json）",
    )
    options = cli.parse_args()

    source = Path(options.input)
    if not source.is_file():
        print(f"[ERROR] 找不到输入文件：{source}", file=sys.stderr)
        sys.exit(1)

    extracted = process_file(source)
    if not extracted:
        # An empty mapping is still written out below; this only warns.
        print("[WARN] 未提取到任何包含 'id_or_index' 和 'input' 的记录。")

    destination = Path(options.output)
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(extracted, ensure_ascii=False, indent=2)
    destination.write_text(serialized, encoding="utf-8")

    print(f"✅ 已处理 {len(extracted)} 条记录，输出至 {destination}")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
