# scripts/build_neighbor_pairs.py
import argparse
from m4olgen.data import build_neighbor_pairs

def build_arg_parser() -> argparse.ArgumentParser:
    """Return the command-line parser for this script.

    Kept separate from ``main`` so the flags and their defaults can be
    inspected or tested without invoking ``build_neighbor_pairs``.
    """
    parser = argparse.ArgumentParser(
        description="Build neighbor pairs from a JSON list of molecules.",
    )
    parser.add_argument("--input", required=True, help="JSON list of molecules")
    parser.add_argument("--out", required=True, help="Output (jsonl/json)")
    parser.add_argument("--format", default="jsonl", choices=["jsonl", "json"])
    parser.add_argument("--chunk_size", type=int, default=100000)
    parser.add_argument("--procs", type=int, default=4)
    parser.add_argument("--max_neighbors", type=int, default=10)
    parser.add_argument("--min_len", type=int, default=3)
    return parser


def main(argv=None) -> None:
    """Parse the command line and forward the options to ``build_neighbor_pairs``.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read them from ``sys.argv[1:]``, which is what the script entry
            point relies on.

    Raises:
        SystemExit: Raised by argparse when required options are missing or
            a value fails its type/choices check.
    """
    args = build_arg_parser().parse_args(argv)

    # CLI flag names differ from the callee's keyword names; this call is the
    # single place where the mapping between the two is defined.
    build_neighbor_pairs(
        input_json=args.input,
        output_path=args.out,
        chunk_size=args.chunk_size,
        processes=args.procs,
        max_neighbors_per_i=args.max_neighbors,
        min_len_fragments=args.min_len,
        output_format=args.format,
    )


if __name__ == "__main__":
    main()
