#!/usr/bin/env python3
import argparse, json
from pathlib import Path

def load_map(p):
    """Load a JSONL file into a dict keyed by each row's ``text_id``.

    Args:
        p: Path of a UTF-8 text file holding one JSON object per line.

    Returns:
        A dict mapping ``row["text_id"]`` to the parsed row. If a
        ``text_id`` occurs more than once, the last row wins.

    Raises:
        KeyError: A row has no ``"text_id"`` key.
        json.JSONDecodeError: A non-blank line is not valid JSON.
    """
    rows_by_id = {}
    # The context manager closes the handle even when a line fails to parse.
    with open(p, "r", encoding="utf-8") as fh:
        for line in fh:
            # Tolerate blank lines (e.g. a trailing newline at end of file),
            # which json.loads would otherwise reject.
            if not line.strip():
                continue
            row = json.loads(line)
            rows_by_id[row["text_id"]] = row
    return rows_by_id

def main():
    """Blend two classifiers' ``p_flagged`` scores and write them as JSONL.

    Reads the two prediction files given by ``--c1`` and ``--c2``, keeps the
    ``text_id`` values present in both, and for each one computes
    ``w1 * p1 + w2 * p2`` (the weights are used as given, not normalised).
    A row is labelled ``"FLAGGED"`` when the blended score is at least
    ``--thresh`` and ``"NOT FLAGGED"`` otherwise. Rows are written to
    ``--out`` in sorted ``text_id`` order.

    Raises:
        SystemExit: The two inputs share no ``text_id``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--c1", required=True, help="preds_c.jsonl from classifier 1")
    ap.add_argument("--c2", required=True, help="preds_c2.jsonl from classifier 2")
    ap.add_argument("--w1", type=float, default=0.5, help="weight for c1 prob")
    ap.add_argument("--w2", type=float, default=0.5, help="weight for c2 prob")
    ap.add_argument("--thresh", type=float, default=0.50, help="decision threshold")
    ap.add_argument("--out", default="outputs/preds_c_blend.jsonl")
    args = ap.parse_args()

    m1 = load_map(args.c1)
    m2 = load_map(args.c2)
    # Only ids scored by both classifiers can be blended.
    ids = sorted(set(m1) & set(m2))
    if not ids:
        raise SystemExit("No overlapping text_id between C1 and C2.")

    # Create the directory that will actually hold the output file, rather
    # than a hard-coded "outputs/", so a custom --out location works.
    out_path = Path(args.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        for tid in ids:
            r1, r2 = m1[tid], m2[tid]
            p1 = float(r1["p_flagged"])
            p2 = float(r2["p_flagged"])
            pw = args.w1 * p1 + args.w2 * p2
            pred = "FLAGGED" if pw >= args.thresh else "NOT FLAGGED"
            record = {
                "text_id": tid,
                # Metadata comes from c1's row; c2's row is the fallback.
                "dataset": r1.get("dataset", r2.get("dataset", "")),
                "true": r1.get("true", r2.get("true")),
                "pred": pred,
                "p_flagged": float(pw),
                "p_flagged_c1": float(p1),
                "p_flagged_c2": float(p2),
                "w1": args.w1,
                "w2": args.w2,
            }
            f.write(json.dumps(record) + "\n")
    print(f"[DONE] wrote {args.out} (rows={len(ids)})")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
