import json
import sys
from collections import Counter
from pathlib import Path

import numpy as np
import pandas as pd

def load(path):
    """Read a JSON Lines file and return its records as a list.

    Each non-blank line of the file is parsed as one JSON document.

    Args:
        path: Location of the JSON Lines file to read.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # The context manager closes the handle deterministically; JSON text is
    # UTF-8, so do not depend on the platform's default encoding.
    with open(path, encoding="utf-8") as fh:
        # Skip whitespace-only lines (e.g. a trailing newline at end of file),
        # which json.loads would otherwise reject.
        return [json.loads(line) for line in fh if line.strip()]
def eval_at_tau(D, tau):
    """Score the predictions that are retained at uncertainty threshold ``tau``.

    A record is retained when its ``"u_score"`` is ``<= tau``. Records with no
    ``"u_score"`` key are treated as having a score of 1.0.

    Args:
        D: Sequence of dicts, each holding ``"gold_answer"`` and
            ``"pred_answer"`` strings and, optionally, a ``"u_score"``.
        tau: Retain records whose ``"u_score"`` does not exceed this value.

    Returns:
        A ``(coverage, em, f1)`` tuple of floats:
        coverage is the retained fraction of ``D``; em is the mean exact-match
        score (case-sensitive, surrounding whitespace ignored) and f1 is the
        mean token-level F1 (lowercased, whitespace-tokenized), both computed
        over the retained records only. ``(0.0, 0.0, 0.0)`` is returned when
        no record is retained.

    Raises:
        KeyError: If a retained record lacks ``"gold_answer"`` or
            ``"pred_answer"``.
    """
    kept = [d for d in D if d.get("u_score", 1.0) <= tau]
    if not kept:
        return 0.0, 0.0, 0.0

    def exact_match(gold, pred):
        return float(gold.strip() == pred.strip())

    def token_f1(gold, pred):
        gold_toks = gold.lower().split()
        pred_toks = pred.lower().split()
        if not gold_toks and not pred_toks:
            return 1.0
        if not gold_toks or not pred_toks:
            return 0.0
        # Count overlap as a multiset so repeated tokens are matched once per
        # occurrence. A plain set intersection under-counts them and gives
        # identical answers such as "the the cat" an F1 below 1.
        overlap = sum((Counter(gold_toks) & Counter(pred_toks)).values())
        if overlap == 0:
            return 0.0
        precision = overlap / len(pred_toks)
        recall = overlap / len(gold_toks)
        return 2 * precision * recall / (precision + recall)

    pairs = [(d["gold_answer"], d["pred_answer"]) for d in kept]
    em = float(np.mean([exact_match(g, p) for g, p in pairs]))
    f1 = float(np.mean([token_f1(g, p) for g, p in pairs]))
    coverage = len(kept) / len(D)
    return coverage, em, f1

def quantile_tau(D, q):
    """Pick a threshold from the ``"u_score"`` values of ``D``.

    The scores are sorted in ascending order and the element at index
    ``floor(q * (n - 1))`` is returned, where ``n`` is the number of records.
    Records with no ``"u_score"`` key contribute a score of 1.0.

    Args:
        D: Sequence of dicts that may carry a ``"u_score"`` entry.
        q: Fractional position into the sorted scores.

    Returns:
        The selected score as a float, or 0.0 when ``D`` is empty.
    """
    scores = [rec.get("u_score", 1.0) for rec in D]
    if len(scores) == 0:
        return 0.0
    scores.sort()
    position = int(np.floor(q * (len(scores) - 1)))
    return float(scores[position])

def main():
    """Compute two operating points from a predictions file and save them.

    The predictions path is taken from the first command-line argument and
    falls back to ``data/preds_squad_baseline.jsonl``. Two rows are written to
    ``results/squad_points.csv``:

    * a threshold taken at the 0.80 position of all scores and evaluated on
      all records;
    * a threshold taken at the 0.80 position of the first 20% of records and
      evaluated on the remaining records.
    """
    base_json = sys.argv[1] if len(sys.argv) > 1 else "data/preds_squad_baseline.jsonl"
    D = load(base_json)
    if not D:
        # No scores means no threshold can be chosen; stop with a clear
        # message instead of failing later on an index lookup.
        sys.exit(f"No records found in {base_json}")

    # Threshold chosen and evaluated on the full set of records.
    tau_cal = quantile_tau(D, 0.80)
    cov_c, em_c, f1_c = eval_at_tau(D, tau_cal)

    # Threshold chosen on a leading 20% slice (at least one record) and
    # evaluated on the rest, so selection and evaluation data are disjoint.
    k = max(1, int(0.2 * len(D)))
    C, T = D[:k], D[k:]
    tau_conf = quantile_tau(C, 1.0 - 0.20)
    cov_cf, em_cf, f1_cf = eval_at_tau(T, tau_conf)

    out_path = Path("results/squad_points.csv")
    # to_csv does not create missing directories.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    pd.DataFrame([
        {"label": "Calibrated-τ (80%)",       "coverage": cov_c,  "EM": em_c,  "F1": f1_c},
        {"label": "Split-Conformal (α=0.20)", "coverage": cov_cf, "EM": em_cf, "F1": f1_cf},
    ]).to_csv(out_path, index=False)
    print("Wrote results/squad_points.csv with τ_cal=%.4f τ_conf=%.4f" % (tau_cal, tau_conf))


if __name__ == "__main__":
    main()
