import json
from collections import Counter, defaultdict
from typing import Dict, List

# Names of the per-review score entries; used as keys into each review's
# "scores" mapping and into the caller-supplied weights mapping.
CRITERIA = [
    "clarity",
    "novelty",
    "methodology",
    "reproducibility",
    "ethics",
]
def mean(xs):
    """Return the arithmetic mean of ``xs``, or 0.0 when ``xs`` is empty.

    ``xs`` must support truth-testing, ``len()`` and ``sum()`` (for example a
    list or tuple of numbers).
    """
    if not xs:
        return 0.0
    return sum(xs) / len(xs)
def cohen_kappa(labels1, labels2):
    """Return Cohen's kappa for two equally long sequences of labels.

    Kappa is ``(po - pe) / (1 - pe)``, where ``po`` is the observed fraction
    of positions on which the two sequences agree and ``pe`` is the agreement
    expected by chance from each sequence's own label frequencies.

    Args:
        labels1: Labels from the first rater; must be hashable.
        labels2: Labels from the second rater, aligned with ``labels1``.

    Returns:
        The kappa statistic as a float. Returns 0.0 for empty input and when
        ``pe`` equals 1 (both raters used one and the same label throughout),
        where the statistic is undefined.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    n = len(labels1)
    if len(labels2) != n:
        # zip() would silently truncate while the marginals below still used
        # the full sequences, yielding a meaningless value.
        raise ValueError(
            f"label sequences must have equal length, got {n} and {len(labels2)}"
        )
    if n == 0:
        return 0.0

    po = sum(1 for a, b in zip(labels1, labels2) if a == b) / n

    p1 = Counter(labels1)
    p2 = Counter(labels2)
    # A label contributes to chance agreement only if both raters used it, so
    # iterating the labels of the first rater is sufficient. Counter returns 0
    # for absent labels, and no sorting is needed, so labels do not have to be
    # mutually orderable (e.g. None mixed with strings).
    pe = sum((p1[c] / n) * (p2[c] / n) for c in p1)

    chance_gap = 1 - pe
    if chance_gap == 0:
        return 0.0
    return (po - pe) / chance_gap
def kendall_tau(a, b):
    """Return Kendall's tau-a rank correlation between sequences ``a`` and ``b``.

    Every index pair ``i < j`` (up to ``len(a)``) is classified by the sign of
    ``(a[i] - a[j]) * (b[i] - b[j])``: positive is concordant, negative is
    discordant, and zero (a tie in either sequence) counts as neither. The
    result is ``(concordant - discordant)`` divided by the total number of
    pairs, or 0.0 when ``a`` has fewer than two elements.
    """
    size = len(a)
    pair_total = size * (size - 1) / 2
    if pair_total <= 0:
        return 0.0

    # Running value of (concordant - discordant).
    balance = 0
    for left in range(size):
        for right in range(left + 1, size):
            product = (a[left] - a[right]) * (b[left] - b[right])
            if product > 0:
                balance += 1
            elif product < 0:
                balance -= 1
    return balance / pair_total
def aggregate_reviews(reviews, weights):
    """Average review scores per criterion and combine them into one score.

    Args:
        reviews: Sequence of review records; each must provide
            ``review["scores"][criterion]`` for every entry of ``CRITERIA``.
        weights: Mapping from criterion name to its numeric weight.

    Returns:
        A ``(per_criterion, overall)`` tuple: ``per_criterion`` maps each
        criterion to the mean score across ``reviews`` (0.0 for every
        criterion when ``reviews`` is empty) and ``overall`` is the weighted
        sum of those means.
    """
    if reviews:
        per_criterion = {}
        for criterion in CRITERIA:
            scores = [review["scores"][criterion] for review in reviews]
            per_criterion[criterion] = mean(scores)
    else:
        per_criterion = dict.fromkeys(CRITERIA, 0.0)

    overall = sum(
        weights[criterion] * per_criterion[criterion] for criterion in CRITERIA
    )
    return per_criterion, overall
def _read_jsonl(path):
    """Return the parsed records of a JSON Lines file, or [] if it is absent."""
    import os

    if not os.path.exists(path):
        return []
    # The context manager closes the handle; blank lines (commonly a trailing
    # newline at end of file) are skipped instead of crashing json.loads.
    with open(path, encoding="utf-8") as handle:
        return [json.loads(line) for line in handle if line.strip()]


def build_metrics(artifacts_dir: str, weights) -> dict:
    """Compute summary metrics from the JSONL artifacts in ``artifacts_dir``.

    Reads ``manuscripts.jsonl``, ``reviews.jsonl`` and ``decisions.jsonl``;
    a missing file is treated as having no records. Manuscript records must
    carry ``topic_id`` and ``round``; review records ``topic_id``, ``round``,
    ``scores`` and ``recommendation``; decision records ``topic_id``,
    ``round`` and ``decision``.

    Args:
        artifacts_dir: Directory containing the JSONL files.
        weights: Mapping from criterion name to weight, forwarded to
            ``aggregate_reviews``.

    Returns:
        A dict with the keys:
            ``topics``: sorted distinct manuscript topic ids.
            ``rounds``: sorted distinct manuscript rounds.
            ``Q_by_topic``: ``{topic: {round: weighted score}}``; a round with
                no reviews gets the score of all-zero criterion means.
            ``accept_rate_last``: fraction of topics with an ``"Accept"``
                decision in the last round (0.0 when there are no topics).
            ``kappa``: Cohen's kappa over the recommendations of all pairs of
                reviews that share a (topic, round).
            ``tau``: mean Kendall tau over the criterion score vectors of the
                same review pairs.
    """
    mans = _read_jsonl(f"{artifacts_dir}/manuscripts.jsonl")
    revs = _read_jsonl(f"{artifacts_dir}/reviews.jsonl")
    decs = _read_jsonl(f"{artifacts_dir}/decisions.jsonl")

    rev_by_tr = defaultdict(list)
    for rev in revs:
        rev_by_tr[(rev["topic_id"], rev["round"])].append(rev)

    topics = sorted({m["topic_id"] for m in mans})
    rounds = sorted({m["round"] for m in mans})

    # .get() (not indexing) so that lookups for review-less rounds do not
    # insert empty groups into the defaultdict iterated further below.
    topic_metrics = {
        t: {
            r: aggregate_reviews(rev_by_tr.get((t, r), []), weights)[1]
            for r in rounds
        }
        for t in topics
    }

    last_r = max(rounds) if rounds else 0
    accept_rate = mean([
        1.0
        if any(
            d["topic_id"] == t and d["round"] == last_r and d["decision"] == "Accept"
            for d in decs
        )
        else 0.0
        for t in topics
    ])

    # Kappa needs a sample of label pairs: on a single pair it is 0.0 both
    # when the labels agree (chance agreement is 1) and when they differ
    # (observed and chance agreement are both 0). The pairs are therefore
    # pooled and kappa is computed once. Each pair is added in both
    # orientations so the value does not depend on the order of the lines in
    # reviews.jsonl.
    labels_a = []
    labels_b = []
    taus = []
    for rvs in rev_by_tr.values():
        score_vectors = [[rv["scores"][k] for k in CRITERIA] for rv in rvs]
        for i in range(len(rvs)):
            for j in range(i + 1, len(rvs)):
                rec_i = rvs[i]["recommendation"]
                rec_j = rvs[j]["recommendation"]
                labels_a += [rec_i, rec_j]
                labels_b += [rec_j, rec_i]
                taus.append(kendall_tau(score_vectors[i], score_vectors[j]))

    kappa = cohen_kappa(labels_a, labels_b) if labels_a else 0.0
    tau = mean(taus) if taus else 0.0

    return {
        "topics": topics,
        "rounds": rounds,
        "Q_by_topic": topic_metrics,
        "accept_rate_last": accept_rate,
        "kappa": kappa,
        "tau": tau,
    }
