
import sys, json, csv, pathlib

def norm_row(row):
    """Map a loosely-named input record onto the fixed output schema.

    Each output field is filled from the first alias (in priority order)
    that is present in ``row`` with a value other than ``None`` or ``""``.
    Text fields fall back to ``""``; numeric fields fall back to zero.

    Args:
        row: Mapping of input column/key names to raw values.

    Returns:
        A dict with the keys ``id``, ``gold_answer``, ``pred_answer``,
        ``u_score``, ``retrieved_text``, ``docs_scored``, ``rerank_depth``,
        ``context_tokens`` and ``latency_ms``, in that order.

    Raises:
        ValueError: If a numeric field holds a value ``float()`` cannot parse.
    """
    def first_present(aliases, fallback=""):
        # Walk the aliases in priority order; None and "" count as absent.
        for alias in aliases:
            if alias not in row:
                continue
            value = row[alias]
            if value not in (None, ""):
                return value
        return fallback

    def as_float(aliases):
        return float(first_present(aliases, 0.0))

    def as_int(aliases):
        # Parse via float first so strings such as "3.0" are accepted;
        # int() then drops any fractional part.
        return int(float(first_present(aliases, 0)))

    record = {}
    record["id"] = first_present(("id", "qid", "question_id"))
    record["gold_answer"] = first_present(
        ("gold_answer", "gold", "answer_gold", "ref", "reference")
    )
    record["pred_answer"] = first_present(
        ("pred_answer", "pred", "answer_pred", "prediction", "answer")
    )
    record["u_score"] = as_float(
        ("u_score", "uncertainty", "U", "score_u", "risk", "uncertainty_score")
    )
    record["retrieved_text"] = first_present(
        ("retrieved_text", "context", "passages", "evidence", "ctx"), ""
    )
    record["docs_scored"] = as_int(("docs_scored", "docs", "n_docs", "k_retrieved"))
    record["rerank_depth"] = as_int(("rerank_depth", "rerank", "depth"))
    record["context_tokens"] = as_int(("context_tokens", "tokens", "ctx_tokens"))
    record["latency_ms"] = as_float(("latency_ms", "latency", "ms", "time_ms"))
    return record

def read_csv(p):
    """Yield each data row of the CSV file at ``p`` as a dict.

    The first row of the file supplies the field names (``csv.DictReader``
    default behaviour).

    Args:
        p: Path of the CSV file to read.

    Yields:
        One mapping of header name to cell string per data row.
    """
    # "utf-8-sig" decodes UTF-8 and drops a leading byte-order mark, so a
    # BOM-prefixed export does not turn the first header into "\ufeffid".
    # An explicit encoding also keeps decoding independent of the locale.
    # newline="" is what the csv module requires for correct quoted-newline
    # handling.
    with open(p, newline="", encoding="utf-8-sig") as f:
        yield from csv.DictReader(f)

def read_jsonl(p):
    """Yield one decoded JSON value per non-blank line of the file at ``p``.

    Args:
        p: Path of the JSON Lines file to read.

    Yields:
        The result of ``json.loads`` for each line that contains content.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # "utf-8-sig" decodes UTF-8 and drops a leading byte-order mark, which
    # json.loads would otherwise reject; it also makes decoding independent
    # of the platform's default encoding.
    with open(p, encoding="utf-8-sig") as f:
        for line in f:
            # Blank or whitespace-only lines (e.g. extra trailing newlines)
            # carry no record; skip them instead of failing in json.loads.
            if not line.strip():
                continue
            yield json.loads(line)

# Command-line entry point: convert INPUT (CSV or JSONL) into OUTPUT, writing
# one schema-normalised JSON object per line.
if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("Usage: convert_to_schema.py INPUT.{csv|jsonl} OUTPUT.jsonl")
        sys.exit(1)
    inp, out = sys.argv[1], sys.argv[2]
    # Only a ".csv" suffix (case-insensitive) selects the CSV reader; every
    # other input is treated as JSON Lines.
    ext = pathlib.Path(inp).suffix.lower()
    rows = read_csv(inp) if ext == ".csv" else read_jsonl(inp)
    # ensure_ascii=False emits non-ASCII characters verbatim, so the output
    # encoding is pinned to UTF-8 rather than left to the platform default,
    # which may be unable to encode them.
    with open(out, "w", encoding="utf-8") as g:
        for r in rows:
            g.write(json.dumps(norm_row(r), ensure_ascii=False) + "\n")
    print("Wrote", out)
