import os
import json
import argparse
from tqdm import tqdm

# Whole words in a prediction that count as a negative / affirmative answer.
NEGATIVE_WORDS = frozenset({"no", "not"})
AFFIRMATIVE_WORDS = frozenset({"yes"})


def load_jsonl(path):
    """Read a JSON Lines file and return its records as a list.

    ``~`` in *path* is expanded. Blank lines (e.g. a trailing newline at
    the end of the file) are skipped instead of crashing ``json.loads``.
    """
    with open(os.path.expanduser(path), "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]


def save_jsonl(records, path):
    """Write *records* to *path* as UTF-8 JSON Lines, one record per line."""
    with open(os.path.expanduser(path), "w", encoding="utf-8") as f:
        f.writelines(
            json.dumps(rec, ensure_ascii=False) + "\n" for rec in records
        )


def classify_flip(label, prediction):
    """Classify how a free-text prediction disagrees with a yes/no label.

    Args:
        label: Ground-truth answer; compared after strip + lowercase.
        prediction: Model output text.

    Returns:
        ``"yes2no"`` if the label is "yes" and the prediction contains a
        negative word, ``"no2yes"`` if the label is "no" and the prediction
        contains an affirmative word, otherwise ``None``.
    """
    gt = label.strip().lower()
    # Split on every non-letter so matching is done on whole words. Plain
    # substring checks would also fire on "know", "snow", "piano", "eyes", ...
    letters_only = "".join(
        ch if ch.isalpha() else " " for ch in prediction.lower()
    )
    words = set(letters_only.split())

    if gt == "yes" and words & NEGATIVE_WORDS:
        return "yes2no"
    if gt == "no" and words & AFFIRMATIVE_WORDS:
        return "no2yes"
    return None


def find_flips(pairs):
    """Collect reference records whose prediction flipped the label.

    Args:
        pairs: Iterable of ``(ref, pred)`` dict pairs. ``ref`` must have
            ``"question_id"`` and ``"label"``; ``pred`` must have
            ``"question_id"`` and ``"pred"``.

    Returns:
        A ``(yes2no, no2yes)`` tuple of lists of reference records.

    Raises:
        ValueError: If a pair's question IDs differ.
    """
    yes2no, no2yes = [], []
    for ref, pred in pairs:
        if ref["question_id"] != pred["question_id"]:
            raise ValueError(
                f"Mismatched IDs: {ref['question_id']!r} != "
                f"{pred['question_id']!r}"
            )
        flip = classify_flip(ref["label"], pred["pred"])
        if flip == "yes2no":
            yes2no.append(ref)
        elif flip == "no2yes":
            no2yes.append(ref)
    return yes2no, no2yes


def main(argv=None):
    """Compare annotations with predictions and save the flipped samples.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        ValueError: If the two input files differ in record count or a
            pair of records has different question IDs.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--annotation-file", type=str, required=True)
    parser.add_argument("--result-file", type=str, required=True)
    parser.add_argument("--yes2no-file", type=str, default="yes2no.jsonl")
    parser.add_argument("--no2yes-file", type=str, default="no2yes.jsonl")
    args = parser.parse_args(argv)

    # load files
    ref_labels = load_jsonl(args.annotation_file)
    res_labels = load_jsonl(args.result_file)

    # Explicit raise instead of assert: asserts are stripped under `-O`.
    if len(ref_labels) != len(res_labels):
        raise ValueError(
            f"Files have unequal length: {len(ref_labels)} annotations vs "
            f"{len(res_labels)} results"
        )

    yes2no, no2yes = find_flips(
        tqdm(zip(ref_labels, res_labels), total=len(ref_labels))
    )

    # save results
    save_jsonl(yes2no, args.yes2no_file)
    save_jsonl(no2yes, args.no2yes_file)

    print(f"Saved {len(yes2no):4d} GT-yes → pred-no samples to {args.yes2no_file}")
    print(f"Saved {len(no2yes):4d} GT-no  → pred-yes samples to {args.no2yes_file}")


if __name__ == "__main__":
    main()
