import os
import json
import argparse
from tqdm import tqdm

def load_jsonl(path):
    """Read a JSON Lines file and return its records as a list.

    ``~`` in *path* is expanded. Blank lines (e.g. a trailing newline at the
    end of the file) are skipped instead of being fed to the JSON decoder.
    The file is closed as soon as it has been read.
    """
    with open(os.path.expanduser(path), "r", encoding="utf-8") as handle:
        return [json.loads(line) for line in handle if line.strip()]


def _safe_div(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def compute_metrics(ref_records, res_records):
    """Score generated yes/no answers against ground-truth labels.

    Records are paired by position. Each reference record must provide
    ``question_id`` and ``label``; each result record must provide
    ``question_id`` and ``text``.

    Args:
        ref_records: list of ground-truth records.
        res_records: list of generated-answer records, in the same order.
            Records beyond ``len(ref_records)`` are ignored.

    Returns:
        A dict with the keys ``precision``, ``recall``, ``f1_score``,
        ``accuracy`` (floats) and ``num_yes`` (int, number of answers
        counted as a "yes" prediction, i.e. TP + FP). Any ratio whose
        denominator is zero is reported as 0.0.

    Raises:
        ValueError: if there are fewer results than references, or if the
            ``question_id`` values of a pair differ.
    """
    if len(res_records) < len(ref_records):
        raise ValueError(
            f"expected at least {len(ref_records)} results, "
            f"got {len(res_records)}"
        )

    tp = tn = fp = fn = 0
    for position, (ref_rec, res_rec) in enumerate(zip(ref_records, res_records)):
        # Explicit check instead of `assert`, which is stripped under -O.
        if ref_rec['question_id'] != res_rec['question_id']:
            raise ValueError(
                f"question_id mismatch at record {position}: "
                f"{ref_rec['question_id']!r} != {res_rec['question_id']!r}"
            )

        ref = ref_rec['label'].lower().strip()
        res = res_rec['text'].lower().strip()

        # NOTE(review): matching is by substring, so 'no' also matches words
        # such as "know" or "not". Kept as-is so that scores stay comparable
        # with earlier runs; an answer lacking the expected token is counted
        # as the opposite prediction.
        if ref == 'yes':
            if 'yes' in res:
                tp += 1
            else:
                fn += 1
        elif 'no' in res:
            tn += 1
        else:
            fp += 1

    precision = _safe_div(tp, tp + fp)
    recall = _safe_div(tp, tp + fn)
    return {
        'precision': precision,
        'recall': recall,
        'f1_score': _safe_div(2 * precision * recall, precision + recall),
        'accuracy': _safe_div(tp + tn, len(ref_records)),
        'num_yes': tp + fp,
    }


def main(argv=None):
    """Parse the command line, evaluate the two files and print the metrics.

    Args:
        argv: optional list of command-line arguments; when ``None`` the
            arguments are taken from ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--ref-files", type=str, required=True)
    parser.add_argument("--res-files", type=str, required=True)
    args = parser.parse_args(argv)

    # open ground truth answers & generated answers
    ref_files = load_jsonl(args.ref_files)
    res_files = load_jsonl(args.res_files)

    metrics = compute_metrics(ref_files, res_files)

    # report results
    print(f'Precision: {metrics["precision"]}')
    print(f'Recall: {metrics["recall"]}')
    print(f'F1-Score: {metrics["f1_score"]}')
    print(f'Accuracy: {metrics["accuracy"]}')
    print(f'Num-Yes: {metrics["num_yes"]}')


if __name__ == "__main__":
    main()
