import json
import nltk
from nltk.translate.bleu_score import sentence_bleu
# nltk.download('punkt')  # Download necessary resources

# Load the evaluation set and keep at most the first 1000 entries.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale-dependent default encoding.
with open('../open_question.json', 'r', encoding='utf-8') as f:
    data = json.load(f)[0:1000]


# Pair each generated answer ('my_output') with its reference ('output').
predictions = [entry['my_output'] for entry in data]
references = [entry['output'] for entry in data]

# Accumulate sentence-level BLEU over every (prediction, reference) pair.
# Tokenization is lowercasing followed by whitespace splitting.
# NOTE(review): sentence_bleu is called without a smoothing function, so
# pairs lacking higher-order n-gram overlap presumably score ~0 (NLTK emits
# a warning in that case) -- confirm this is the intended metric.
total_bleu = 0.0
for pred, ref in zip(predictions, references):
    pred_tokens = pred.lower().split()
    ref_tokens = ref.lower().split()
    total_bleu += sentence_bleu([ref_tokens], pred_tokens)

# Guard against an empty dataset, which would otherwise raise
# ZeroDivisionError; report 0.0 when there is nothing to score.
average_bleu = total_bleu / len(predictions) if predictions else 0.0

# The label says "Total", but the value printed is the mean over all pairs;
# the text is kept unchanged so existing consumers of the output still match.
print(f"Total BLEU score: {average_bleu:.4f}")
