from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
import json

# Hard-coded locations of the inference outputs to score.
prediction_file = './icl_inference_output/commongen/model=EleutherAI/gpt-neo-2.7B_noise=0.0_retriever=random/prediction.json'
label_file = './icl_inference_output/commongen/model=EleutherAI/gpt-neo-2.7B_noise=0.0_retriever=random/label.json'

# Equal weight on unigram and bigram precision; 3- and 4-grams are ignored.
BLEU_WEIGHTS = (0.5, 0.5, 0, 0)


def load_json(path):
    """Return the parsed content of the JSON file at *path*."""
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def tokenize(text):
    """Split *text* on whitespace into a list of tokens.

    An empty or whitespace-only string yields an empty list, so empty
    predictions need no placeholder substitution before scoring.
    """
    return text.split()


def prepare_bleu_inputs(predictions, references):
    """Convert raw predictions/labels into the structure corpus_bleu expects.

    Args:
        predictions: one predicted string per example.
        references: one entry per example; either a single reference string
            or a list of reference strings.
            NOTE(review): the on-disk schema is not visible from this script;
            confirm that label entries are strings (or lists of strings) and
            not pre-tokenized word lists.

    Returns:
        A ``(list_of_references, hypotheses)`` pair: for every example a list
        of tokenized references, and the tokenized prediction.

    Raises:
        ValueError: if the two inputs do not have the same number of examples.
    """
    if len(predictions) != len(references):
        raise ValueError(
            f'got {len(predictions)} predictions but {len(references)} references'
        )

    hypotheses = [tokenize(pred) for pred in predictions]
    list_of_references = []
    for entry in references:
        # A bare string is a single reference; wrap it so every example
        # carries a list of references.
        candidates = [entry] if isinstance(entry, str) else entry
        list_of_references.append([tokenize(ref) for ref in candidates])
    return list_of_references, hypotheses


def main():
    """Load predictions and labels, then print corpus BLEU rounded to 4 places."""
    predictions = load_json(prediction_file)
    references = load_json(label_file)

    list_of_references, hypotheses = prepare_bleu_inputs(predictions, references)

    # corpus_bleu takes the references first and the hypotheses second.
    score = corpus_bleu(
        list_of_references,
        hypotheses,
        weights=BLEU_WEIGHTS,
        smoothing_function=SmoothingFunction().method4,
    )
    print(round(score, 4))


if __name__ == '__main__':
    main()