import evaluate 
import os 
import numpy as np
def parse_answer(line):
    """Return the text of *line* that follows its first '.'.

    The line is split on every '.', the first piece is discarded and the
    remaining pieces are re-joined with single spaces, so any further periods
    inside the text are replaced by spaces.  A line containing no '.' yields
    an empty string.
    """
    # presumably each line looks like "<index>. <caption>" -- confirm against
    # whatever writes blip2.txt
    return ' '.join(line.split('.')[1:]).strip()


def load_answers(path):
    """Read *path* and return one parsed answer per line, in file order."""
    # `with` guarantees the handle is closed (it was previously leaked).
    with open(path, 'r') as answers_file:
        return [parse_answer(line) for line in answers_file]


def list_caption_paths(data_dir):
    """Return the sorted paths of entries in *data_dir* whose name contains 'txt'."""
    # NOTE: this is a substring match, not an extension check, so a name such
    # as "notes_txt.bak" is selected as well.
    return sorted(
        os.path.join(data_dir, name)
        for name in os.listdir(data_dir)
        if 'txt' in name
    )


def load_reference_captions(caption_paths, limit):
    """Return the stripped contents of at most *limit* files from *caption_paths*."""
    captions = []
    for caption_path in caption_paths[:limit]:
        # Close every caption file right after reading it instead of keeping
        # one open handle per file for the lifetime of the process.
        with open(caption_path, "r") as caption_file:
            captions.append(caption_file.read().strip())
    return captions


def main():
    """Score the answers in blip2.txt against the ground-truth caption files.

    Answers and captions are paired by position: the i-th line of blip2.txt is
    compared with the i-th caption file in sorted path order.  Prints the
    ROUGE and BLEU result dicts and the mean BERTScore precision.
    """
    data_dir = '../t2i_steal_0223/DATASETS/coco_subset_preprocessed/'
    caption_paths = list_caption_paths(data_dir)

    answer_list = load_answers('blip2.txt')
    for answer in answer_list:
        print("answer:", answer)

    # Only as many references as there are answers are needed.
    caption_gt_list = load_reference_captions(caption_paths, len(answer_list))

    print("ans:{} gt:{}".format(len(answer_list), len(caption_gt_list)))

    # The metrics require predictions and references of equal length.  The
    # references are already capped at the number of answers; cap the answers
    # symmetrically when there are fewer caption files than answers.
    if len(answer_list) > len(caption_gt_list):
        print("warning: only {} caption files for {} answers; "
              "scoring the first {} pairs".format(
                  len(caption_gt_list), len(answer_list), len(caption_gt_list)))
        answer_list = answer_list[:len(caption_gt_list)]

    for ans, gt in zip(answer_list, caption_gt_list):
        print("Ans:{} GT:{}".format(ans, gt))

    rouge = evaluate.load('rouge')
    rouge_results = rouge.compute(predictions=answer_list, references=caption_gt_list)
    print(rouge_results)
    bleu = evaluate.load("bleu")
    bleu_results = bleu.compute(predictions=answer_list, references=caption_gt_list)
    print(bleu_results)
    bertscore = evaluate.load("bertscore")
    bertscore_results = bertscore.compute(predictions=answer_list, references=caption_gt_list, lang="en")
    # Only the mean precision is reported, under the label "bertscore:".
    print("bertscore:", np.array(bertscore_results['precision']).mean())


if __name__ == '__main__':
    main()
