# Alternative ground-truth source, currently disabled:
# gt_file = "/data/linxi/workspace/POPE/output/coco/coco_ground_truth_segmentation.json"
# Ground-truth JSON; each record is read below through its 'id', 'image' and
# 'objects' keys.
gt_file = "/data/linxi/workspace/POPE/llava_qa/obj/I4_gt_obj.json"
# Prediction JSON; each record is read below through 'keep_labels', whose
# first element is taken as that record's predicted labels.
pred_file = "/data/linxi/workspace/LLaVA/llava/backbone/results/I4_obj_dectection.json"

import json
import os
import shutil
# Parse both inputs up front; the ground-truth file is read and closed
# before the prediction file is opened.
with open(gt_file) as gt_handle:
    gt_data = json.loads(gt_handle.read())
with open(pred_file) as pred_handle:
    pred_data = json.loads(pred_handle.read())

# Flattened binary indicators, one entry per (record, label) pair. The two
# lists are kept position-aligned so they can be scored against each other.
gt_labels_ls = []
pred_labels_ls = []
# Predictions rewritten in the ground-truth record layout, e.g.
# {"id": 1, "image": "COCO_val2014_000000144305.jpg", "objects": ["keyboard", "laptop", "dining table"]}
pred_dict_ls = []

# zip() stops at the shorter input, so a length mismatch would otherwise
# silently drop the unmatched tail from the evaluation.
if len(gt_data) != len(pred_data):
    print("WARNING: gt_data has {} records but pred_data has {}; "
          "only the first {} pairs are evaluated".format(
              len(gt_data), len(pred_data),
              min(len(gt_data), len(pred_data))))

for gt, pred in zip(gt_data, pred_data):
    gt_labels = gt['objects']
    # De-duplicate, then sort: plain set iteration order for strings can
    # differ between interpreter runs, which would make the saved JSON
    # non-reproducible. Assumes labels are mutually comparable (strings).
    pred_labels = sorted(set(pred['keep_labels'][0]))
    pred_dict_ls.append({"id": gt['id'], "image": gt['image'], "objects": pred_labels})
    print(gt_labels)
    print(pred_labels)

    # Build the lookup sets once per record instead of scanning the label
    # lists for every membership test.
    gt_label_set = set(gt_labels)
    pred_label_set = set(pred_labels)
    # The label universe for this record is everything either side mentions.
    all_labels = gt_label_set | pred_label_set

    # turn labels into one-hot vectors; both comprehensions walk the same
    # unmodified set, so their positions line up
    gt_labels_ls.extend([1 if label in gt_label_set else 0 for label in all_labels])
    pred_labels_ls.extend([1 if label in pred_label_set else 0 for label in all_labels])

# Compute accuracy, precision and recall over all records with sklearn's metrics.
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Every position in the two vectors is a label present in the ground truth,
# the prediction, or both for some record, so no position is 0 on both sides
# (there are no true negatives contributing to the accuracy).
accuracy, precision, recall = [
    score_fn(gt_labels_ls, pred_labels_ls)
    for score_fn in (accuracy_score, precision_score, recall_score)
]

print(accuracy, precision, recall)
# Recorded output for "I4 all" (accuracy, precision, recall):
# 0.7822023567655425 0.8785942492012779 0.876993166287016

# Write the reformatted predictions (pred_dict_ls) out as JSON and report
# where they went.
pred_save_file = "/data/linxi/workspace/POPE/llava_qa/obj/I4_pred_obj_detr.json"
with open(pred_save_file, 'w') as out_handle:
    json.dump(pred_dict_ls, out_handle)
saved_message = "pred_dict_ls saved to {}".format(pred_save_file)
print(saved_message)
