{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TP\tFP\tTN\tFN\t\n",
      "1384\t278\t1222\t116\n",
      "Accuracy: 0.8686666666666667\n",
      "Precision: 0.8327316486161251\n",
      "Recall: 0.9226666666666666\n",
      "F1 score: 0.8753953194180898\n",
      "Yes ratio: 0.554\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "from collections import Counter\n",
    "\n",
    "# Both paths are blank placeholders and must be filled in before running.\n",
    "label_file = ''  # file holding the reference 'label' values\n",
    "ans_file = ''    # JSON array of objects that each carry a 'response' string\n",
    "# True: label_file holds one JSON object per line; False: a single JSON array.\n",
    "MSCOCO = True\n",
    "\n",
    "# Tokens that mark a response as negative (matched exactly, case-sensitive).\n",
    "NEGATIVE_TOKENS = frozenset({'No', 'not', 'no'})\n",
    "\n",
    "\n",
    "def load_labels(path, one_object_per_line):\n",
    "    \"\"\"Return the raw 'label' value of every record stored in path.\n",
    "\n",
    "    Args:\n",
    "        path: file to read.\n",
    "        one_object_per_line: True to parse each line as its own JSON object,\n",
    "            False to parse the whole file as one JSON array of objects.\n",
    "    \"\"\"\n",
    "    with open(path, 'r') as f:\n",
    "        if one_object_per_line:\n",
    "            # Blank lines (e.g. a trailing newline) are skipped instead of\n",
    "            # crashing json.loads.\n",
    "            return [json.loads(line)['label'] for line in f if line.strip()]\n",
    "        return [entry['label'] for entry in json.load(f)]\n",
    "\n",
    "\n",
    "def extract_answer(response):\n",
    "    \"\"\"Collapse a free-form response to 'yes' or 'no'.\n",
    "\n",
    "    Only the text before the first '.' is inspected and commas are dropped.\n",
    "    The answer is 'no' when a space-separated token equals 'No', 'no' or\n",
    "    'not'; anything else counts as 'yes'.\n",
    "    \"\"\"\n",
    "    first_sentence = response.split('.')[0].replace(',', '')\n",
    "    if NEGATIVE_TOKENS.intersection(first_sentence.split(' ')):\n",
    "        return 'no'\n",
    "    return 'yes'\n",
    "\n",
    "\n",
    "def safe_ratio(numerator, denominator):\n",
    "    \"\"\"Divide, returning 0.0 instead of raising when the denominator is 0.\"\"\"\n",
    "    return numerator / denominator if denominator else 0.0\n",
    "\n",
    "\n",
    "# Labels: 'no' -> 0, every other value -> 1.\n",
    "label_list = [0 if label == 'no' else 1 for label in load_labels(label_file, MSCOCO)]\n",
    "\n",
    "with open(ans_file, 'r') as f:\n",
    "    answers = json.load(f)\n",
    "\n",
    "# Store the parsed answer on each record and build the 0/1 prediction list.\n",
    "pred_list = []\n",
    "for answer in answers:\n",
    "    answer['answer'] = extract_answer(answer['response'])\n",
    "    pred_list.append(0 if answer['answer'] == 'no' else 1)\n",
    "\n",
    "# zip() would silently drop the unmatched tail and skew every metric, so\n",
    "# refuse to score inputs that do not line up one-to-one.\n",
    "if not pred_list:\n",
    "    raise ValueError('no answers loaded from {!r}'.format(ans_file))\n",
    "if len(pred_list) != len(label_list):\n",
    "    raise ValueError(\n",
    "        '{} answers but {} labels: inputs must align one-to-one'.format(\n",
    "            len(pred_list), len(label_list)))\n",
    "\n",
    "pos = 1\n",
    "neg = 0\n",
    "yes_ratio = pred_list.count(pos) / len(pred_list)\n",
    "\n",
    "# Confusion counts keyed by (prediction, label); missing keys count as 0.\n",
    "confusion = Counter(zip(pred_list, label_list))\n",
    "TP = confusion[(pos, pos)]\n",
    "FP = confusion[(pos, neg)]\n",
    "TN = confusion[(neg, neg)]\n",
    "FN = confusion[(neg, pos)]\n",
    "\n",
    "print('TP\\tFP\\tTN\\tFN\\t')\n",
    "print('{}\\t{}\\t{}\\t{}'.format(TP, FP, TN, FN))\n",
    "\n",
    "# safe_ratio keeps degenerate runs (e.g. no positive predictions) from\n",
    "# raising ZeroDivisionError; the affected metric is reported as 0.0.\n",
    "precision = safe_ratio(TP, TP + FP)\n",
    "recall = safe_ratio(TP, TP + FN)\n",
    "f1 = safe_ratio(2*precision*recall, precision + recall)\n",
    "acc = (TP + TN) / (TP + TN + FP + FN)\n",
    "print('Accuracy: {}'.format(acc))\n",
    "print('Precision: {}'.format(precision))\n",
    "print('Recall: {}'.format(recall))\n",
    "print('F1 score: {}'.format(f1))\n",
    "print('Yes ratio: {}'.format(yes_ratio))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llava",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
