{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import prettytable\n",
    "import random\n",
    "from collections import defaultdict \n",
    "import numpy as np\n",
    "\n",
    "# Fixed seed so the random tie-break below (used when all three annotators\n",
    "# disagree) picks the same answers on every run.\n",
    "random.seed(53)\n",
    "\n",
    "# Per-category counts of correctly answered items, keyed by item['category'].\n",
    "category_correct_1 = defaultdict(int)         # annotator 1\n",
    "category_correct_2 = defaultdict(int)         # annotator 2\n",
    "category_correct_3 = defaultdict(int)         # annotator 3\n",
    "category_correct_majority = defaultdict(int)  # majority vote of the three annotators\n",
    "category_correct_absolute = defaultdict(int)  # all three agree and are correct\n",
    "category_total = defaultdict(int)             # number of items per category\n",
    "\n",
    "# Be explicit about the encoding so that loading the file does not depend on\n",
    "# the platform's default locale encoding.\n",
    "with open('results/illusionvqa_comprehension_human_annotator_results.json', encoding='utf-8') as f:\n",
    "    dataset = json.load(f)\n",
    "\n",
    "# Values of HUMAN_TIME_1..3 for every item, in dataset order.\n",
    "time1 = []\n",
    "time2 = []\n",
    "time3 = []\n",
    "\n",
    "for item in dataset:\n",
    "    time1.append(item['HUMAN_TIME_1'])\n",
    "    time2.append(item['HUMAN_TIME_2'])\n",
    "    time3.append(item['HUMAN_TIME_3'])\n",
    "\n",
    "    human_answer_1 = item['HUMAN_ANSWER_1']\n",
    "    human_answer_2 = item['HUMAN_ANSWER_2']\n",
    "    human_answer_3 = item['HUMAN_ANSWER_3']\n",
    "    answer = item['answer']\n",
    "    category = item['category']\n",
    "\n",
    "    # Individual accuracy of each annotator.\n",
    "    if human_answer_1 == answer:\n",
    "        category_correct_1[category] += 1\n",
    "    if human_answer_2 == answer:\n",
    "        category_correct_2[category] += 1\n",
    "    if human_answer_3 == answer:\n",
    "        category_correct_3[category] += 1\n",
    "\n",
    "    # Majority vote: an answer shared by at least two annotators wins. When all\n",
    "    # three differ there is no majority, so one of the three is drawn at random.\n",
    "    if human_answer_1 == human_answer_2 or human_answer_1 == human_answer_3:\n",
    "        majority_vote = human_answer_1\n",
    "    elif human_answer_2 == human_answer_3:\n",
    "        majority_vote = human_answer_2\n",
    "    else:\n",
    "        majority_vote = random.choice([human_answer_1, human_answer_2, human_answer_3])\n",
    "\n",
    "    if majority_vote == answer:\n",
    "        category_correct_majority[category] += 1\n",
    "\n",
    "    # Absolute consensus: counted only when all three annotators give the same\n",
    "    # answer and that answer is the correct one.\n",
    "    if human_answer_1 == human_answer_2 == human_answer_3 == answer:\n",
    "        category_correct_absolute[category] += 1\n",
    "\n",
    "    category_total[category] += 1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.871264367816092 0.8666666666666667 0.9287356321839081 0.9103448275862069 0.7839080459770115\n"
     ]
    }
   ],
   "source": [
    "# Collapse the per-category tallies into dataset-wide counts.\n",
    "total = sum(category_total.values())\n",
    "total_correct_1 = sum(category_correct_1[category] for category in category_total)\n",
    "total_correct_2 = sum(category_correct_2[category] for category in category_total)\n",
    "total_correct_3 = sum(category_correct_3[category] for category in category_total)\n",
    "total_correct_majority = sum(category_correct_majority[category] for category in category_total)\n",
    "total_correct_absolute = sum(category_correct_absolute[category] for category in category_total)\n",
    "\n",
    "# Overall accuracy, printed in this order:\n",
    "# annotator 1, annotator 2, annotator 3, majority vote, absolute consensus.\n",
    "print(*(\n",
    "    correct / total\n",
    "    for correct in (\n",
    "        total_correct_1,\n",
    "        total_correct_2,\n",
    "        total_correct_3,\n",
    "        total_correct_majority,\n",
    "        total_correct_absolute,\n",
    "    )\n",
    "))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(16.461253224296133,\n",
       " 12.945708226061415,\n",
       " 15.569797342125026,\n",
       " 14.992252930827526)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean recorded time per annotator, followed by the mean of those three means.\n",
    "annotator_mean_times = [np.mean(times) for times in (time1, time2, time3)]\n",
    "(*annotator_mean_times, np.mean(annotator_mean_times))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-category table of item count and majority-vote accuracy.\n",
    "table = prettytable.PrettyTable()\n",
    "#column names\n",
    "table.field_names = [\"Category\", \"Total\", \"Accuracy Majority\"]\n",
    "# The loop variable is deliberately not named `total`: that name already holds\n",
    "# the dataset-wide item count at notebook level and must not be overwritten here.\n",
    "for category, n_items in category_total.items():\n",
    "    table.add_row([category, n_items, category_correct_majority[category]/n_items])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------------+-------+--------------------+\n",
      "|         Category        | Total | Accuracy Majority  |\n",
      "+-------------------------+-------+--------------------+\n",
      "|    impossible object    |  134  | 0.9850746268656716 |\n",
      "|        real-scene       |   64  |      0.984375      |\n",
      "|           size          |   46  | 0.6304347826086957 |\n",
      "|          hidden         |   45  |        1.0         |\n",
      "|     deceptive design    |   37  | 0.9459459459459459 |\n",
      "|      angle illusion     |   26  | 0.8461538461538461 |\n",
      "|          color          |   23  | 0.6086956521739131 |\n",
      "|       edited-scene      |   21  |        1.0         |\n",
      "|         counting        |   11  |        1.0         |\n",
      "|       upside-down       |   7   |        1.0         |\n",
      "| positive-negative space |   7   |        1.0         |\n",
      "|      circle-spiral      |   6   | 0.6666666666666666 |\n",
      "|    repeating pattern    |   2   |        1.0         |\n",
      "|       perspective       |   2   |        1.0         |\n",
      "|        occlusion        |   2   |        0.5         |\n",
      "|     angle constancy     |   2   |        0.5         |\n",
      "+-------------------------+-------+--------------------+\n"
     ]
    }
   ],
   "source": [
    "# Order the rows by the Total column, largest first, then print the table as text.\n",
    "# Both settings are stored on the table object, so they persist for later prints.\n",
    "table.sortby = \"Total\"\n",
    "table.reversesort = True\n",
    "print(table)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8947368421052632\n"
     ]
    }
   ],
   "source": [
    "#misc\n",
    "# Categories pooled into a single misc bucket; the printed value is the\n",
    "# majority-vote accuracy over all items in these categories combined.\n",
    "misc_categories = (\n",
    "    \"counting\",\n",
    "    \"repeating pattern\",\n",
    "    \"perspective\",\n",
    "    \"occlusion\",\n",
    "    \"angle constancy\",\n",
    ")\n",
    "misc_correct = sum(category_correct_majority[name] for name in misc_categories)\n",
    "misc_total = sum(category_total[name] for name in misc_categories)\n",
    "\n",
    "print(misc_correct/misc_total)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating agreement between human answers\n",
      "human 1 and human 2:  0.8574712643678161\n",
      "human 1 and human 3:  0.8482758620689655\n",
      "human 2 and human 3:  0.8298850574712644\n",
      "all:   0.7862068965517242\n",
      "none:  0.0367816091954023\n"
     ]
    }
   ],
   "source": [
    "# Agreement counters over the whole dataset: one per annotator pair, one for\n",
    "# items where all three agree, and one for items where no two annotators agree.\n",
    "h1_h2 = 0\n",
    "h1_h3 = 0\n",
    "h2_h3 = 0\n",
    "h1_h2_h3 = 0\n",
    "none = 0\n",
    "\n",
    "for item in dataset:\n",
    "    human_answer_1 = item['HUMAN_ANSWER_1']\n",
    "    human_answer_2 = item['HUMAN_ANSWER_2']\n",
    "    human_answer_3 = item['HUMAN_ANSWER_3']\n",
    "\n",
    "    # Compare each pair once and reuse the results for the combined counters.\n",
    "    agree_12 = human_answer_1 == human_answer_2\n",
    "    agree_13 = human_answer_1 == human_answer_3\n",
    "    agree_23 = human_answer_2 == human_answer_3\n",
    "\n",
    "    if agree_12:\n",
    "        h1_h2 += 1\n",
    "    if agree_13:\n",
    "        h1_h3 += 1\n",
    "    if agree_23:\n",
    "        h2_h3 += 1\n",
    "\n",
    "    # All three agree exactly when annotator 2 matches both of the others.\n",
    "    if agree_12 and agree_23:\n",
    "        h1_h2_h3 += 1\n",
    "\n",
    "    # No pair agrees: the three answers are pairwise different.\n",
    "    if not (agree_12 or agree_13 or agree_23):\n",
    "        none += 1\n",
    "\n",
    "# Every rate below is a fraction of all items in the dataset.\n",
    "num_items = len(dataset)\n",
    "print(\"evaluating agreement between human answers\")\n",
    "print(\"human 1 and human 2: \", h1_h2/num_items)\n",
    "print(\"human 1 and human 3: \", h1_h3/num_items)\n",
    "print(\"human 2 and human 3: \", h2_h3/num_items)\n",
    "print(\"all:  \", h1_h2_h3/num_items)\n",
    "print(\"none: \", none/num_items)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, False], [False, False, False], [False, False, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, True, True], [True, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, False, True], [False, True, False], [False, False, False], [False, False, False], [False, False, False], [True, True, False], [False, False, True], [False, False, False], [False, False, True], [False, False, False], [False, False, False], [True, True, False], [False, False, False], [False, False, False], [True, False, True], [False, True, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, False], [False, False, False], [False, False, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, 
True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, True, False], [False, False, False], [False, False, False], [False, False, False], [True, False, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, True], [True, True, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, False, True], [False, True, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, True], [True, True, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], 
[False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, True], [True, True, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, False, False], [False, False, False], [False, True, False], [False, False, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, False], [False, False, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, True], [False, False, False], [True, True, False], [False, True, False], [False, False, False], [False, False, False], [False, False, False], [True, False, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, False, True], [False, False, False], [False, True, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, 
False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, False, True], [False, True, False], [False, False, False], [False, True, False], [False, False, False], [False, False, False], [True, False, True], [True, False, False], [True, False, False], [False, True, False], [False, False, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, True, True], [True, False, False], [False, False, False], [False, False, False], [False, True, True], [False, False, False], [True, False, False], [False, False, True], [False, False, False], [True, True, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, True, False], [True, False, True], [False, False, False], [False, True, False], [False, False, False], [True, False, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, False, False], [False, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, True, False], [True, False, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, False], [False, False, True], [False, False, True], [False, False, False], [False, True, False], [True, False, False], [False, False, False], [False, False, False], [True, True, True], [False, 
False, False], [True, True, False], [False, False, False], [False, False, False], [False, False, False], [False, False, True], [False, True, True], [False, False, False], [True, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, False, False], [False, False, False], [False, True, True], [False, False, False], [False, False, False], [True, True, False], [False, False, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, False, True], [False, True, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, True, False], [False, False, True], [True, False, False], [False, False, False], [True, False, False], [False, False, True], [False, True, False], [False, False, False], [False, False, False], [True, False, True], [False, True, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], 
[True, True, False], [False, False, True], [False, False, True], [True, True, False], [False, False, False], [False, False, False], [True, False, False], [False, True, False], [False, False, True], [False, False, False], [True, False, False], [False, True, False], [False, False, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, True, False], [False, False, True], [True, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, False], [False, False, False], [False, False, False], [False, False, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, False, False], [False, False, True], [False, False, False], [False, True, False], [False, False, False], [False, False, False], [False, False, False], [False, False, True], [False, False, False], [True, True, False], [True, False, False], [False, False, True], [False, False, False], [False, True, False], [True, False, False], [False, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, True], [False, False, False], [False, False, False], [True, True, 
True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, False, True], [False, False, False], [False, True, False], [False, False, False], [True, False, True], [False, False, False], [False, True, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, False, True], [False, True, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, True, True], [True, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, False, True], [False, False, False], [False, True, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, 
False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, True, True], [True, False, False], [False, False, False], [False, True, True], [False, False, False], [False, False, False], [True, False, False], [True, False, False], [False, False, False], [False, True, True], [False, False, False], [True, True, False], [False, False, False], [False, False, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, True], [True, True, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, True, False], [False, False, True], [True, False, False], [False, False, False], [False, True, False], [True, False, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], 
[False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, False, False], [False, False, False], [False, True, True], [False, False, False], [False, False, False], [True, False, True], [False, True, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, False], [False, False, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, False, 
True], [False, True, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, True], [False, True, False], [True, False, False], [False, False, False], [False, False, False], [True, False, False], [False, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, 
False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, 
False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, True], [True, True, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, True], [False, False, False], [True, True, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, True, False], [True, False, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, 
False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, 
True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, False], [False, False, True], [False, False, False], [True, True, False], [False, False, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, True], [False, False, False], [True, True, False], [False, False, True], [True, True, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, 
False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, False, False], [False, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, True, False], [True, False, 
True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, True, True], [False, False, False], [True, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, False, False], [False, False, True], [False, True, False], [True, False, True], [False, False, False], [False, True, False], [True, False, False], [False, True, False], [False, False, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, True, False], [False, False, False], [True, False, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, True], [False, False, False], [True, True, False], [False, False, False], [False, True, True], [True, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, 
False], [False, False, False], [False, False, False], [False, False, True], [True, True, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, True, False], [False, False, False], [False, False, False], [True, False, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, False, True], [False, True, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, False, False], [False, False, False], [False, True, True], [False, False, False], [False, False, True], [True, True, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, 
False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, False], [False, False, False], [False, False, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, True, False], [False, False, False], [True, False, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, False, False], [False, False, True], [False, True, False], [False, True, False], [True, False, False], [False, False, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, False, False], [False, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], 
[False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, False, False], [False, False, False], [False, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [True, True, 
True], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False], [False, False, False], [True, True, True], [False, False, False], [False, False, False]]\n"
     ]
    }
   ],
   "source": [
    "binary_qa = []\n",
    "\n",
    "for item in dataset:\n",
    "    human_answer_1 = item['HUMAN_ANSWER_1']\n",
    "    human_answer_2 = item['HUMAN_ANSWER_2']\n",
    "    human_answer_3 = item['HUMAN_ANSWER_3']\n",
    "    # answer = item['answer']\n",
    "    \n",
    "    human_answers = []\n",
    "    for option in item[\"options\"]:\n",
    "        human_answers.append([human_answer_1==option, human_answer_2==option, human_answer_3==option])\n",
    "    binary_qa+=human_answers\n",
    "\n",
    "print(binary_qa)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "binary_qa = np.array(binary_qa)\n",
    "\n",
    "#covert True/False to 1/0\n",
    "binary_qa = binary_qa.astype(int)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.808130982582557 0.7958513654678406 0.7726538123049844\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import cohen_kappa_score\n",
    "\n",
    "evaluator1_answers = binary_qa[:,0]\n",
    "evaluator2_answers = binary_qa[:,1]\n",
    "evaluator3_answers = binary_qa[:,2]\n",
    "\n",
    "kappa_1_2 = cohen_kappa_score(evaluator1_answers, evaluator2_answers)\n",
    "kappa_1_3 = cohen_kappa_score(evaluator1_answers, evaluator3_answers)\n",
    "kappa_2_3 = cohen_kappa_score(evaluator2_answers, evaluator3_answers)\n",
    "\n",
    "print(kappa_1_2, kappa_1_3, kappa_2_3)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
