{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [],
   "source": [
    "import csv\n",
    "from pathlib import Path\n",
    "\n",
    "import jsonlines\n",
    "from sklearn.metrics import cohen_kappa_score"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [],
   "source": [
    "# Define data paths\n",
    "winomt_path = Path(\".\") / \"data\" / \"winomt\"\n",
    "human_annotations_path = winomt_path / \"human_annotations\"\n",
    "\n",
    "winomt_ende_annotator1_path = human_annotations_path / \"en-de.annotator1.jsonl\"\n",
    "winomt_ende_annotator2_path = human_annotations_path / \"en-de.annotator2.jsonl\"\n",
    "winomt_enru_annotator1_path = human_annotations_path / \"en-ru.annotator1.jsonl\"\n",
    "winomt_enru_annotator2_path = human_annotations_path / \"en-ru.annotator2.jsonl\"\n",
    "\n",
    "# Original annotations (source: https://github.com/gabrielStanovsky/mt_gender/tree/master/data/human_annotations)\n",
    "winomt_ende_original_path = human_annotations_path / \"en-de.original.csv\"\n",
    "winomt_enru_original_path = human_annotations_path / \"en-ru.original.csv\""
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [],
   "source": [
    "# Load annotator labels\n",
    "\n",
    "def load_annotator_labels(filepath):\n",
    "  annotator_labels = {}\n",
    "  with jsonlines.open(filepath) as f:\n",
    "    for line in f:\n",
    "      index = int(line[\"Index\"])\n",
    "      label = line[\"label\"][0]\n",
    "      if label == \"Male\":\n",
    "        label = 0\n",
    "      elif label == \"Female\":\n",
    "        label = 1\n",
    "      elif label == \"Both / Neutral / Ambiguous\":\n",
    "        label = 2\n",
    "      else:\n",
    "        continue\n",
    "      annotator_labels[index] = label\n",
    "  return annotator_labels\n",
    "\n",
    "ende_annotator1_labels = load_annotator_labels(winomt_ende_annotator1_path)\n",
    "ende_annotator2_labels = load_annotator_labels(winomt_ende_annotator2_path)\n",
    "enru_annotator1_labels = load_annotator_labels(winomt_enru_annotator1_path)\n",
    "enru_annotator2_labels = load_annotator_labels(winomt_enru_annotator2_path)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "outputs": [],
   "source": [
    "# Load original labels\n",
    "def load_original_labels(filepath):\n",
    "  original_labels = {}\n",
    "  with open(filepath) as f:\n",
    "    for line in csv.DictReader(f):\n",
    "      index = int(line[\"Index\"])\n",
    "      if line[\"Find entity? [Y/N]\"] != \"Y\":\n",
    "        continue\n",
    "      label = line[\"Gender? [M/F/N]\"]\n",
    "      if label == \"M\":\n",
    "        label = 0\n",
    "      elif label == \"F\":\n",
    "        label = 1\n",
    "      elif label == \"N\":\n",
    "        label = 2\n",
    "      else:\n",
    "        continue\n",
    "      original_labels[index] = label\n",
    "  return original_labels\n",
    "\n",
    "ende_original_labels = load_original_labels(winomt_ende_original_path)\n",
    "enru_original_labels = load_original_labels(winomt_enru_original_path)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EN–DE:\n",
      "Comparing 88 + 88 labels\n",
      "0.27413876758854927\n",
      "Comparing 84 + 84 labels\n",
      "0.29602595296025946\n",
      "EN–RU:\n",
      "Comparing 88 + 88 labels\n",
      "0.5657001850709438\n",
      "Comparing 83 + 83 labels\n",
      "0.08398802843247288\n"
     ]
    }
   ],
   "source": [
    "# Compute inter-annotator agreement\n",
    "def compute_agreement(labels1, labels2):\n",
    "  # Remove samples that were only partially annotated\n",
    "  for key in list(labels1.keys()):\n",
    "      if key not in labels2:\n",
    "          del labels1[key]\n",
    "  for key in list(labels2.keys()):\n",
    "      if key not in labels1:\n",
    "          del labels2[key]\n",
    "  print(f\"Comparing {len(labels1)} + {len(labels2)} labels\")\n",
    "  keys = list(labels1.keys())\n",
    "  kappa = cohen_kappa_score([labels1[key] for key in keys], [labels2[key] for key in keys])\n",
    "  print(kappa)\n",
    "\n",
    "print(\"EN–DE:\")\n",
    "compute_agreement(ende_annotator1_labels, ende_original_labels)\n",
    "compute_agreement(ende_annotator2_labels, ende_original_labels)\n",
    "\n",
    "print(\"EN–RU:\")\n",
    "compute_agreement(enru_annotator1_labels, enru_original_labels)\n",
    "compute_agreement(enru_annotator2_labels, enru_original_labels)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}