{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import random\n",
    "import numpy as np\n",
    "import scipy.stats as stats\n",
    "import matplotlib.pyplot as plt\n",
    "import warnings\n",
    "from matplotlib.ticker import PercentFormatter\n",
    "import matplotlib.ticker as mtick\n",
    "warnings.filterwarnings('ignore')\n",
    "import seaborn as sns\n",
    "import math\n",
    "from textwrap import wrap\n",
    "sns.set_style({'font.family':'serif', 'font.serif':'Times New Roman'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the main dataset; later cells read its `year`, `decisions` and `ratings` columns.\n",
    "df = pd.read_csv(\"../dataset_final.csv\", quotechar = '\"')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Second CSV, read from the openreview directory; the next cell copies its `categories` column onto `df`.\n",
    "sub = pd.read_csv(\"../openreview/openreview.csv\", quotechar='\"')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copy the `categories` column from `sub` onto `df`. pandas aligns this assignment on index labels,\n",
    "# and both frames carry the default index produced by read_csv.\n",
    "# NOTE(review): this presumes the two CSVs list the same rows in the same order - confirm.\n",
    "df['categories'] = sub['categories']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def clean_decision(row):\n",
    "    if \"Reject\" == row['decisions'] or \"Invite to Workshop Track\" == row['decisions'] or \"Withdrawn\" == row['decisions']:\n",
    "        return 0\n",
    "    elif \"Poster\" in row['decisions']:\n",
    "        return 1\n",
    "    elif \"Talk\" in row['decisions'] or \"Oral\" in row['decisions']:\n",
    "        return 2\n",
    "    else:\n",
    "        return 3\n",
    "\n",
    "def calculate_average_score(row):\n",
    "    ratings = [float(rating) for rating in row['ratings'].split(\";\")]\n",
    "    return round(sum(ratings) / len(ratings), 1)\n",
    "\n",
    "def clean_categories(row):\n",
    "    if not isinstance(row['categories'], str):\n",
    "        return '12'\n",
    "    return row['categories']\n",
    "\n",
    "# Per-year copies of `df` with cleaned columns, keyed by year (2017-2020).\n",
    "data = {}\n",
    "\n",
    "for year in range(2017, 2021):\n",
    "    papers = df[df.year == year].copy()\n",
    "    # Replace the raw decision text with its integer code.\n",
    "    papers['decisions'] = papers.apply(clean_decision, axis=1)\n",
    "    papers['average_score'] = papers.apply(calculate_average_score, axis=1)\n",
    "    # Non-string category entries become the placeholder '12'.\n",
    "    papers['categories'] = papers.apply(clean_categories, axis=1)\n",
    "    data[year] = papers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_accept(df, category):\n",
    "    count = 0\n",
    "    for row in df.iterrows():\n",
    "        if not isinstance(row[1]['categories'], str):\n",
    "            continue\n",
    "        if str(category) in row[1]['categories'].split(\";\") and row[1]['decisions'] > 0:\n",
    "            count += 1\n",
    "        elif category == 12 and row[1]['categories'] == '12' and row[1]['decisions'] > 0:\n",
    "            count += 1\n",
    "    return count\n",
    "\n",
    "def get_total(df, category):\n",
    "    count = 0\n",
    "    for row in df.iterrows():\n",
    "        if not isinstance(row[1]['categories'], str):\n",
    "            continue\n",
    "        if str(category) in row[1]['categories'].split(\";\"):\n",
    "            count += 1\n",
    "        elif category == 12 and row[1]['categories'] == '12':\n",
    "            count += 1\n",
    "    return count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# accept_rate[year] holds the acceptance rate for categories 1-11, followed by the overall rate.\n",
    "accept_rate = {}\n",
    "\n",
    "for year in range(2017, 2021):\n",
    "    papers = data[year]\n",
    "    rates = []\n",
    "    for topic in range(1, 12):\n",
    "        n_accepted = get_accept(papers, topic)\n",
    "        n_total = get_total(papers, topic)\n",
    "        # A category with no submissions is reported as 0 rather than dividing by zero.\n",
    "        rates.append(n_accepted / n_total if n_total != 0 else 0)\n",
    "    accept_rate[year] = rates\n",
    "    # Last entry: share of all of the year's rows with a decision code above 0.\n",
    "    rates.append(len(papers[papers['decisions'] > 0]) / len(papers))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# submit[year] holds, for categories 1-11, the fraction of that year's rows tagged with the category.\n",
    "submit = {}\n",
    "\n",
    "for year in [2020]:\n",
    "    papers = data[year]\n",
    "    shares = []\n",
    "    for topic in range(1, 12):\n",
    "        n_topic = get_total(papers, topic)\n",
    "        print(n_topic)\n",
    "        shares.append(n_topic / len(papers))\n",
    "    submit[year] = shares"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bar labels, in the same order as accept_rate[2020]: categories 1-11 followed by the overall rate.\n",
    "reviewer_x = ['Theory', 'Vision', 'NLP','Adversarial', 'Generative', 'Meta-learning', 'Fairness', 'Generalization', 'Optimization', 'Graph', 'Bayesian', \"All\"]\n",
    "# 2020 acceptance rates, rounded to two decimals for plotting.\n",
    "reproducibility_y = [round(rate, 2) for rate in accept_rate[2020]]\n",
    "\n",
    "sns.set_context('talk')\n",
    "\n",
    "# One row per bar.\n",
    "plot_df = pd.DataFrame({'x': reviewer_x, 'y': reproducibility_y})\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(8, 6))\n",
    "barplot = sns.barplot(x='x', y='y', data=plot_df, ax=ax, color=\"royalblue\")\n",
    "\n",
    "# Axis styling: keep only a light bottom spine and horizontal grid lines drawn behind the bars.\n",
    "ax.spines['top'].set_visible(False)\n",
    "ax.spines['right'].set_visible(False)\n",
    "ax.spines['left'].set_visible(False)\n",
    "ax.spines['bottom'].set_color('#DDDDDD')\n",
    "ax.tick_params(bottom=False, left=False)\n",
    "ax.set_axisbelow(True)\n",
    "ax.yaxis.grid(True, color='#EEEEEE')\n",
    "ax.xaxis.grid(False)\n",
    "ax.set(ylim=(0, 0.5))\n",
    "\n",
    "# Tick-label rotation and font sizes go through tick_params: the per-tick `Tick.label`\n",
    "# attribute used previously was deprecated in Matplotlib 3.1 and removed in 3.8.\n",
    "ax.tick_params(axis='x', labelrotation=75, labelsize=18)\n",
    "ax.tick_params(axis='y', labelsize=12)\n",
    "\n",
    "# Write each bar's height just above the bar (7 points up from its top edge).\n",
    "for p in ax.patches:\n",
    "    ax.annotate(\n",
    "        \"%.2f\" % p.get_height(),\n",
    "        (p.get_x() + p.get_width() / 2., p.get_height()),\n",
    "        ha='center', va='center', fontsize=11, color='black',\n",
    "        xytext=(0, 7), textcoords='offset points',\n",
    "    )\n",
    "\n",
    "# Axis labeling: the tick labels already name the topics, so drop the x-axis label.\n",
    "barplot.set(xlabel=None)\n",
    "ax.set_ylabel('Acceptance Rate', labelpad=15, fontsize=23)\n",
    "\n",
    "fig.tight_layout()\n",
    "\n",
    "fig.savefig(\"topic_breakdown_1.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fraction of 2020 rows tagged with each of categories 1-11, rounded to two decimals for plotting.\n",
    "reproducibility_y = [round(rate, 2) for rate in submit[2020]]\n",
    "\n",
    "# Bar labels, in the same order as submit[2020].\n",
    "reviewer_x = ['Theory', 'Vision', 'NLP','Adversarial', 'Generative', 'Meta-learning', 'Fairness', 'Generalization', 'Optimization', 'Graph', 'Bayesian']\n",
    "\n",
    "sns.set_context('talk')\n",
    "\n",
    "# One row per bar.\n",
    "plot_df = pd.DataFrame({'x': reviewer_x, 'y': reproducibility_y})\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(8, 6))\n",
    "barplot = sns.barplot(x='x', y='y', data=plot_df, ax=ax, color=\"royalblue\")\n",
    "\n",
    "# Axis styling: keep only a light bottom spine and horizontal grid lines drawn behind the bars.\n",
    "ax.spines['top'].set_visible(False)\n",
    "ax.spines['right'].set_visible(False)\n",
    "ax.spines['left'].set_visible(False)\n",
    "ax.spines['bottom'].set_color('#DDDDDD')\n",
    "ax.tick_params(bottom=False, left=False)\n",
    "ax.set_axisbelow(True)\n",
    "ax.yaxis.grid(True, color='#EEEEEE')\n",
    "ax.xaxis.grid(False)\n",
    "ax.set(ylim=(0, 0.175))\n",
    "\n",
    "# Tick-label rotation and font sizes go through tick_params: the per-tick `Tick.label`\n",
    "# attribute used previously was deprecated in Matplotlib 3.1 and removed in 3.8.\n",
    "ax.tick_params(axis='x', labelrotation=75, labelsize=18)\n",
    "ax.tick_params(axis='y', labelsize=12)\n",
    "\n",
    "# Write each bar's height just above the bar (7 points up from its top edge).\n",
    "for p in ax.patches:\n",
    "    ax.annotate(\n",
    "        \"%.2f\" % p.get_height(),\n",
    "        (p.get_x() + p.get_width() / 2., p.get_height()),\n",
    "        ha='center', va='center', fontsize=11, color='black',\n",
    "        xytext=(0, 7), textcoords='offset points',\n",
    "    )\n",
    "\n",
    "# Axis labeling: the tick labels already name the topics, so drop the x-axis label.\n",
    "barplot.set(xlabel=None)\n",
    "ax.set_ylabel('Distribution', labelpad=15, fontsize=23)\n",
    "\n",
    "fig.tight_layout()\n",
    "\n",
    "fig.savefig(\"topic_breakdown_2.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
