{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "19db5aff",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from plotting_utils import get_history\n",
    "\n",
    "\n",
    "# Load the game database generated using skate-tournament/\n",
    "df = pd.read_csv('latest-game-database.csv')\n",
    "\n",
    "# List of players\n",
    "players = []\n",
    "\n",
    "name_mapping = {} # key: player_name, value: name you want to see in plots.\n",
    "colors = {} # key: player_name, value: color you want to see in plots."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9006d3a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculate \"question asking skill\" and \"answering skill\" for each task_setter\n",
    "ask_skill = []\n",
    "answer_skill = []\n",
    "\n",
    "task_setters = players\n",
    "\n",
    "for task_setter in task_setters:\n",
    "    # Question asking skill: 1 - average score of all other models (excluding self) on this setter's questions\n",
    "    others = [m for m in task_setters if m != task_setter]\n",
    "    ask_df = df[df['task_setter'] == task_setter]\n",
    "    if len(others) > 0 and not ask_df.empty:\n",
    "        ask_skill.append(1 - ask_df[others].mean().mean())\n",
    "    else:\n",
    "        ask_skill.append(np.nan)\n",
    "        \n",
    "    \n",
    "    # Answering skill: average score of this model on all questions not set by itself\n",
    "    ans_df = df[df['task_setter'] != task_setter]\n",
    "    if not ans_df.empty:\n",
    "        answer_skill.append(ans_df[task_setter].mean())\n",
    "    else:\n",
    "        answer_skill.append(np.nan)\n",
    "\n",
    "# Create scatter plot\n",
    "plt.figure(figsize=(8, 6))\n",
    "plt.scatter(ask_skill, answer_skill, s=50)\n",
    "\n",
    "# Annotate each point with the task_setter name\n",
    "for i, task_setter in enumerate(task_setters):\n",
    "    if task_setter == 'claude-3-5-haiku-20241022':\n",
    "        plt.annotate(name_mapping[task_setter], (ask_skill[i], answer_skill[i]), textcoords=\"offset points\", xytext=(0,-15), ha='left', fontsize=15)\n",
    "    elif task_setter == 'claude-3-haiku-20240307':\n",
    "        plt.annotate(name_mapping[task_setter], (ask_skill[i], answer_skill[i]), textcoords=\"offset points\", xytext=(-100,5), ha='left', fontsize=15)\n",
    "    else:\n",
    "        plt.annotate(name_mapping[task_setter], (ask_skill[i], answer_skill[i]), textcoords=\"offset points\", xytext=(0,7), ha='left', fontsize=15)\n",
    "\n",
    "plt.plot(np.linspace(0,1,100), np.linspace(0,1,100), linestyle='--', color='red', alpha=0.5)  # Diagonal line for reference\n",
    "\n",
    "plt.xlabel('Question Asking Skill', fontsize=20)\n",
    "plt.ylabel('Answering Skill', fontsize=20)\n",
    "plt.title('Question Asking vs Answering Skill by Model', fontsize=20)\n",
    "plt.xlim(0, 1)\n",
    "plt.ylim(0, 1)\n",
    "plt.grid(True)\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
