{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Evaluation Noteboook\n",
    "Notebook to easily evaluate and visualize infernece results for Kaleidoscope"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from eval_utils import compute_accuracy, get_summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result_path = \"path_to_json/infernece_output.json\"\n",
    "full_acc = pd.read_json(compute_accuracy(result_path))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can group the results for any column on the dataset (language as example)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "summary = get_summary(full_acc, \"language\")\n",
    "summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "accuracy = np.mean(summary['accuracy'])\n",
    "valid_acc = np.mean(summary['valid_acc'])\n",
    "rate = 1 - sum(summary['valid_count'])/sum(summary['total_questions'])\n",
    "print(f\"Accuracy: {accuracy*100}\")\n",
    "print(f\"Valid Accuracy: {valid_acc*100}\")\n",
    "print(f\"Failure Rate: {rate*100}\")\n",
    "print(f\"Total questions:{np.sum(summary['total_questions'])}\")\n",
    "print(f\"Missing questions:{np.sum(summary['none_count'])}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mm-exams",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
