{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1a20a89a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.chdir(\"../../\")\n",
    "print(os.getcwd())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "34ce518e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import sys\n",
    "from pathlib import Path\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "results_json_path = \"outputs/estimate_pass_rate/Qwen2.5-VL-3B-Instruct/med-vlm-m23k/eval_results.jsonl\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "302081a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "results = []\n",
    "with open(results_json_path, \"r\") as f:\n",
    "    for line in f:\n",
    "        results.append(json.loads(line))\n",
    "\n",
    "print(f\"Total results: {len(results)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1675ff1",
   "metadata": {},
   "outputs": [],
   "source": [
    "results[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9cbb7856",
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_index_list = []\n",
    "for i, result in enumerate(results):\n",
    "    dataset_index_list.append(result[\"dataset_index\"])\n",
    "print(f\"Total datasets: {len(set(dataset_index_list))}\")\n",
    "# find which one is missing from 0 - len(dataset_index_list) - 1\n",
    "missing_datasets = set(range(len(dataset_index_list))) - set(dataset_index_list)\n",
    "print(f\"Missing datasets: {missing_datasets}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24ebc029",
   "metadata": {},
   "outputs": [],
   "source": [
    "num_correct_list = []\n",
    "for result in results:\n",
    "    num_correct = result[\"num_correct\"]\n",
    "    num_correct_list.append(num_correct)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c5025891",
   "metadata": {},
   "outputs": [],
   "source": [
    "# get the number of different num_correct values\n",
    "import collections\n",
    "num_correct_counter = collections.Counter(num_correct_list)\n",
    "num_correct_counter = {k: num_correct_counter[k] for k in sorted(num_correct_counter)}\n",
    "print(\"Number of different num_correct values:\", len(num_correct_counter))\n",
    "print(\"Num correct values:\", num_correct_counter)\n",
    "# get the percentage of each num_correct value\n",
    "num_correct_percentage = {k: num_correct_counter[k] / len(results) * 100 for k in sorted(num_correct_counter)}\n",
    "print(\"Num correct percentage:\", num_correct_percentage)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6867276a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# plot the pie chart\n",
    "fig, ax = plt.subplots()\n",
    "ax.pie(num_correct_counter.values(), labels=num_correct_counter.keys(), autopct='%1.1f%%', startangle=90)\n",
    "ax.set_title(f\"Distribution of num_correct values: {results_json_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44ff3b0f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
