{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "241a4abf",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import glob\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "plt.style.use(\"bmh\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e0bacd1",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_dir = \"../results/meta-llama__Llama-3.2-3B-Instruct\"\n",
    "df_path = f\"{df_dir}/openai__openai_humaneval_proposed_{5}_info.csv\"\n",
    "res = pd.read_csv(df_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "629ad211",
   "metadata": {},
   "outputs": [],
   "source": [
    "res[\"total_branches\"].plot.hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "be3d09a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "res[\"mean_branching_factor\"].plot.hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "52a52290",
   "metadata": {},
   "outputs": [],
   "source": [
    "for col in sorted(res):\n",
    "    if col.startswith(\"beam\"):\n",
    "        plt.bar(col, res[col], color=\"grey\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "60fa49cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "res[\"total_branches\"].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4140eb5",
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_paths = glob.glob(\"../results/meta-llama__Llama-3.2-3B-Instruct copy/*proposed_info.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30aa6111",
   "metadata": {},
   "outputs": [],
   "source": [
    "comparison_df = pd.DataFrame()\n",
    "for dataset_path in dataset_paths:\n",
    "    res = pd.read_csv(dataset_path)\n",
    "    dataset_name = dataset_path.split(\"__\")[-1].split(\"_proposed\")[0]\n",
    "    if \"openai_\" in dataset_name:\n",
    "        dataset_name = dataset_name.replace(\"openai_\", \"\")\n",
    "        \n",
    "    comparison_df[dataset_name] = res[\"total_branches\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4e1f9c65",
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(6, 4))\n",
    "sns.boxenplot(data=comparison_df[[\"gsm8k\", \"MATH-500\", \"humaneval\", \"scibench\"]], color=\"darkgray\")\n",
    "plt.xlabel(\"Task difficulty\\n⟶\")\n",
    "plt.ylabel(\"Total branches\")\n",
    "plt.ylim(0)\n",
    "\n",
    "plt.savefig(\"plots/dynamic_expansions.pdf\", bbox_inches=\"tight\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
