{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "005f0cb5",
   "metadata": {},
   "source": [
    "# LLM Model Selection - Figures\n",
    "\n",
    "Comparison of BALROG vs baselines on LLM selection task (QCM dataset)."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8c115f17",
   "metadata": {},
   "source": [
    "## OtB and OPR (Side-by-Side)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "facdef19",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import os\n",
    "\n",
    "# --- Parameters to adjust ---\n",
    "T = 1500\n",
    "num_runs = 20\n",
    "\n",
    "data_path = f\"../experiments/results/compare_to_baselines_LLM/data/raw_data_qcm_vs_baselines_{T}_{num_runs}runs.pkl\"\n",
    "\n",
    "plt.rcParams.update({\n",
    "    'font.size': 14,\n",
    "    'axes.titlesize': 16,\n",
    "    'axes.labelsize': 14,\n",
    "    'xtick.labelsize': 12,\n",
    "    'ytick.labelsize': 12,\n",
    "    'legend.fontsize': 12\n",
    "})\n",
    "\n",
    "# --- Loading data ---\n",
    "with open(data_path, \"rb\") as f:\n",
    "    data = pickle.load(f)\n",
    "\n",
    "all_o2b       = data[\"all_o2b\"]\n",
    "all_opr       = data[\"all_opr\"]\n",
    "budgets_accum = data.get(\"budgets_accum\", {})\n",
    "\n",
    "# --- Computing averages ---\n",
    "avg_o2b = {a: np.mean(np.stack(all_o2b[a]), axis=0) for a in all_o2b}\n",
    "avg_opr = {a: np.mean(np.stack(all_opr[a]), axis=0) for a in all_opr}\n",
    "\n",
    "# --- Styles de tracé ---\n",
    "styles = {\n",
    "    \"Optimal\":     {\"linestyle\": \"-.\", \"color\": \"green\",      \"linewidth\": 1.6},\n",
    "    \"Random\":      {\"linestyle\": \"--\", \"color\": \"orange\",     \"linewidth\": 1.2},\n",
    "    \"PAK-UCB\":     {\"linestyle\": \"--\", \"color\": \"red\",        \"linewidth\": 1.2},\n",
    "    \"BALROG 5\":    {\"linestyle\": \"-\",  \"color\": \"darkviolet\", \"linewidth\": 2.0},\n",
    "    \"BALROG 20\":   {\"linestyle\": \"-\",  \"color\": \"indigo\",     \"linewidth\": 2.0},\n",
    "    \"LinUCB\":      {\"linestyle\": \"--\", \"color\": \"gray\",       \"linewidth\": 1.2},\n",
    "    \"neuronal-s\":  {\"linestyle\": \"--\", \"color\": \"cyan\",       \"linewidth\": 1.2},\n",
    "    \"KNN-UCB\":     {\"linestyle\": \"--\", \"color\": \"blue\",       \"linewidth\": 1.2},\n",
    "}\n",
    "\n",
    "# --- Figure (2 plots : O2B et OPR) ---\n",
    "fig, axes = plt.subplots(1, 2, figsize=(16, 6))\n",
    "window = T // 5\n",
    "\n",
    "# 1) Sliding-window Avg O2B\n",
    "ax = axes[0]\n",
    "for a, series in avg_o2b.items():\n",
    "    if len(series) >= window:\n",
    "        mov = np.convolve(series, np.ones(window)/window, mode=\"valid\")\n",
    "        idx = np.linspace(0, len(mov) - 1, 100, dtype=int)\n",
    "        ax.plot(np.arange(window, window + len(mov))[idx], mov[idx],\n",
    "                label=a, **styles.get(a, {}))\n",
    "ax.set_title(f\"{window}-Sliding Avg OtB\", fontsize=12)\n",
    "ax.set_xlabel(\"Iteration\")\n",
    "ax.set_ylabel(\"Avg OtB\")\n",
    "ax.legend(loc=\"upper left\", fontsize=9)\n",
    "ax.grid(True)\n",
    "\n",
    "# 2) Sliding-window Avg OPR\n",
    "ax = axes[1]\n",
    "for a, series in avg_opr.items():\n",
    "    if len(series) >= window:\n",
    "        mov = np.convolve(series, np.ones(window)/window, mode=\"valid\")\n",
    "        idx = np.linspace(0, len(mov) - 1, 100, dtype=int)\n",
    "        ax.plot(np.arange(window, window + len(mov))[idx], mov[idx],\n",
    "                label=a, **styles.get(a, {}))\n",
    "ax.set_title(f\"{window}-Sliding Avg OPR\", fontsize=12)\n",
    "ax.set_xlabel(\"Iteration\")\n",
    "ax.set_ylabel(\"Avg OPR\")\n",
    "ax.legend(loc=\"upper left\", fontsize=9)\n",
    "ax.grid(True)\n",
    "\n",
    "plt.tight_layout()\n",
    "os.makedirs(\"plots/compare_to_baselines_LLM\", exist_ok=True)\n",
    "plt.savefig(f\"plots/compare_to_baselines_LLM/{T}_{num_runs}runs_o2b_opr.pdf\", dpi=600)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b7f5e466",
   "metadata": {},
   "source": [
    "## Accuracy Table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b88f3a03",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import os\n",
    "\n",
    "# --- Parameters ---\n",
    "T = 1500\n",
    "num_runs = 20\n",
    "data_path = f\"../experiments/results/compare_to_baselines_LLM/data/raw_data_qcm_vs_baselines_{T}_{num_runs}runs.pkl\"\n",
    "\n",
    "# --- Loading data ---\n",
    "with open(data_path, \"rb\") as f:\n",
    "    data = pickle.load(f)\n",
    "\n",
    "# --- Extracting OPR et calcul de la moyenne × 100 ---\n",
    "all_opr = data[\"all_opr\"]\n",
    "avg_opr = {\n",
    "    algo: 100 * np.mean(np.stack(all_opr[algo]), axis=0)\n",
    "    for algo in all_opr\n",
    "}\n",
    "final_opr = {\n",
    "    algo: np.mean(series)\n",
    "    for algo, series in avg_opr.items()\n",
    "}\n",
    "\n",
    "# --- Creating a DataFrame trié ---\n",
    "df_opr = pd.DataFrame.from_dict(final_opr, orient=\"index\", columns=[\"Accuracy (%)\"])\n",
    "df_opr = df_opr.sort_values(\"Accuracy (%)\", ascending=False)\n",
    "\n",
    "print(df_opr)"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
