{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Comparing with Various Method and Sensitiveness $\\beta$**\n",
    "---\n",
    "\n",
    "---"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "betas = [0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 5.0, 10.0, 15.0, 20.0]\n",
    "proxys = [\"reward-model-deberta-v3-large-v2\", \"SteamSHP-flan-t5-large\", \"SteamSHP-flan-t5-xl\", 'PairRM', 'RM-Mistral-7B']\n",
    "model = 'mistral-7b-sft-beta'\n",
    "gold = 'Eurus-RM-7b'\n",
    "dataset = 'hh-helpful-t'\n",
    "# dataset = 'alpacafarm'\n",
    "# dataset = 'hh-harmless-t'\n",
    "\n",
    "sns.set_style(\"darkgrid\", {'grid.linestyle': '--'})\n",
    "sns.set_context(\"paper\", 2.5, {\"lines.linewidth\": 4})\n",
    "\n",
    "fig, axes = plt.subplots(2, 3, figsize=(20, 12))\n",
    "axes = axes.flatten()\n",
    "\n",
    "optimal_values = []\n",
    "\n",
    "for idx, proxy in enumerate(proxys):\n",
    "    if idx >= len(axes):\n",
    "        break\n",
    "    \n",
    "    ax = axes[idx]\n",
    "\n",
    "    df_value = pd.DataFrame(index=['WD-BoN', 'KL-BoN', 'BoN','l-BoN','SWD-BoN','SKL-Bon'], columns=betas)\n",
    "\n",
    "    df_path = f\"./Compare_Result/{dataset}/{model},g-{gold},p-{proxy}.csv\"\n",
    "    df_ = pd.read_csv(df_path)\n",
    "    \n",
    "    for i, beta in enumerate(betas):\n",
    "        bon_val = df_[df_.columns[0]].mean(axis=0)\n",
    "        mbr_val = df_[df_.columns[1]].mean(axis=0)\n",
    "        wd_val = df_[df_.columns[i + 2]].mean(axis=0)\n",
    "        kl_val = df_[df_.columns[i + 2 + len(betas)]].mean(axis=0)\n",
    "        swd_val = df_[df_.columns[i+2+len(betas)+len(betas)]].mean(axis=0)\n",
    "        skl_val = df_[df_.columns[i+2+len(betas)+len(betas)+len(betas)]].mean(axis=0)\n",
    "        l_val = df_[df_.columns[i + 2 + len(betas) + len(betas) + len(betas) + len(betas)]].mean(axis=0)\n",
    "        \n",
    "        df_value.loc['KL-BoN', beta] = kl_val\n",
    "        df_value.loc['WD-BoN', beta] = wd_val\n",
    "        df_value.loc['BoN', beta] = bon_val\n",
    "        df_value.loc['SWD-BoN', beta] = swd_val\n",
    "        df_value.loc['SKL-Bon', beta] = skl_val\n",
    "        df_value.loc['MBR', beta] = mbr_val\n",
    "        df_value.loc['l-BoN', beta] = l_val\n",
    "\n",
    "    optimal_wd_beta = df_value.loc['WD-BoN'].idxmax()\n",
    "    optimal_wd_value = df_value.loc['WD-BoN'].max()\n",
    "    optimal_kl_value = df_value.loc['KL-BoN'].max()\n",
    "    optimal_kl_beta = df_value.loc['KL-BoN'].idxmax()\n",
    "    optimal_l_beta = df_value.loc['l-BoN'].idxmax()\n",
    "    optimal_l_value = df_value.loc['l-BoN'].max()\n",
    "    optimal_swd_value = df_value.loc['SWD-BoN'].max()\n",
    "    optimal_skl_value = df_value.loc['SKL-Bon'].max()\n",
    "    optimal_swd_beta = df_value.loc['SWD-BoN'].idxmax()\n",
    "    optimal_skl_beta = df_value.loc['SKL-Bon'].idxmax()\n",
    "    optimal_values.append({\n",
    "        \"proxy\": proxy,\n",
    "        \"optimal_wd_beta\": optimal_wd_beta,\n",
    "        \"optimal_wd_value\": optimal_wd_value,\n",
    "        \"optimal_kl_beta\": optimal_kl_beta,\n",
    "        \"optimal_kl_value\": optimal_kl_value,\n",
    "        'optimal_l_beta':optimal_l_beta,\n",
    "        'optimal_l_value':optimal_l_value,\n",
    "        'optimal_swd_beta':optimal_swd_beta,\n",
    "        'optimal_swd_value':optimal_swd_value,\n",
    "        'optimal_skl_beta':optimal_skl_beta,\n",
    "        'optimal_skl_value':optimal_skl_value\n",
    "    })\n",
    "\n",
    "\n",
    "#evaluate test dataset\n",
    "dataset = 'hh-helpful'\n",
    "# dataset = 'alpaca'\n",
    "# dataset = 'hh-harmless'\n",
    "sns.set_style(\"darkgrid\", {'grid.linestyle': '--'})\n",
    "sns.set_context(\"paper\", 3.0, {\"lines.linewidth\": 4})\n",
    "fig, axes = plt.subplots(2, 3, figsize=(30, 18))\n",
    "plt.subplots_adjust(hspace=0.3)\n",
    "axes = axes.flatten()\n",
    "proxy_model = [\"OASST\", \"SHP-Large\", \"SHP-XL\", 'PairRM', 'RM-Mistral-7B']\n",
    "for idx, opt in enumerate(optimal_values):\n",
    "    proxy = opt['proxy']\n",
    "    opt_k_beta = opt['optimal_kl_beta']\n",
    "    opt_wd_beta = opt['optimal_wd_beta']\n",
    "    opt_l_beta = opt['optimal_l_beta']\n",
    "    opt_swd_beta = opt['optimal_swd_beta']\n",
    "    opt_skl_beta = opt['optimal_skl_beta']\n",
    "    \n",
    "    df_path = f\"/Users/s26364/Desktop/KNN_Bon_/Compare_Result/{dataset}/{model},g-{gold},p-{proxy}.csv\"\n",
    "    df_ = pd.read_csv(df_path)\n",
    "\n",
    "    df_test_value = pd.DataFrame(index=['WD-BoN', 'KL-BoN', 'random', 'MBR','l-BoN','SWD-BoN','SKL-BoN'], columns=betas)\n",
    "    for j, beta in enumerate(betas):\n",
    "        \n",
    "        wd_win_rate = (df_.iloc[:, 0] <df_.iloc[:, j + 2]).mean() + 0.5 * (df_.iloc[:, 0] == df_.iloc[:, j + 2]).mean()\n",
    "        kl_win_rate = (df_.iloc[:, 0] < df_.iloc[:, j + 2 + len(betas)]).mean() + 0.5 * (df_.iloc[:, 0] == df_.iloc[:, j + 2 + len(betas)]).mean()\n",
    "        mbr_win_rate = (df_.iloc[:, 0] < df_.iloc[:, 1]).mean() + 0.5 * (df_.iloc[:, 0] == df_.iloc[:, 1]).mean()\n",
    "        random_win_rate = (df_.iloc[:, 0] < df_.iloc[:, -1]).mean() + 0.5 * (df_.iloc[:, 0] == df_.iloc[:, -1]).mean()\n",
    "        swd_win_rate = (df_.iloc[:, 0] < df_.iloc[:, j + 2 + len(betas) + len(betas)]).mean() + 0.5 * (df_.iloc[:, 0] == df_.iloc[:, j + 2 + len(betas) + len(betas)]).mean()\n",
    "        skl_win_rate = (df_.iloc[:, 0] < df_.iloc[:, j + 2 + len(betas) + len(betas) + len(betas)]).mean() + 0.5 * (df_.iloc[:, 0] == df_.iloc[:, j + 2 + len(betas) + len(betas) + len(betas)]).mean()\n",
    "        l_win_rate = (df_.iloc[:, 0] < df_.iloc[:, j + 2 + len(betas) + len(betas) + len(betas) + len(betas)]).mean() + 0.5 * (df_.iloc[:, 0] == df_.iloc[:, j + 2 + len(betas) + len(betas) + len(betas) + len(betas)]).mean()\n",
    "        \n",
    "        df_test_value.at['WD-BoN', beta] = wd_win_rate * 100\n",
    "        df_test_value.at['KL-BoN', beta] = kl_win_rate * 100\n",
    "        df_test_value.at['l-BoN', beta] = l_win_rate * 100\n",
    "        df_test_value.at['MBR', beta] = mbr_win_rate * 100\n",
    "        df_test_value.at['random', beta] = random_win_rate * 100\n",
    "        df_test_value.at['SWD-BoN', beta] = swd_win_rate * 100\n",
    "        df_test_value.at['SKL-BoN', beta] = skl_win_rate * 100\n",
    "        \n",
    "    ax = axes[idx]\n",
    "    ax.plot(betas, df_test_value.loc['WD-BoN'], label='$\\mathrm{{RBoN}}_{{\\mathrm{{WD}}}}$', color='blue')\n",
    "    ax.plot(betas, df_test_value.loc['KL-BoN'], label='$\\mathrm{{RBoN}}_{{\\mathrm{{KL}}}}$', color='red')\n",
    "    ax.plot(betas, df_test_value.loc['l-BoN'], label='$\\mathrm{{RBoN}}_{{\\mathrm{{L}}}}$', color='yellow')\n",
    "    ax.plot(betas, df_test_value.loc['MBR'], label='MBR', color='purple')\n",
    "    ax.plot(betas, df_test_value.loc['random'], label='Random', color='brown')\n",
    "    ax.plot(betas, df_test_value.loc['SWD-BoN'], label='$\\mathrm{{RBoN}}_{{\\mathrm{{SWD}}}}$', color='black')\n",
    "    ax.plot(betas, df_test_value.loc['SKL-BoN'], label='$\\mathrm{{RBoN}}_{{\\mathrm{{SKL}}}}$', color='green')\n",
    "    \n",
    "    # Highlighting the optimal points\n",
    "    ax.plot(opt_wd_beta, df_test_value.at['WD-BoN', opt_wd_beta], 'x', label=f'Optimal Beta ($\\mathrm{{RBoN}}_{{\\mathrm{{WD}}}}$) : {opt_wd_beta}', markersize=15,markeredgewidth=10, color='blue')\n",
    "    ax.plot(opt_k_beta, df_test_value.at[\"KL-BoN\", opt_k_beta], 'x', label=f'Optimal Beta ($\\mathrm{{RBoN}}_{{\\mathrm{{KL}}}}$) : {opt_k_beta}', markersize=15,markeredgewidth=10, color='red')\n",
    "    ax.plot(opt_l_beta, df_test_value.at[\"l-BoN\", opt_l_beta], 'x', label=f'Optimal Beta ($\\mathrm{{RBoN}}_{{\\mathrm{{L}}}}$) : {opt_l_beta}', markersize=15,markeredgewidth=10, color='yellow')\n",
    "    ax.plot(opt_swd_beta, df_test_value.at[\"SWD-BoN\", opt_swd_beta], 'x', label=f'Optimal Beta ($\\mathrm{{RBoN}}_{{\\mathrm{{SWD}}}}$) : {opt_swd_beta}', markersize=15,markeredgewidth=10, color='black')\n",
    "    ax.plot(opt_skl_beta, df_test_value.at[\"SKL-BoN\", opt_skl_beta], 'x', label=f'Optimal Beta ($\\mathrm{{RBoN}}_{{\\mathrm{{SKL}}}}$) : {opt_skl_beta}', markersize=15,markeredgewidth=10,color='green')\n",
    "    \n",
    "    ax.set_xscale('log')\n",
    "    axes[5].axis('off')\n",
    "    ax.set_xlabel('Beta', fontsize=30)\n",
    "    ax.set_title(f'{proxy_model[idx]}', fontsize=30)\n",
    "    ax.set_ylabel('Win Rate (%)', fontsize=30)\n",
    "    ax.legend()\n",
    "    ax.grid(True)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "cyber-ai",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
