{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f9c52412-33a6-40f9-8e78-6b98c65a3f2b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "from scripts.get_results_to_plot import process_json_files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11eabe4a-51ea-4126-bb98-d1917a73976c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Average the rejection time (under H1) and the FPR (under H0) across repeated runs,\n",
    "# separately for each significance level, and save the summary to `output_path`.\n",
    "\n",
    "# Destination of the aggregated summary (read back by the plotting cells).\n",
    "output_path = 'exp_main/results/results_to_plot/flash.gemma.scenario1.json'\n",
    "\n",
    "# Raw result files to aggregate: here a single source model.\n",
    "json_files = [\n",
    "    'exp_main/results/raw_results/flash.gemma.scenario1.json',\n",
    "]\n",
    "\n",
    "# To build avg.gemma.scenario1.json instead (the average across the 3 source models\n",
    "# with the scoring model gemma-2b), use the following list:\n",
    "# json_files = [\n",
    "#     'exp_main/results/raw_results/flash.gemma.scenario1.json',\n",
    "#     'exp_main/results/raw_results/pro.gemma.scenario1.json',\n",
    "#     'exp_main/results/raw_results/palm2.gemma.scenario1.json'\n",
    "# ]\n",
    "\n",
    "# For brevity, each score function in the input files is denoted by the name of its detector.\n",
    "item_names = [\n",
    "    'Fast-Detect', 'DetectGPT', 'NPR', 'LRR', 'LogRank',\n",
    "    'Likelihood', 'Entropy', 'DNA-GPT', 'RoB-base', 'RoB-large',\n",
    "]\n",
    "\n",
    "process_json_files(json_files, item_names, output_path)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "287b8a69-bbba-4af6-9e9c-3478d4368d56",
   "metadata": {},
   "source": [
    "## Plot results of different score functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e1b5acb-2a64-45ca-84b8-5c0ae1c4473a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the results_to_plot file (example setting: source model Gemini-1.5-flash,\n",
    "# scoring model Gemma-2b, input parameters of scenario 1).\n",
    "# This is the same file the aggregation cell writes as `output_path`.\n",
    "with open('exp_main/results/results_to_plot/flash.gemma.scenario1.json', 'r') as file:\n",
    "    items = json.load(file)\n",
    "\n",
    "# One DataFrame per score function. 'alpha' is rebuilt as an evenly spaced grid on\n",
    "# [0.005, 0.1] with one point per FPR entry (assumes the results were computed on that grid).\n",
    "df_list = []\n",
    "for item in items:\n",
    "    df = pd.DataFrame({\n",
    "        'rejection_time': item['rejection_time'],\n",
    "        'power': item['power'],\n",
    "        'fpr': item['fpr'],\n",
    "        'name': item['item_name'],  # used to label the curves\n",
    "        'alpha': np.linspace(0.005, 0.1, len(item['fpr'])),\n",
    "    })\n",
    "    df_list.append(df)\n",
    "\n",
    "# Plot results\n",
    "fig, ax = plt.subplots(1, 2, figsize=(12, 4))\n",
    "markers = ['*', 's', '^', 'v', '_', '<', 'P', 'o', 'X', '|']\n",
    "for i, df in enumerate(df_list):\n",
    "    # Cycle through the markers so that extra items do not raise an IndexError.\n",
    "    marker = markers[i % len(markers)]\n",
    "    label = df['name'].iloc[0]\n",
    "\n",
    "    # Plot 1: FPR vs. Rejection_Time\n",
    "    ax[0].plot(df['rejection_time'], df['fpr'], ls='--', lw=3, marker=marker, label=label, markersize=10)\n",
    "\n",
    "    # Plot 2: FPR vs. Significance_Level\n",
    "    ax[1].plot(df['alpha'], df['fpr'], ls='--', lw=3, marker=marker, label=label, markersize=10)\n",
    "\n",
    "ax[0].tick_params(axis='both', labelsize=20, which='major', length=10, width=2)\n",
    "ax[0].set_ylim(-0.05, 1.05)\n",
    "ax[0].set_yticks(np.arange(0, 1.05, 0.2))\n",
    "ax[0].set_xlim(-50, 550)\n",
    "ax[0].set_xticks(np.arange(0, 550, 50))\n",
    "ax[0].set_xlabel(r'Rejection Time ($\\tau$)', fontsize=20)\n",
    "ax[0].set_ylabel('False Positive Rate (FPR)', fontsize=20)\n",
    "\n",
    "# Shade the desired FPR region (FPR below the significance level, i.e. under y = x).\n",
    "x = np.linspace(0, 0.1, 500)\n",
    "y = x\n",
    "ax[1].fill_between(x, 0, y, color='yellow', alpha=0.2, zorder=1)\n",
    "ax[1].set_ylim(-0.05, 1.05)\n",
    "ax[1].set_yticks(np.arange(0, 1.05, 0.2))\n",
    "ax[1].set_xlim(-0.005, 0.105)\n",
    "ax[1].set_xticks(np.arange(0, 0.105, 0.02))\n",
    "ax[1].tick_params(axis='both', labelsize=20, which='major', length=10, width=2)\n",
    "# Reference diagonal FPR = alpha (left unlabelled so it stays out of the legend).\n",
    "ax[1].plot([0, 0.1], [0, 0.1], color='k', ls='--', lw=3)\n",
    "ax[1].set_xlabel(r'Significance Level ($\\alpha$)', fontsize=20)\n",
    "ax[1].set_ylabel('False Positive Rate (FPR)', fontsize=20)\n",
    "\n",
    "# One shared legend below the figure: both panels draw the same labelled curves,\n",
    "# so the handles of the first panel are sufficient.\n",
    "handles, labels = ax[0].get_legend_handles_labels()\n",
    "fig.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.49, -0.07), fancybox=True, shadow=True,\n",
    "           ncol=5, fontsize=20, labelspacing=0.1, handletextpad=0.5, handlelength=1)\n",
    "\n",
    "plt.subplots_adjust(wspace=0.4)\n",
    "\n",
    "# Thicken the frame of both panels.\n",
    "for axis in ax:\n",
    "    for spine in axis.spines.values():\n",
    "        spine.set_linewidth(2)\n",
    "\n",
    "# Save plots\n",
    "plt.savefig('exp_main/results/plot_png/flash.gemma.scenario1.png', dpi=300, bbox_inches='tight')\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "adedeb59-7d37-4708-b617-ed48eb7590ed",
   "metadata": {},
   "source": [
    "## Compare with baselines (no correction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ffe7d7e1-b115-4661-a0c9-045cd3546388",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the results_to_plot file (the score function is fast-detectgpt).\n",
    "# The directory is 'exp_main/results', consistent with the other paths in this notebook.\n",
    "with open('exp_main/results/results_to_plot/flash.gemma.baseline_no_correction.json', 'r') as file:\n",
    "    items = json.load(file)\n",
    "\n",
    "# One DataFrame per plotted item. 'alpha' is rebuilt as an evenly spaced grid on\n",
    "# [0.005, 0.1] with one point per FPR entry (assumes the fpr length is consistent with that grid).\n",
    "df_list = []\n",
    "for item in items:\n",
    "    df = pd.DataFrame({\n",
    "        'rejection_time': item['rejection_time'],\n",
    "        'power': item['power'],\n",
    "        'fpr': item['fpr'],\n",
    "        'name': item['item_name'],  # used to label the plots\n",
    "        'alpha': np.linspace(0.005, 0.1, len(item['fpr'])),\n",
    "    })\n",
    "    df_list.append(df)\n",
    "\n",
    "# Plot\n",
    "fig, ax = plt.subplots(1, 2, figsize=(12, 4))\n",
    "# The first entry gets a distinct marker and colour; the remaining entries share the 'o' marker.\n",
    "markers = ['*', 'o', 'o', 'o', 'o', 'o']\n",
    "colors = ['red', 'lightskyblue', 'green', 'plum', 'orange', 'grey']\n",
    "\n",
    "for i, df in enumerate(df_list):\n",
    "    # Cycle through the styles so that extra items do not raise an IndexError.\n",
    "    marker = markers[i % len(markers)]\n",
    "    color = colors[i % len(colors)]\n",
    "    label = df['name'].iloc[0]\n",
    "\n",
    "    # Plot 1: FPR vs. Rejection Time\n",
    "    ax[0].plot(df['rejection_time'], df['fpr'], ls='--', lw=3, marker=marker, color=color, label=label, markersize=10)\n",
    "\n",
    "    # Plot 2: FPR vs. Significance_level\n",
    "    ax[1].plot(df['alpha'], df['fpr'], ls='--', lw=3, marker=marker, color=color, label=label, markersize=10)\n",
    "\n",
    "ax[0].tick_params(axis='both', labelsize=20, which='major', length=10, width=2)\n",
    "ax[0].set_ylim(-0.05, 1.05)\n",
    "ax[0].set_yticks(np.arange(0, 1.05, 0.2))\n",
    "ax[0].set_xlim(-50, 550)\n",
    "ax[0].set_xticks(np.arange(0, 550, 100))\n",
    "ax[0].set_xlabel(r'Rejection Time ($\\tau$)', fontsize=20)\n",
    "ax[0].set_ylabel('False Positive Rate (FPR)', fontsize=20)\n",
    "\n",
    "# Shade the desired FPR region (FPR below the significance level, i.e. under y = x).\n",
    "x = np.linspace(0, 0.1, 500)\n",
    "y = x\n",
    "ax[1].fill_between(x, 0, y, color='yellow', alpha=0.2, zorder=1)\n",
    "ax[1].set_ylim(-0.05, 1.05)\n",
    "ax[1].set_yticks(np.arange(0, 1.05, 0.2))\n",
    "ax[1].set_xlim(-0.005, 0.105)\n",
    "ax[1].set_xticks(np.arange(0, 0.12, 0.02))\n",
    "ax[1].tick_params(axis='both', labelsize=20, which='major', length=10, width=2)\n",
    "# Reference diagonal FPR = alpha (left unlabelled so it stays out of the legend).\n",
    "ax[1].plot([0, 0.1], [0, 0.1], color='k', ls='--', lw=3)\n",
    "ax[1].set_xlabel(r'Significance Level ($\\alpha$)', fontsize=20)\n",
    "ax[1].set_ylabel('False Positive Rate (FPR)', fontsize=20)\n",
    "\n",
    "# One shared legend below the figure: both panels draw the same labelled curves,\n",
    "# so the handles of the first panel are sufficient.\n",
    "handles, labels = ax[0].get_legend_handles_labels()\n",
    "fig.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.49, -0.07), fancybox=True, shadow=True,\n",
    "           ncol=6, fontsize=20, labelspacing=0.1, handletextpad=0.5, handlelength=1)\n",
    "plt.subplots_adjust(wspace=0.4)\n",
    "\n",
    "# Thicken the frame of both panels.\n",
    "for axis in ax:\n",
    "    for spine in axis.spines.values():\n",
    "        spine.set_linewidth(2)\n",
    "\n",
    "plt.savefig('exp_main/results/plot_png/flash.gemma.baseline_no_correction.png', dpi=300, bbox_inches='tight')\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "25220d64-714a-4fb8-aa4a-f4bf1457f077",
   "metadata": {},
   "source": [
    "## Compare with baselines (with correction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98bba3c2-362f-455d-8bf5-12bc8414e1e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the results_to_plot file (the score function is fast-detectgpt).\n",
    "# The directory is 'exp_main/results', consistent with the other paths in this notebook.\n",
    "with open('exp_main/results/results_to_plot/flash.gemma.baseline_with_correction.json', 'r') as file:\n",
    "    items = json.load(file)\n",
    "\n",
    "# One DataFrame per plotted item. 'alpha' is rebuilt as an evenly spaced grid on\n",
    "# [0.005, 0.1] with one point per FPR entry (assumes the fpr length is consistent with that grid).\n",
    "df_list = []\n",
    "for item in items:\n",
    "    df = pd.DataFrame({\n",
    "        'rejection_time': item['rejection_time'],\n",
    "        'power': item['power'],\n",
    "        'fpr': item['fpr'],\n",
    "        'name': item['item_name'],  # used to label the plots\n",
    "        'alpha': np.linspace(0.005, 0.1, len(item['fpr'])),\n",
    "    })\n",
    "    df_list.append(df)\n",
    "\n",
    "# Plot\n",
    "fig, ax = plt.subplots(1, 2, figsize=(12, 4))\n",
    "# The first entry gets a distinct marker and colour; the remaining entries share the '^' marker.\n",
    "markers = ['*', '^', '^', '^', '^', '^']\n",
    "colors = ['red', 'lightskyblue', 'green', 'plum', 'orange', 'grey']\n",
    "\n",
    "for i, df in enumerate(df_list):\n",
    "    # Cycle through the styles so that extra items do not raise an IndexError.\n",
    "    marker = markers[i % len(markers)]\n",
    "    color = colors[i % len(colors)]\n",
    "    label = df['name'].iloc[0]\n",
    "\n",
    "    # Plot 1: FPR vs. Rejection Time\n",
    "    ax[0].plot(df['rejection_time'], df['fpr'], ls='--', lw=3, marker=marker, color=color, label=label, markersize=10)\n",
    "\n",
    "    # Plot 2: FPR vs. Significance_level\n",
    "    ax[1].plot(df['alpha'], df['fpr'], ls='--', lw=3, marker=marker, color=color, label=label, markersize=10)\n",
    "\n",
    "ax[0].tick_params(axis='both', labelsize=20, which='major', length=10, width=2)\n",
    "ax[0].set_ylim(-0.05, 1.05)\n",
    "ax[0].set_yticks(np.arange(0, 1.05, 0.2))\n",
    "ax[0].set_xlim(-50, 550)\n",
    "ax[0].set_xticks(np.arange(0, 550, 100))\n",
    "ax[0].set_xlabel(r'Rejection Time ($\\tau$)', fontsize=20)\n",
    "ax[0].set_ylabel('False Positive Rate (FPR)', fontsize=20)\n",
    "\n",
    "# Shade the desired FPR region (FPR below the significance level, i.e. under y = x).\n",
    "x = np.linspace(0, 0.1, 500)\n",
    "y = x\n",
    "ax[1].fill_between(x, 0, y, color='yellow', alpha=0.2, zorder=1)\n",
    "ax[1].set_ylim(-0.05, 1.05)\n",
    "ax[1].set_yticks(np.arange(0, 1.05, 0.2))\n",
    "ax[1].set_xlim(-0.005, 0.105)\n",
    "ax[1].set_xticks(np.arange(0, 0.12, 0.02))\n",
    "ax[1].tick_params(axis='both', labelsize=20, which='major', length=10, width=2)\n",
    "# Reference diagonal FPR = alpha (left unlabelled so it stays out of the legend).\n",
    "ax[1].plot([0, 0.1], [0, 0.1], color='k', ls='--', lw=3)\n",
    "ax[1].set_xlabel(r'Significance Level ($\\alpha$)', fontsize=20)\n",
    "ax[1].set_ylabel('False Positive Rate (FPR)', fontsize=20)\n",
    "\n",
    "# One shared legend below the figure: both panels draw the same labelled curves,\n",
    "# so the handles of the first panel are sufficient.\n",
    "handles, labels = ax[0].get_legend_handles_labels()\n",
    "fig.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.49, -0.07), fancybox=True, shadow=True,\n",
    "           ncol=6, fontsize=20, labelspacing=0.1, handletextpad=0.5, handlelength=1)\n",
    "plt.subplots_adjust(wspace=0.4)\n",
    "\n",
    "# Thicken the frame of both panels.\n",
    "for axis in ax:\n",
    "    for spine in axis.spines.values():\n",
    "        spine.set_linewidth(2)\n",
    "\n",
    "plt.savefig('exp_main/results/plot_png/flash.gemma.baseline_with_correction.png', dpi=300, bbox_inches='tight')\n",
    "plt.show()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
