{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from rouge_score.rouge_scorer import _create_ngrams\n",
    "from nltk.stem.porter import PorterStemmer\n",
    "import spacy, six, json\n",
    "from utils import tokenize\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.metrics import roc_curve, auc\n",
    "\n",
    "PorterStemmer = PorterStemmer()\n",
    "nlp = spacy.load('en_core_web_sm')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_roc_curve(human_scores, gpt_scores):\n",
    "    # Data\n",
    "    A = human_scores\n",
    "    B = gpt_scores\n",
    "    # Combine scores and true labels\n",
    "    scores = A + B\n",
    "    labels = [0] * len(A) + [1] * len(B)\n",
    "    # Calculate ROC curve\n",
    "    fpr, tpr, thresholds = roc_curve(labels, scores)\n",
    "    # Calculate AUC (Area Under Curve)\n",
    "    roc_auc = auc(fpr, tpr)\n",
    "    # Plot ROC curve\n",
    "    plt.figure()\n",
    "    plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.4f)' % roc_auc)\n",
    "    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')\n",
    "    plt.xlim([0.0, 1.0])\n",
    "    plt.ylim([0.0, 1.05])\n",
    "    plt.xlabel('False Positive Rate')\n",
    "    plt.ylabel('True Positive Rate')\n",
    "    plt.title('ROC curve: Open-gen w/ GPT3.5-Reddit w prompts' )\n",
    "    plt.legend(loc=\"lower right\")\n",
    "    plt.show()\n",
    "    # what is the TPR for FPR = 0.1?\n",
    "    for idx, fpr_ in enumerate(fpr):\n",
    "        if fpr_ > 0.01:\n",
    "            print(f\"TPR at 1% FPR: {tpr[idx]:.4f}\")\n",
    "            break\n",
    "    return roc_auc, tpr[idx]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_score_ngrams(target_ngrams, prediction_ngrams):\n",
    "    intersection_ngrams_count = 0\n",
    "    ngram_dict = {}\n",
    "    for ngram in six.iterkeys(target_ngrams):\n",
    "        intersection_ngrams_count += min(target_ngrams[ngram],\n",
    "                                        prediction_ngrams[ngram])\n",
    "        ngram_dict[ngram] = min(target_ngrams[ngram], prediction_ngrams[ngram])\n",
    "    target_ngrams_count = sum(target_ngrams.values()) # prediction_ngrams\n",
    "    return intersection_ngrams_count / max(target_ngrams_count, 1), ngram_dict\n",
    "\n",
    "\n",
    "def get_ngram_info(article_tokens, summary_tokens, _ngram):\n",
    "    article_ngram = _create_ngrams( article_tokens , _ngram)\n",
    "    summary_ngram = _create_ngrams( summary_tokens , _ngram)\n",
    "    ngram_score, ngram_dict = get_score_ngrams( article_ngram, summary_ngram) \n",
    "    return ngram_score, ngram_dict, sum( ngram_dict.values() )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('results/regen_gpt-3.5-turbo_20_0.7.jsonl', \"r\") as f:\n",
    "    gpt35_on4 = [json.loads(x) for x in f.read().strip().split(\"\\n\") ]\n",
    "len(gpt35_on4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gpt35_on4[0].keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len( gpt35_on4[0]['human_gen_text']['choices']   )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "########################## with questions ##########################\n",
    "ngram_overlap_count_wq = []\n",
    "for idx, instance in enumerate( gpt35_on4 ):\n",
    "    human_07 = gpt35_on4[idx][\"human_gen_truncate\"] # gold_gen_truncate, gold_gen_07\n",
    "    gpt_07 = instance['machine_gen_truncate'] # gen_completion_truncate, gen_completion_07\n",
    "    \n",
    "    human_tokens = tokenize( human_07, stemmer=PorterStemmer)\n",
    "    gpt_tokens = tokenize( gpt_07, stemmer=PorterStemmer)\n",
    "    ########################################\n",
    "    human_half = gpt35_on4[idx]['human_gen_text']['choices']\n",
    "    gpt_half = instance['machine_gen_text']['choices']\n",
    "    temp = []\n",
    "    for i in range(20): # len(human_half)\n",
    "        temp1 = {}\n",
    "        temp2 = {}\n",
    "        human_generate_tokens = tokenize(human_half[i]['message']['content'], stemmer=PorterStemmer)\n",
    "        gpt_generate_tokens = tokenize(gpt_half[i]['message']['content'], stemmer=PorterStemmer )\n",
    "        if len(human_generate_tokens) == 0 or len(gpt_generate_tokens) == 0:\n",
    "            continue\n",
    "        for _ngram in range(1, 25):\n",
    "            ngram_score, ngram_dict, overlap_count = get_ngram_info(human_tokens, human_generate_tokens, _ngram)\n",
    "            temp1['human_truncate_ngram_{}_score'.format(_ngram)] = ngram_score / len(human_generate_tokens)\n",
    "            temp1['human_truncate_ngram_{}_count'.format(_ngram)] = overlap_count\n",
    "\n",
    "            ngram_score, ngram_dict, overlap_count = get_ngram_info(gpt_tokens, gpt_generate_tokens, _ngram)\n",
    "            temp2['gpt_truncate_ngram_{}_score'.format(_ngram)] = ngram_score / len(gpt_generate_tokens)\n",
    "            temp2['gpt_truncate_ngram_{}_count'.format(_ngram)] = overlap_count\n",
    "        temp.append({'human':temp1, 'machine':temp2})\n",
    "\n",
    "    ngram_overlap_count_wq.append(temp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def N_gram_detector(ngram_n_ratio):\n",
    "    score = 0\n",
    "    non_zero = []\n",
    "    \n",
    "    for idx, key in enumerate(ngram_n_ratio):\n",
    "        if idx in range(3) and 'score' in key or 'ratio' in key:\n",
    "            score += 0. * ngram_n_ratio[ key ]\n",
    "            continue\n",
    "        if 'score' in key or 'ratio' in key:\n",
    "            score += (idx+1) * np.log((idx+1))   * ngram_n_ratio[ key ]\n",
    "            if ngram_n_ratio[ key ] != 0:\n",
    "                non_zero.append( idx+1 )\n",
    "    return score/ (sum( non_zero ) + 1e-8)\n",
    "\n",
    "human_scores = []\n",
    "gpt_scores = []\n",
    "\n",
    "for instance in ngram_overlap_count_wq:\n",
    "    human_score = []\n",
    "    gpt_score = []\n",
    "\n",
    "    for i in range(len(instance)):\n",
    "        human_score.append( N_gram_detector(instance[i]['human'] ) )\n",
    "        gpt_score.append( N_gram_detector(instance[i]['machine'] ) )\n",
    "\n",
    "    human_scores.append( sum(human_score)   )\n",
    "    gpt_scores.append( sum(gpt_score)   )   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# plot and give different colors\n",
    "import matplotlib.pyplot as plt\n",
    "#human_scores, gpt_scores = human_scores[:40], gpt_scores[:40]\n",
    "plt.figure(figsize=(10, 5))\n",
    "plt.plot(human_scores, label='human')\n",
    "plt.plot(gpt_scores, label='gpt')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_roc_curve( human_scores, gpt_scores )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "###### openai detector ######\n",
    "with open( f'results/openai_detect_gpt35.jsonl', 'r') as f:\n",
    "    openai_detect_gpt = [json.loads(x) for x in f.read().strip().split(\"\\n\")]\n",
    "len(openai_detect_gpt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# plot and give different colors\n",
    "import matplotlib.pyplot as plt\n",
    "human_scores2 = [x['gold_gen_prob'] for x in openai_detect_gpt]\n",
    "gpt_scores2 = [x['gen_completion_prob'] for x in openai_detect_gpt]\n",
    "\n",
    "plt.figure(figsize=(10, 5))\n",
    "plt.plot(human_scores2, label='human')\n",
    "plt.plot(gpt_scores2, label='gpt')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_roc_curve(human_scores2, gpt_scores2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "###### gptzero detector ######\n",
    "with open( f'results/detect_gptzero_200.jsonl', 'r') as f:\n",
    "    gptzero_detect_gpt = [json.loads(x) for x in f.read().strip().split(\"\\n\")]\n",
    "len(gptzero_detect_gpt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# plot and give different colors\n",
    "import matplotlib.pyplot as plt\n",
    "human_scores2 = [x['gold_gen_prob'] for x in gptzero_detect_gpt]\n",
    "gpt_scores2 = [x['gen_completion_prob'] for x in gptzero_detect_gpt]\n",
    "\n",
    "plt.figure(figsize=(10, 5))\n",
    "plt.plot(human_scores2, label='human')\n",
    "plt.plot(gpt_scores2, label='gpt')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_roc_curve(human_scores2, gpt_scores2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "vpt",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.15"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "015bfb409bf441c0a66e03b2de1c9b891435fcbf36ed1d1e9d7c8167e73e6b62"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
