{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from utils import get_summary_indices\n",
    "from tqdm import trange\n",
    "from nltk import sent_tokenize\n",
    "import seaborn as sns\n",
    "from paraphrase import is_bad\n",
    "from tqdm import tqdm\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "summarizers = ['textrank', 'matchsum', 'presumm_ext', 'azure', 'pegasus', 'bart']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def count_matches(entry, new_idx_df=None):\n",
    "    count = 0\n",
    "    possible_matches = new_idx_df[new_idx_df.id == entry.id]\n",
    "    for idx in possible_matches.idx:\n",
    "        if len(idx) == len(entry.idx) and (idx == entry.idx).all():\n",
    "            count += 1\n",
    "    if len(possible_matches) == 0:\n",
    "        return 0\n",
    "    return count/len(possible_matches)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "textrank\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 14%|█▍        | 1664/11481 [00:26<02:37, 62.49it/s]\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb Cell 4'\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     <a href='vscode-notebook-cell:/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb#ch0000003?line=14'>15</a>\u001b[0m original_idxs\u001b[39m.\u001b[39mappend((\u001b[39mid\u001b[39m, original_idx))\n\u001b[1;32m     <a href='vscode-notebook-cell:/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb#ch0000003?line=16'>17</a>\u001b[0m \u001b[39mfor\u001b[39;00m _, entry \u001b[39min\u001b[39;00m paraphrased[paraphrased\u001b[39m.\u001b[39mid \u001b[39m==\u001b[39m \u001b[39mid\u001b[39m]\u001b[39m.\u001b[39miterrows():\n\u001b[0;32m---> <a href='vscode-notebook-cell:/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb#ch0000003?line=17'>18</a>\u001b[0m     new_article \u001b[39m=\u001b[39m sent_tokenize(entry\u001b[39m.\u001b[39;49marticle)\n\u001b[1;32m     <a href='vscode-notebook-cell:/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb#ch0000003?line=18'>19</a>\u001b[0m     new_summary \u001b[39m=\u001b[39m sent_tokenize(entry\u001b[39m.\u001b[39msummary)\n\u001b[1;32m     <a href='vscode-notebook-cell:/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb#ch0000003?line=19'>20</a>\u001b[0m     idx \u001b[39m=\u001b[39m get_summary_indices(new_article, new_summary, top_k\u001b[39m=\u001b[39m\u001b[39m2\u001b[39m, tolerance\u001b[39m=\u001b[39m\u001b[39m0.1\u001b[39m)\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/nltk/tokenize/__init__.py:107\u001b[0m, in \u001b[0;36msent_tokenize\u001b[0;34m(text, language)\u001b[0m\n\u001b[1;32m     97\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m     98\u001b[0m \u001b[39mReturn a sentence-tokenized copy of *text*,\u001b[39;00m\n\u001b[1;32m     99\u001b[0m \u001b[39musing NLTK's recommended sentence tokenizer\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    104\u001b[0m \u001b[39m:param language: the model name in the Punkt corpus\u001b[39;00m\n\u001b[1;32m    105\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m    106\u001b[0m tokenizer \u001b[39m=\u001b[39m load(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mtokenizers/punkt/\u001b[39m\u001b[39m{\u001b[39;00mlanguage\u001b[39m}\u001b[39;00m\u001b[39m.pickle\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 107\u001b[0m \u001b[39mreturn\u001b[39;00m tokenizer\u001b[39m.\u001b[39;49mtokenize(text)\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/nltk/tokenize/punkt.py:1276\u001b[0m, in \u001b[0;36mPunktSentenceTokenizer.tokenize\u001b[0;34m(self, text, realign_boundaries)\u001b[0m\n\u001b[1;32m   1272\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mtokenize\u001b[39m(\u001b[39mself\u001b[39m, text, realign_boundaries\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n\u001b[1;32m   1273\u001b[0m     \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m   1274\u001b[0m \u001b[39m    Given a text, returns a list of the sentences in that text.\u001b[39;00m\n\u001b[1;32m   1275\u001b[0m \u001b[39m    \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1276\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mlist\u001b[39m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msentences_from_text(text, realign_boundaries))\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/nltk/tokenize/punkt.py:1332\u001b[0m, in \u001b[0;36mPunktSentenceTokenizer.sentences_from_text\u001b[0;34m(self, text, realign_boundaries)\u001b[0m\n\u001b[1;32m   1325\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39msentences_from_text\u001b[39m(\u001b[39mself\u001b[39m, text, realign_boundaries\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n\u001b[1;32m   1326\u001b[0m     \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m   1327\u001b[0m \u001b[39m    Given a text, generates the sentences in that text by only\u001b[39;00m\n\u001b[1;32m   1328\u001b[0m \u001b[39m    testing candidate sentence breaks. If realign_boundaries is\u001b[39;00m\n\u001b[1;32m   1329\u001b[0m \u001b[39m    True, includes in the sentence closing punctuation that\u001b[39;00m\n\u001b[1;32m   1330\u001b[0m \u001b[39m    follows the period.\u001b[39;00m\n\u001b[1;32m   1331\u001b[0m \u001b[39m    \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1332\u001b[0m     \u001b[39mreturn\u001b[39;00m [text[s:e] \u001b[39mfor\u001b[39;00m s, e \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mspan_tokenize(text, realign_boundaries)]\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/nltk/tokenize/punkt.py:1332\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m   1325\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39msentences_from_text\u001b[39m(\u001b[39mself\u001b[39m, text, realign_boundaries\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n\u001b[1;32m   1326\u001b[0m     \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m   1327\u001b[0m \u001b[39m    Given a text, generates the sentences in that text by only\u001b[39;00m\n\u001b[1;32m   1328\u001b[0m \u001b[39m    testing candidate sentence breaks. If realign_boundaries is\u001b[39;00m\n\u001b[1;32m   1329\u001b[0m \u001b[39m    True, includes in the sentence closing punctuation that\u001b[39;00m\n\u001b[1;32m   1330\u001b[0m \u001b[39m    follows the period.\u001b[39;00m\n\u001b[1;32m   1331\u001b[0m \u001b[39m    \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1332\u001b[0m     \u001b[39mreturn\u001b[39;00m [text[s:e] \u001b[39mfor\u001b[39;00m s, e \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mspan_tokenize(text, realign_boundaries)]\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/nltk/tokenize/punkt.py:1322\u001b[0m, in \u001b[0;36mPunktSentenceTokenizer.span_tokenize\u001b[0;34m(self, text, realign_boundaries)\u001b[0m\n\u001b[1;32m   1320\u001b[0m \u001b[39mif\u001b[39;00m realign_boundaries:\n\u001b[1;32m   1321\u001b[0m     slices \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_realign_boundaries(text, slices)\n\u001b[0;32m-> 1322\u001b[0m \u001b[39mfor\u001b[39;00m sentence \u001b[39min\u001b[39;00m slices:\n\u001b[1;32m   1323\u001b[0m     \u001b[39myield\u001b[39;00m (sentence\u001b[39m.\u001b[39mstart, sentence\u001b[39m.\u001b[39mstop)\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/nltk/tokenize/punkt.py:1421\u001b[0m, in \u001b[0;36mPunktSentenceTokenizer._realign_boundaries\u001b[0;34m(self, text, slices)\u001b[0m\n\u001b[1;32m   1408\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m   1409\u001b[0m \u001b[39mAttempts to realign punctuation that falls after the period but\u001b[39;00m\n\u001b[1;32m   1410\u001b[0m \u001b[39mshould otherwise be included in the same sentence.\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1418\u001b[0m \u001b[39m    [\"(Sent1.)\", \"Sent2.\"].\u001b[39;00m\n\u001b[1;32m   1419\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m   1420\u001b[0m realign \u001b[39m=\u001b[39m \u001b[39m0\u001b[39m\n\u001b[0;32m-> 1421\u001b[0m \u001b[39mfor\u001b[39;00m sentence1, sentence2 \u001b[39min\u001b[39;00m _pair_iter(slices):\n\u001b[1;32m   1422\u001b[0m     sentence1 \u001b[39m=\u001b[39m \u001b[39mslice\u001b[39m(sentence1\u001b[39m.\u001b[39mstart \u001b[39m+\u001b[39m realign, sentence1\u001b[39m.\u001b[39mstop)\n\u001b[1;32m   1423\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m sentence2:\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/nltk/tokenize/punkt.py:321\u001b[0m, in \u001b[0;36m_pair_iter\u001b[0;34m(iterator)\u001b[0m\n\u001b[1;32m    319\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m:\n\u001b[1;32m    320\u001b[0m     \u001b[39mreturn\u001b[39;00m\n\u001b[0;32m--> 321\u001b[0m \u001b[39mfor\u001b[39;00m el \u001b[39min\u001b[39;00m iterator:\n\u001b[1;32m    322\u001b[0m     \u001b[39myield\u001b[39;00m (prev, el)\n\u001b[1;32m    323\u001b[0m     prev \u001b[39m=\u001b[39m el\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/nltk/tokenize/punkt.py:1396\u001b[0m, in \u001b[0;36mPunktSentenceTokenizer._slices_from_text\u001b[0;34m(self, text)\u001b[0m\n\u001b[1;32m   1394\u001b[0m last_break \u001b[39m=\u001b[39m \u001b[39m0\u001b[39m\n\u001b[1;32m   1395\u001b[0m \u001b[39mfor\u001b[39;00m match, context \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_match_potential_end_contexts(text):\n\u001b[0;32m-> 1396\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtext_contains_sentbreak(context):\n\u001b[1;32m   1397\u001b[0m         \u001b[39myield\u001b[39;00m \u001b[39mslice\u001b[39m(last_break, match\u001b[39m.\u001b[39mend())\n\u001b[1;32m   1398\u001b[0m         \u001b[39mif\u001b[39;00m match\u001b[39m.\u001b[39mgroup(\u001b[39m\"\u001b[39m\u001b[39mnext_tok\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[1;32m   1399\u001b[0m             \u001b[39m# next sentence starts after whitespace\u001b[39;00m\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/nltk/tokenize/punkt.py:1442\u001b[0m, in \u001b[0;36mPunktSentenceTokenizer.text_contains_sentbreak\u001b[0;34m(self, text)\u001b[0m\n\u001b[1;32m   1438\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m   1439\u001b[0m \u001b[39mReturns True if the given text includes a sentence break.\u001b[39;00m\n\u001b[1;32m   1440\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m   1441\u001b[0m found \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m  \u001b[39m# used to ignore last token\u001b[39;00m\n\u001b[0;32m-> 1442\u001b[0m \u001b[39mfor\u001b[39;00m tok \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_annotate_tokens(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_tokenize_words(text)):\n\u001b[1;32m   1443\u001b[0m     \u001b[39mif\u001b[39;00m found:\n\u001b[1;32m   1444\u001b[0m         \u001b[39mreturn\u001b[39;00m \u001b[39mTrue\u001b[39;00m\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/nltk/tokenize/punkt.py:1578\u001b[0m, in \u001b[0;36mPunktSentenceTokenizer._annotate_second_pass\u001b[0;34m(self, tokens)\u001b[0m\n\u001b[1;32m   1572\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_annotate_second_pass\u001b[39m(\u001b[39mself\u001b[39m, tokens):\n\u001b[1;32m   1573\u001b[0m     \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m   1574\u001b[0m \u001b[39m    Performs a token-based classification (section 4) over the given\u001b[39;00m\n\u001b[1;32m   1575\u001b[0m \u001b[39m    tokens, making use of the orthographic heuristic (4.1.1), collocation\u001b[39;00m\n\u001b[1;32m   1576\u001b[0m \u001b[39m    heuristic (4.1.2) and frequent sentence starter heuristic (4.1.3).\u001b[39;00m\n\u001b[1;32m   1577\u001b[0m \u001b[39m    \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1578\u001b[0m     \u001b[39mfor\u001b[39;00m token1, token2 \u001b[39min\u001b[39;00m _pair_iter(tokens):\n\u001b[1;32m   1579\u001b[0m         \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_second_pass_annotation(token1, token2)\n\u001b[1;32m   1580\u001b[0m         \u001b[39myield\u001b[39;00m token1\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "for summarizer in summarizers:\n",
    "    print(summarizer)\n",
    "    original = pd.read_csv(f'../data/paraphrasing/{summarizer}_test.csv', index_col=0).dropna()\n",
    "    paraphrased = pd.read_csv(f'../data/paraphrasing/untargeted/{summarizer}/summaries_same.csv').dropna()\n",
    "    \n",
    "    original_idxs = []\n",
    "    new_idxs = []\n",
    "\n",
    "    for i in trange(len(original.id)):\n",
    "        original_entry = original.iloc[i]\n",
    "        id = original_entry.id\n",
    "        original_article = sent_tokenize(original_entry.article)\n",
    "        original_summary = sent_tokenize(original_entry.summary)\n",
    "        original_idx = get_summary_indices(original_article, original_summary, top_k=2, tolerance=0.1)\n",
    "        original_idxs.append((id, original_idx))\n",
    "\n",
    "        for _, entry in paraphrased[paraphrased.id == id].iterrows():\n",
    "            new_article = sent_tokenize(entry.article)\n",
    "            new_summary = sent_tokenize(entry.summary)\n",
    "            idx = get_summary_indices(new_article, new_summary, top_k=2, tolerance=0.1)\n",
    "            new_idxs.append((id, idx))\n",
    "        \n",
    "    original_idx_df = pd.DataFrame(original_idxs, columns=['id', 'idx'])\n",
    "    new_idx_df = pd.DataFrame(new_idxs, columns=['id', 'idx'])\n",
    "    original_idx_df.to_parquet(f'~/Data/paraphrasing/untargeted/{summarizer}/no_sample_orig_idxs.parquet')\n",
    "    new_idx_df.to_parquet(f'~/Data/paraphrasing/untargeted/{summarizer}/no_sample_new_idxs.parquet')\n",
    "    mismatches = new_idx_df.progress_apply(lambda x: len(original_idx_df[original_idx_df.id == x.id].iloc[0].idx) != len(x.idx) or (original_idx_df[original_idx_df.id == x.id].iloc[0].idx != x.idx).any(), axis=1)\n",
    "    matches = new_idx_df.progress_apply(lambda x: len(original_idx_df[original_idx_df.id == x.id].iloc[0].idx) == len(x.idx) and (original_idx_df[original_idx_df.id == x.id].iloc[0].idx == x.idx).all(), axis=1)\n",
    "    match_percents = original_idx_df.progress_apply(count_matches, new_idx_df=new_idx_df, axis=1)\n",
    "    match_percents_df = pd.DataFrame(zip( original_idx_df.id, match_percents), columns=['id', 'percent_match'])\n",
    "    match_percents_df['summarizer'] = summarizer\n",
    "    \n",
    "    if summarizer != summarizers[0]:\n",
    "        old_match_percents = pd.read_csv('~/Data/paraphrasing/untargeted/no_sample_percent_matches.csv', index_col=0)\n",
    "        match_percents_df = pd.concat((old_match_percents, match_percents_df))\n",
    "    \n",
    "    match_percents_df.to_csv('~/Data/paraphrasing/untargeted/no_sample_percent_matches.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"8\" halign=\"left\">percent_match</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>summarizer</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>azure</th>\n",
       "      <td>11490.0</td>\n",
       "      <td>0.340714</td>\n",
       "      <td>0.380371</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0.6</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>bart</th>\n",
       "      <td>11490.0</td>\n",
       "      <td>0.013560</td>\n",
       "      <td>0.074640</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>matchsum</th>\n",
       "      <td>11490.0</td>\n",
       "      <td>0.222611</td>\n",
       "      <td>0.326837</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pegasus</th>\n",
       "      <td>11490.0</td>\n",
       "      <td>0.016867</td>\n",
       "      <td>0.083431</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>presumm_abs</th>\n",
       "      <td>11490.0</td>\n",
       "      <td>0.202350</td>\n",
       "      <td>0.296646</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>presumm_ext</th>\n",
       "      <td>11490.0</td>\n",
       "      <td>0.122280</td>\n",
       "      <td>0.240968</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>textrank</th>\n",
       "      <td>11481.0</td>\n",
       "      <td>0.051145</td>\n",
       "      <td>0.171864</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            percent_match                                             \n",
       "                    count      mean       std  min  25%  50%  75%  max\n",
       "summarizer                                                            \n",
       "azure             11490.0  0.340714  0.380371  0.0  0.0  0.2  0.6  1.0\n",
       "bart              11490.0  0.013560  0.074640  0.0  0.0  0.0  0.0  1.0\n",
       "matchsum          11490.0  0.222611  0.326837  0.0  0.0  0.0  0.4  1.0\n",
       "pegasus           11490.0  0.016867  0.083431  0.0  0.0  0.0  0.0  1.0\n",
       "presumm_abs       11490.0  0.202350  0.296646  0.0  0.0  0.0  0.4  1.0\n",
       "presumm_ext       11490.0  0.122280  0.240968  0.0  0.0  0.0  0.2  1.0\n",
       "textrank          11481.0  0.051145  0.171864  0.0  0.0  0.0  0.0  1.0"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "match_percents_df.groupby('summarizer').describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "unmatched = pd.read_csv('../data/paraphrasing/untargeted/no_sample_percent_matches.csv', index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "sort_order = {k:i for i, k in enumerate(summarizers)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'textrank': 0,\n",
       " 'matchsum': 1,\n",
       " 'presumm_ext': 2,\n",
       " 'azure': 3,\n",
       " 'presumm_abs': 4,\n",
       " 'pegasus': 5,\n",
       " 'bart': 6}"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sort_order"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "match_percents_df = pd.read_csv('../data/paraphrasing/untargeted/no_sample_percent_matches.csv', index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "match_percents_df = match_percents_df.sort_values(by='summarizer', key=lambda x: x.map(sort_order))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"8\" halign=\"left\">percent_match</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>summarizer</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>bart</th>\n",
       "      <td>11490.0</td>\n",
       "      <td>0.308651</td>\n",
       "      <td>0.289780</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0.6</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>matchsum</th>\n",
       "      <td>11490.0</td>\n",
       "      <td>0.629713</td>\n",
       "      <td>0.304236</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>0.6</td>\n",
       "      <td>0.8</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pegasus</th>\n",
       "      <td>11490.0</td>\n",
       "      <td>0.308216</td>\n",
       "      <td>0.286093</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0.6</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>presumm_abs</th>\n",
       "      <td>11490.0</td>\n",
       "      <td>0.539547</td>\n",
       "      <td>0.318809</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0.6</td>\n",
       "      <td>0.8</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>presumm_ext</th>\n",
       "      <td>11490.0</td>\n",
       "      <td>0.227694</td>\n",
       "      <td>0.223960</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0.4</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>textrank</th>\n",
       "      <td>11481.0</td>\n",
       "      <td>0.637357</td>\n",
       "      <td>0.284223</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>0.6</td>\n",
       "      <td>0.8</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            percent_match                                             \n",
       "                    count      mean       std  min  25%  50%  75%  max\n",
       "summarizer                                                            \n",
       "bart              11490.0  0.308651  0.289780  0.0  0.0  0.2  0.6  1.0\n",
       "matchsum          11490.0  0.629713  0.304236  0.0  0.4  0.6  0.8  1.0\n",
       "pegasus           11490.0  0.308216  0.286093  0.0  0.0  0.2  0.6  1.0\n",
       "presumm_abs       11490.0  0.539547  0.318809  0.0  0.2  0.6  0.8  1.0\n",
       "presumm_ext       11490.0  0.227694  0.223960  0.0  0.0  0.2  0.4  1.0\n",
       "textrank          11481.0  0.637357  0.284223  0.0  0.4  0.6  0.8  1.0"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "match_percents_df.groupby('summarizer').describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmMAAAF2CAYAAAAiO5/IAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAx8UlEQVR4nO3deXQUZdr+8Ss7YQlZSMKm6IiACwEEJIKDIGR44WUVcBAh4iguGBhxRFBBEEEQUJa4wqiogGMQNLKIiIg6QEAyKoyyBdmXrJBASEKn+/n9wY9+DZDQkG4qy/dzDueka3nq7ruquy+qKxUvY4wRAAAALOFtdQEAAACVGWEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAv5Wl1AaRw/niuHg9ukAQCAssvb20shIdWKnV+uw5jDYQhjAACgXONrSgAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQh4PY6dOnVL37t116NChC+Zt375dffv2VZcuXfT888+rsLDQ0+UAAACUKR4NY7/88ovuu+8+7du376LzR40apXHjxumrr76SMUYJCQmeLAcAAKDM8WgYS0hI0Pjx4xUREXHBvMOHDys/P1/NmzeXJN1zzz1atWqVJ8sBAAAoczz6tyknT55c7Ly0tDSFh4c7H4eHhys1NdWT5Vyxb79do6Sk9SUuc/z4ceXkZBeZVlhYKLu9UMYYGXPh39D08vKSl5eXfHx85etbdFcEBdVUSEhIiduMjm6njh07u/gsyhd6Xn69/Xa8fv75P2re/DY99thwq8sp0zjOy49z++rc/sjPz3P23s/PX6GhYQoJCaF3blSZem7ZHwov7g3kcoSFVXdXOSVKTk7Szt0p8q8WWvKCfkFFHnr7uX7q0XHe4xN5Rifysopd/kxulvz8fHTvvX1c3EL5kpycpH0pO1QnqEqxy1SVVDXw/Km+uuLD2patgrTsYmcfzcmv0D13l61bf1Je3mlt3PhvjRv3nNXllGnJyUnau2uHwv0Dil3GT1LY+RN9fM7+c8X577XZJ3Qq+0Sxi6efKeA4v4jk5CTt2bNHocGRCqpeS0Hnf/w4pD179tA7N0pOTtLve3YpNLiGalYPUM3q571OHPn6fc+uCtFzy8JYZGSkMjIynI/T09Mv+nVmSTIzT8nhuDDUuZvNZpd/tVDVvqW7x7flqmO/LpfNZld6+kmrS/EIm82uOkFV9OgdDawuxemdjfsrdM/d5Y+vSXpVMpvNrnD/AP213jVWl+L0yeGDHOcXYbPZFRocqf+5e1Cxy6xau4DeudHZntdQ9w5til1m+bpN5aLn3t5eJZ5AsuzWFvXq1VNAQICSk5MlSZ9//rnat29vVTkAAACWuOphbOjQodq2bZskacaMGZoyZYq6du2qvLw8xcbGXu1yAAAALHVVvqZcu3at8+d58+Y5f27SpIk+/fTTq1ECAABAmcQd+AEAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALCQR8PYsmXL1K1bN8XExGjhwoUXzP/111/Vt29f9ezZU48++qhycnI8WQ4AAECZ47Ew
lpqaqpkzZ2rRokVKTEzUJ598opSUlCLLTJ48WSNGjNAXX3yh66+/Xu+++66nygEAACiTPBbGNmzYoOjoaAUHB6tq1arq0qWLVq1aVWQZh8Oh3NxcSVJeXp6qVKniqXIAAADKJC9jjPHEwO+8845Onz6tkSNHSpIWL16srVu36qWXXnIu8/PPP+vBBx9UtWrVFBgYqISEBIWEhLi8jczMU3I4ii//22/XKClp/ZU/if9v//59yj9jk3+1sFKP5S5ncjNVxd9PDRpc55bxoqPbqWPHzqUex509N7Z81QkqOwH9aE6+vPyqlLmeW+3cPj9+/LhycrKVl3faOc/Pz1+hoWEKCQmpMM9Xcu9xbs/PV0RAgBuqco+0ggL5VKncx/nF9u/+/ftUWGhXaHBksetlnUiVr6/PBb0rjz242orv+RmFBdcodr3MEyfl6+tf5nvu7e2lsLDqxc739dSGL5bxvLy8nD/n5+fr+eef1wcffKCoqCi9//77Gj16tObOnevyNkp6YpKUnJykHbt2y6eK6wHvoryqyztAKix0lG4cN/IOCNEZSbsPZJR6LHv+cfn5+ejee/uUeqzk5CTt271dtav7lWqcSH9J/r4y9sJS1+Qutav5SipU/tGUSy57KcdO2dzWc6slJycpZe8uVQ+vqYCwQAUosMj8Ap1Ryt5dFeb5Smef8+87t6uWT+neQkMkycdHjsKyc5zX8vGRbDblpOwu9VgZ9sJyud+Tk5O0J2WPagaFO6dVCwyVJNntxX8O1KxxdvmMtP+7/jk7J71c9uBqO9vznQqpUc05rUYVX0m+chTai10vpHpVSVJW6iHntOMnc8tdzz0WxiIjI7Vlyxbn47S0NEVERDgf79q1SwEBAYqKipIk/fWvf9Xs2bMvaxuXOjNms9nlUyVE1f8Uc5nVVy6nfv9aNptd6eknSz2WzWZX7ep+eqhFLTdUVnG9+1OG23puNZvNrurhNdWyf7til0levL7CPF/p7HOu5eOre4JDrS6lTFt6Iqtc7nebza6aQeG6K/reUo/1XVJCuezB1Waz2RVSo5o6tbm11GN9s+m/Za7nlzoz5rFrxtq2bauNGzcqKytLeXl5Wr16tdq3b++c36BBAx07dky///67JOmbb75R06ZNPVUOAABAmeTRM2MjR45UbGysbDab+vXrp6ioKA0dOlQjRoxQ06ZNNWXKFD355JMyxigsLEwvv/yyp8oBAAAokzwWxiSpR48e6tGjR5Fp8+bNc/5811136a677vJkCQAAAGUad+AHAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAAL+bqyUEZGhhYvXqzMzMwi08eOHeuRogAAACoLl8LYyJEjFRQUpCZNmsjLy8vTNQEAAFQaLp8Z++ijjzxdCwAAQKXj0jVj4eHhOnHihIdLAQAAqHxKPDM2adIkSZKPj4/69++vtm3bys/Pzzn/UteMLVu2TG+99ZZsNpuGDBmi+++/v8j833//XePHj1d2drbCw8P12muvqWbNmlf6XAAAAMqdEs+MBQcHKzg4WC1btlSvXr0UHh7unBYcHFziwKmpqZo5c6YWLVqkxMREffLJJ0pJSXHON8bo8ccf19ChQ/XFF1/opptu0ty5c93ypAAAAMqLEs+MxcXF
SZJsNpu+/PJL9ezZU+np6VqxYoViY2NLHHjDhg2Kjo52hrYuXbpo1apVzjF//fVXVa1aVe3bt5ckPfbYY8rJySnt8wEAAChXXLqAf+LEicrNzVXPnj3l7e2t5ORkHTp0qMSvKdPS0hQeHu58HBERoa1btzofHzhwQLVq1dLo0aP122+/qVGjRho3blwpnsrFnTyRoTOHf3X7uBVJwYkM6dpabhsvNStHG383bhuvIkrNOqkGdayuoqhvv12jpKT1JS5z/Phx5eRkF5lWUJAvh3Fo3Rsri13PbitUjtdxPfbYg0WmBwXVVEhISInbjI5up44dO1+i+qsvPSdHP9kdVpdRpqXnnlKQm8Zy5fh0xcWO4fMVFOTL4TBKXP16qbdXWGhT1okjFxz753PlteAqd71mrnbPjcOhT9dsKvX2CgvtysjeUa567lIY++mnn7R8+XJJUlhYmGbPnq1evXqVuI4xF34Y//G2GIWFhdq8ebMWLFigpk2batasWZo6daqmTp3qcvFhYdVLnO/n5+PyWJWdn5+PwsNruGUcuMZdPXeX5OQk7dqzUwGhgSUvGFT06oYAVb3ibebopHKOnyx2fkFWnvz8fHTvvX2ueBuewHHuOncd58nJSdq9O0XVAkNLX5NPyRHRr6q7IqTr8nKN8nKzSj1Obl6W214zyclJStm1XdUD/Us9VsAlfl0wINCv5AUua2Pnoo29xMUKTmXp2KnS9/xU3plS99ylMGaz2XTmzBn5+5/dIYWFhZdcJzIyUlu2bHE+TktLU0REhPNxeHi4GjRooKZNm0qSunfvrhEjRlxW8ZmZp+RwFH8Gxmazq0ZwLVWvd8tljVvZnCo4IpvNrvT04j8UXWWz2RUZGqQ7/uS+M20V0W/ZXm7rubvYbHYFhAaqfpfGVpfidOirnWWuT9LZXoUHBalFcOmDQUW218fbre8t1QJDFXVjNzdUVnFt3b3SrT2vHuivVg0jLr1wJbYlJe2SPff29irxBJJLt7bo0KGDHnroIX366adasmSJHn30Uee1XsVp27atNm7cqKysLOXl5Wn16tVF1mnRooWysrK0Y8cOSdLatWt1yy2EJgAAULm4dGbsmWee0cKFC/XNN9/I19dXMTExuu+++0pcJzIyUiNHjlRsbKxsNpv69eunqKgoDR06VCNGjFDTpk31xhtvaOzYscrLy1Pt2rU1bdo0tzwpAACA8sKlMPbJJ58oNja2yG9Qzp07V4888kiJ6/Xo0UM9evQoMm3evHnOn5s1a6ZPP/30cuoFAACoUEoMYx9//LHy8/M1f/58FRQUOKfbbDZ99NFHlwxjAAAAKFmJYczX11e7du1Sfn6+du3a5Zzu4+PjkdtQAAAAVDYlhrH+/furf//+WrNmjTp3Lnv3+QEAACjvXLpm7LbbbtP8+fOVm5srY4wcDof279+vV1991dP1AQAAVGguhbEnn3xSVapUUUpKitq2basNGzaoZcuWnq4NAACgwnPpPmNHjhzR3Llz1b59ew0aNEgff/yxDhw44OnaAAAAKjyXwlitWmfvpn7ddddp165dioyMdOku/AAAACiZS19ThoWF6Z///KeaN2+u+Ph4Va9eXadOnfJ0bQAAABWeS2fGJk6cKH9/f7Vq1Uq33nqr5syZo6efftrTtQEAAFR4Lp8ZO3f3/VGjRmnUqFEeLQoAAKCycCmMrVy5UnPmzFF2dnaR6Rs3bvRIUQAAAJWFS2Fs+vTpGjt2rK699lpP1wMAAFCpuBTG6tWrp06dOnm6FgAAgErHpTDWu3dvvfLKK2rfvr18ff9vldatW3usMAAAgMrApTC2fv16bdiwQf/+97+LTF+2bJlHigIAAKgsXApjW7du1ffff6+AgABP1wMAAFCpuHSfsfDwcO64DwAA4AEunRm79tpr1atXL7Vt21b+/v7O6WPHjvVYYQAAAJWBy2GM21oAAAC4n0thLC4urth5gwYN0oIFC9xWEAAAQGXi0jVjJeEPhgMA
AFy5UocxLy8vd9QBAABQKZU6jAEAAODKEcYAAAAsRBgDAACwUKnDmDHGHXUAAABUSi6FsYcffviCaffee68kaeHChe6tCAAAoBIp8T5jI0aM0N69e3Xw4EH16NHDOb2wsFDe3mdzXLVq1TxbIQAAQAVWYhh75plndPjwYY0bN07jxo1zTvfx8dGNN97o8eIAAAAquhLDWP369VW/fn2tWrXKeSYMAAAA7uPSn0P66quvNH36dGVnZ8sYI2OMvLy89J///MfT9QEAAFRoLoWxWbNmacyYMbr55pu54z4AAIAbuRTGgoKC9Je//MXTtQAAAFQ6Ll0I1qxZM3333XeergUAAKDScenM2HfffacFCxbIz89Pfn5+XDMGAADgJi6Fsfnz53u4DAAAgMrJpa8p69Wrp23btikhIUGhoaH66aefVK9ePU/XBgAAUOG5FMbmzp2rjz/+WKtWrVJ+fr5ef/11vfHGG56uDQAAoMJzKYytWLFC8+bNU2BgoEJCQpSQkKDly5d7ujYAAIAKz6Uw5uvrK39/f+fjoKAg+fq6dLkZAAAASuBSoqpTp47WrVsnLy8vnTlzRu+++y7XjAEAALiBS2Fs3LhxeuaZZ7Rz5041b95czZo104wZMzxdGwAAQIXnUhiLjIzUm2++KW9vb9ntdhUUFCgsLMzTtQEAAFR4Ll0ztnLlSvXp00eBgYFKT09X9+7dtXbtWk/XBgAAUOG5FMbefvttffjhh5Kk66+/XkuXLlV8fLxHCwMAAKgMXApjDodDtWvXdj6uU6eOHA6Hx4oCAACoLFwKY6GhofrXv/6lwsJC2e12ffrpp6pVq5anawMAAKjwXApjEydOVEJCgpo1a6aoqCglJCRowoQJHi4NAACg4nPptylTUlK0dOlSZWdny8fHR9WrV/d0XQAAAJWCS2fGZs6cKUmqWbMmQQwAAMCNXApjjRo10ltvvaUff/xRv/76q/PfpSxbtkzdunVTTEyMFi5cWOxy69at09133+161QAAABWES19T/vLLL/rll1+0ePFi5zQvLy998803xa6TmpqqmTNnaunSpfL399eAAQPUpk0bNWzYsMhyGRkZeuWVV66wfAAAgPLNpTB2JTd43bBhg6KjoxUcHCxJ6tKli1atWqW4uLgiy40dO1ZxcXF69dVXL3sbAAAA5Z1LYSw3N1evvvqq9uzZo9mzZ+u1117T6NGjVa1atWLXSUtLU3h4uPNxRESEtm7dWmSZDz/8UDfffLOaNWt2heVfmj3/uE79/rXHxq8I7PnHJbnvViXHTtn07k8ZbhuvIjp2yqbr3DTWt9+uUVLS+lKPs3//PhUU5uvQVzvdUJV7FGSd1v6cfZoy5UW3jBcd3U4dO3Z2y1gA4C4uhbFJkyYpIiJCmZmZCggI0KlTp/TCCy+UeDbLGHPBNC8vL+fPu3bt0urVqzV//nwdO3bsCkqXwsJK/mWCmJhO8vPzuaKx/+j333/X6fwz8q9Wdv4e55ncTFWt4q8//elPbhgtUh06dFB4eI1SjxQT00nr3NRze0Ge6gRVKfVY7nI0J18+AYFu6XkNyW09T05O0s49O+QbHFC6gapLPgpQocNe6prcxSc4QIUy2pO5t9RjFZ4okJ+fj+69t0+px3LH+0pl4efn45bjnJ67jp5ffaXtuUthbPv27ZoyZYq+++47BQYGasaMGerevXuJ60RGRmrLli3Ox2lpaYqIiHA+XrVqldLT09W3b1/ZbDalpaVp4MCBWrRokcvFZ2aeksNxYeg7p1WrO9Wq1Z0uj1ecKVNe1N4jWap9S8nP+Wo69utyXVM3VE8/PdZtY6annyz1GO7seUHaPj16R4NSj+Uu72zcr4CIBmWu5zabXb7BAQrpUN8NFVVcx9cdks1md1vP4Rp6fvXR86vvUj339vYq8QSSS79N6e1ddDG73X7BtPO1bdtWGzduVFZWlvLy8rR69Wq1b9/eOX/EiBH66quvlJiYqLlz5yoiIuKyghgA
AEBF4FIYa926taZPn678/Hz98MMPiouLU5s2bUpcJzIyUiNHjlRsbKx69+6t7t27KyoqSkOHDtW2bdvcUjwAAEB559LXlE8//bTmzp0rY4xeeuklde3aVcOGDbvkej169FCPHj2KTJs3b94Fy9WvX/+KfmMTAACgvHMpjB08eFBr167VwYMHZYxRcnKyMjMzVbduXU/XBwAAUKG59DXls88+q/79+ztv/tqlSxc9//zznq4NAACgwnMpjOXl5WnAgAHy8/OTv7+/Bg8erIwM7iMFAABQWi6FsWuuuUb/+c9/nI937dql+vX5NXoAAIDScumasdTUVA0ePFiNGzeWr6+vfvvtN4WHhzsvzl+2bJlHiwQAAKioXApjzzzzjKfrAAAAqJRcCmO33367p+sAAAColFy6ZgwAAACeQRgDAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEIeDWPLli1Tt27dFBMTo4ULF14wf82aNerVq5d69uypYcOGKTs725PlAAAAlDkeC2OpqamaOXOmFi1apMTERH3yySdKSUlxzj916pQmTJiguXPn6osvvlDjxo0VHx/vqXIAAADKJI+FsQ0bNig6OlrBwcGqWrWqunTpolWrVjnn22w2TZgwQZGRkZKkxo0b6+jRo54qBwAAoEzy9dTAaWlpCg8Pdz6OiIjQ1q1bnY9DQkLUuXNnSVJ+fr7mzp2rwYMHe6qcUjuTm6Vjvy63ugynM7lZkkKtLsOjjubk652N+60uw+loTr6ui7C6iovLyciWfbuP1WWUabkZ2VKY+8bLsBdq6Yks9w1YAWXYCxXkxvGOH0/X3oNbL71gJXb8eLrq1nffZ0PWiWylHHLbcBVS1ols1b6mdGN4LIwZYy6Y5uXldcG0kydPatiwYWrSpIn69OlzWdsIC6t+xfVdjpiYTvLzK/mDLisrSydOnCgyLS8vTw6HkY+vf7Hr2QvPyNvbS4GBgUWmBwcHKzS0pBdUuDp06KDw8BqXKr9cionppHVX2HNjHArwLf7QLigslJeX92X3/MZ6KpM9v9Sxif/j5+fjlv3nyvHpiosdw+fLy8uTcTjk71367Z1x2OXlfeGxf75Lv/+4Jkzue81wnLvOXcc5PXddaXvusTAWGRmpLVu2OB+npaUpIqLoaYW0tDQ99NBDio6O1nPPPXfZ28jMPCWH48LQ526tWt2pVq3uvOz1pkx5UUczTqrZXQOKXeaX7/6lOrVq6Nlnx19RbenpJ69ovbKuND0vPHFUw7u2LnaZ+C9/lG9wnQrTc5vNrqBaNRVyUx2rSynTfFLtstnsbtl/V3p8XokpU17U6UMHNahRk1KPtWDXDlWtf80VH/tXyh09t9nsCgkJ1/XXRLmhoorrZP4htx3nNptdocE11bB+Gf1KoIw4ka9L9tzb26vEE0geu2asbdu22rhxo7KyspSXl6fVq1erffv2zvl2u12PPfaYunbtqueff/6iZ80AAAAqOo+eGRs5cqRiY2Nls9nUr18/RUVFaejQoRoxYoSOHTum3377TXa7XV999ZUk6dZbb9XkyZM9VRIAAECZ47EwJkk9evRQjx49ikybN2+eJKlp06basWOHJzcPAABQ5nEHfgAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQoQxAAAA
CxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALCQR8PYsmXL1K1bN8XExGjhwoUXzN++fbv69u2rLl266Pnnn1dhYaEnywEAAChzPBbGUlNTNXPmTC1atEiJiYn65JNPlJKSUmSZUaNGady4cfrqq69kjFFCQoKnygEAACiTfD018IYNGxQdHa3g4GBJUpcuXbRq1SrFxcVJkg4fPqz8/Hw1b95cknTPPfdozpw5GjhwoKdKssSp7DT98t2/SpyvWjWuYkUV3+Gsk4r/8scS5zcIrnMVK/K8whMFOr7ukNVllGmFJwqkMKuruDKpeae1YNcOt4xzvRvqsUpuXpa27l5pdRllWm5elqRQt413Ku+MtqSkuW28iuhU3plSj+GxMJaWlqbw8HDn44iICG3durXY+eHh4UpNTb2sbYSFVS99oR4UE9NJfn4+ysrK0okTJ5SXlyeHwyFJ8vf3V3h4uK5t0kgdOnRQeDiBzB1iYjpp3bqSe96wcf0K1fNzx1lpnetZSWw22wWXExhjZIy55PheXl7y8vIqMs3X11d+fn4lrhccHKzQUDd8uNRWudzvMTGdtO68/XtuX53bH+eOcelsn/38/OTn53dB72qqfPZA4jh3Xbjb9jE9d11pe+6xMHaxBv6xWZea74rMzFNyOC69o6zSqtWdatXqTpeWTU8/6eFqKofK2PPLec4of/vdE/u3vPVA4ji/XO7Yx/T88pTUc29vrxJPIHnsmrHIyEhlZGQ4H6elpSkiIqLY+enp6UXmAwAAVAYeC2Nt27bVxo0blZWVpby8PK1evVrt27d3zq9Xr54CAgKUnJwsSfr888+LzAcAAKgMPHpmbOTIkYqNjVXv3r3VvXt3RUVFaejQodq2bZskacaMGZoyZYq6du2qvLw8xcbGeqocAACAMsnLuHJ1XBlV1q8ZAwAAsOyaMQAAAFwaYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQoQxAAAACxHGAAAALOSxPxR+NXh7X94fFgcAALjaLpVXyvUd+AEAAMo7vqYEAACwEGEMAADAQoQxAAAACxHGAAAALEQYAwAAsBBhDAAAwEKEMQAAAAsRxgAAACxEGAMAALAQYcwFJ0+e1LBhwy57va1bt2r69OluqWHp0qUaM2aMW8aqSObMmaMtW7YUO3/Tpk0aPHjwVawIVnPn6w5FNW7c2OoSUMGV9j27vL7fE8ZckJ2drR07dlz2eikpKcrMzPRARTjnxx9/lN1ut7oMlCG87oDKa/PmzVaXcEXK9R8Kv1omTZqktLQ0PfHEE4qJidEHH3wgh8OhW265RePHj1dKSoqGDh2q5cuXy9vbW3369NGbb76pOXPm6PTp03rrrbcUGRmpzz77TCdOnFDHjh3VvXt3vfTSSzp9+rSysrL04IMPKjY2VvHx8UpNTdX+/ft1+PBh9e/fX48//niRel5++WVlZGRo+vTp8vHxsagr7rNp0ya9/fbbMsbowIED6tKli2rUqKE1a9ZIkubOnatVq1YpMTFReXl58vLy0qxZs7Rt2zb997//1dixY/X666/L4XDohRdeUH5+vmrWrKkZM2ZIkrKysjR06FAdOHBA119/vebMmaMzZ87oqaeeUkZGhiTpiSeeUKdOnTR48GDFxcWpTZs2OnTokGJjY7V27VqNGTNGgYGBSk5O
1smTJ/Xcc88pMTFRO3bsUOfOncvcWctNmzYpPj5evr6+Onr0qKKiovT4449r2LBhCgkJUUBAgN59911NmzZNmzdvlt1u1z333KMhQ4bo2LFjevrpp3X69Gl5e3tr7Nixat68ue6++259+OGHql+/vjZt2qTXX39dH330kQYPHqybbrpJGzduVH5+vsaOHauPPvpIKSkpGjJkiIYMGaL4+HgdOXJEO3fuVGZmpp588kklJSXpl19+UZMmTTRz5kx5eRX/h3Q///zzK3rdnf/aod+u9XvmzJnauHGjsrOzFRISovj4eIWHh0uSxo0bp61btyokJEQvv/yy6tatq/fff1+fffaZvL29FRUVpYkTJ16Vvl+Ji+2ryZMna+XKlRccYwEBAVq5cqXmzJmjwMBA3XzzzbLb7Zo6daq+/PJLvf/++8rPz1dBQYEmTZqk1q1bX7QXS5cu1ebNmzV16lRJcr7PNGjQ4KL7vrI7fvy4HnroIaWlpSkqKkrjx49XQkLCBZ8BN9xwg+6++25FRUVp+/btuuOOOyRJ/fv31+LFiy1+FpfJ4JIOHjxoOnbsaHbt2mXuu+8+k5+fb4wxZsaMGeaNN94wxhgze/ZsM3r0aPPUU0+Zd955xxhjzJIlS8zo0aOdP8fExBibzWaMMWbSpElmw4YNxhhjDhw4YJo3b26MMWbOnDmmX79+pqCgwGRkZJjmzZub7Oxs51hz5swxw4cPd45TESQlJZkWLVqYI0eOmNOnT5vmzZubjz/+2BhjzJgxY8z8+fPNAw88YPLy8owxxsyaNctMnDjRGGPMoEGDTFJSkjHGmG7dupm1a9caY4xZuHChmTp1qklKSjLNmzc3Bw4cMHa73fTt29d8++23ZunSpWbChAnGGGNSUlLM1KlTLxjv3H43xpjRo0ebYcOGGWOMWbp0qWnZsqXJyMgwJ0+eNC1atDA5OTlXo1UuS0pKMk2bNjV79uwxDofDDB8+3Lz33numUaNG5uDBg8YYYxYtWmRefvllY4wxBQUFZtCgQebHH3808fHxZt68ec5x/vnPfxpjjOnYsaNz3aSkJDNo0CBjzNmeTZ482RhjTHx8vOncubM5ffq0OXTokGnVqpUx5uxxfc899xibzWY2bdpkmjRpYnbv3m1sNpuJiYkx27dvL/a5lOZ1d7VUpH7v27fPxMXFGbvdbowxZtSoUebdd981xhjTqFEjk5iYaIwxZsGCBWbYsGHGZrOZNm3amDNnzhi73W5eeOEFc+zYMfc1180utq/efPPNix5jmZmZpl27dubYsWPGbrebJ554wowePdrY7XYTGxtrMjMzjTHGLF682Dz66KPF9uL8Y/Lc+0xx+74yS0pKMs2aNTN79+41DofD/P3vfy/xM6Bjx45myZIlzvUbNWpkSd2lxZmxy7Bp0ybt379f9957ryTJZrPp5ptvliQ9/vjj6tu3r6pUqVLs9So333yzfH3PtnzMmDH64Ycf9M4772jnzp06ffq0c7k2bdrI399fYWFhCg4O1smTJyVJ33//vbKysvTpp586x6koGjVqpDp16kiSQkJCnP/DqVu3rnJycvTqq69qxYoV2rdvn3744QfddNNNRdbPyspSenq6OnbsKEkaOHCgpLP7rEmTJrrmmmskSTfccIOOHz+uFi1a6LXXXlNqaqo6dOigJ5544pI1tm/f3lnTjTfeqLCwMElScHCwsrOzVaNGDTd0wn1at26tP/3pT5KkXr16KSEhQWFhYapfv74kaePGjdq+fbuSkpIkSadPn9bOnTt1xx13aPjw4dq+fbvuuusuDRo06JLb+mNvmjVrpsDAQNWrV085OTnOZdq1aydfX1/VrVtX4eHhatiwoSQpMjJS2dnZxY5d2tfd1VJR+t2gQQONHj1aixcv1t69e/Xzzz/r2muvlSRVqVJFPXv2dD7HWbNmydfXVy1atFC/fv3UqVMn3X///YqMjLzc9l1V5++r4cOHKyQk5IJjbMuWLWrRooXz
+fTu3Vtr1qyRt7e33njjDa1du1Z79+7V5s2b5e3tfdm9uJJ9Xxm0atVK1113nSSpR48eWrp0aYmfAc2aNbOoUvepWJ/oHma329W1a1eNHTtWkpSbm+u8XunkyZPKzc1Vbm6uTpw4odDQ0AvWr1KlivPnJ598UkFBQerYsaO6deumFStWOOcFBAQ4f/by8pIxRpJUr149jRw5UhMnTtS//vUveXtXnEv+/Pz8ijz+49evR48e1V//+lcNGjRI7du3V61atbR9+/YS1y8oKFBaWpokFQmu5/p53XXX6csvv9QPP/ygb7/9Vu+9956+/PJLSXL2u7CwsNhtlIcw/MceGmPk4+NT5Bi02+0aNWqU/vKXv0g6G2irVq2qKlWqaMWKFVq3bp1Wrlypzz77TO+//75zHOnKenOl/Svt6+5qqSj9/u9//6t//OMfGjJkiLp06SJvb29nHX98zzHGOMd988039fPPP+v777/Xww8/rBkzZuj22293eZtX2/n7qrhjbPPmzXI4HBesn5ubq759+6pXr15q3bq1GjdurIULF0q6eC/++D4unQ17ktSyZcti931l9sfj1RijnJycEj8D/viZWV5VnE9zD/L19VVhYaHatGmjr7/+WpmZmTLGaMKECfrggw8kSS+++KIGDRqkgQMH6sUXX5R09gV//pvoOevXr9eIESPUuXNn/fjjj5J0yQvRb7jhBvXv31+BgYHOF35lsG3bNjVo0EBDhgxRs2bN9P333zt75ePjI7vdrho1aqh27dpav369JCkxMVGzZ88udswFCxYoPj5eXbt21fjx45WVlaWTJ08qJCREKSkpkuS8Zq28Sk5OVmpqqhwOhz7//HPn2ZRzoqOjlZCQIJvNptzcXA0cOFC//PKLpk2bpsTERPXp00cvvPCCfvvtN0kq0ptvvvnmqj0Pd77uPKmi9PvHH3/U7bffrvvuu08NGzbU+vXrna+306dPO2tZsmSJ2rZtq6ysLHXt2lWNGjXS3//+d7Vr1047d+68avVeifP31XPPPXfRY+y2227Ttm3blJaWJmOMVq5cKS8vL+3bt0/e3t567LHHFB0d7XxPKq4XISEh2rNnj4wxOnjwoLM/xe37yi45OVlHjhwp8loq7jPgfFa9/kur7P/3vgwICwtT3bp1NXnyZMXFxemBBx6Qw+HQTTfdpEceeUQrV67UwYMH9dprr8kYo759+2rlypWKiorS66+/rhkzZjhPiZ8zfPhwDRw4UEFBQbr++utVr149HTp0yKV6JkyYoPvuu08xMTGqXbu2J55ymXLnnXdqx44d6tatm/z9/RUVFaXdu3dLkv785z9r/PjxeuWVVzR9+nRNmDBB06ZNU0hIiKZNm6a9e/dedMzevXvrqaeeUo8ePeTr66u4uDgFBQXp4Ycf1pgxY7RkyRJ16tTpaj5Nt4uIiNAzzzyj1NRUtWvXTm3bttXcuXOd8wcMGKD9+/erT58+Kiws1D333KM2bdro2muv1T/+8Q999tln8vHx0fjx4yVJI0aM0EsvvaTXX39dd95551V7Hk2aNLni193TTz991eqsKP3u1q2b4uLi1KNHD/n5+alx48bO96agoCCtWbNGs2fPVmRkpKZMmaLQ0FANGDBA/fr1U2BgoOrUqaM+ffpctXqvxPn7atCgQapateoFx1hAQIDGjh2rv/3tb/L391f9+vUVFBSkJk2a6KabblLXrl1VpUoVtW7dWkeOHCm2F/7+/lqyZIn+53/+R9dff71atmwp6eyF/Bfb95Vdw4YN9dxzzyk9PV3R0dEaMGCANmzYcNHPgPN16tRJvXr10tKlS8vVGTMv88dzpwAqhD/+9h08j36XH5ezr44fP66PPvpIcXFx8vb21qRJk9SgQYNyey8rlF2cGQNguQMHDmj48OEXnTdp0iQ1bdr0KldUsdFv1wQHBysnJ0fdu3eXj4+PbrnlFudF/oA7cWYMAADAQlzADwAAYCHCGAAAgIUIYwAAABYi
jAFAKQwdOtR5PzAAuBJcwA8AAGAhbm0BoMzJzc3Vs88+q/3798vb21u33HKL/vd//1eTJ0/W8uXLJZ29X9RLL72k5cuXKz4+XgcOHNDBgweVlpamqKgotWvXTp9//rkOHTqkUaNGqXv37i4vl5GRoRdeeEGZmZlKT09XvXr1NGvWLIWFhenuu+9WVFSUdu7cqaeeekpTpkzR7NmztXXrViUkJDifw549e/Twww/rySef1Nq1a/XWW2/JZrOpSpUqGj16tFq0aKH4+Hj9/PPPSktLU+PGjTVjxgyrWg7AQoQxAGXO119/rdzcXCUmJsput2v8+PGX/AsVycnJSkxMlJ+fn9q3b6/atWtr4cKFWrNmjaZPn67u3bu7vNyKFSvUvHlzPfLIIzLG6JFHHlFiYqL+9re/SZJuvPFGzZo1S5I0ZcoUSdL999+v+++/X5K0aNEiLVmyRI888oj27dunmTNn6sMPP1RISIh2796tBx98UKtXr5YkHT58WMuXLy8Xf+8UgGfw6gdQ5rRs2VIzZ87U4MGD1bZtWz3wwAPKysoqcZ22bduqRo0aks7+uZs///nPkqRrr71WJ06cuKzlHnjgAW3ZskXvv/++9u3bp927d6tZs2bOMVq1alVsHV9//bXee+89ffzxx6patarWr1+vtLQ0DRkyxLmMl5eXDhw4IElq3rw5QQyo5HgHAFDmXHPNNfr666+1adMmJSUl6cEHH9SAAQP0x0tcbTZbkXX8/f2LPC4u4Liy3PTp07V161b17dtXbdq0UWFhYZFtV61a9aJjJycn68UXX9T8+fMVHh4uSXI4HLrjjjucZ9Ik6ejRo4qIiNDXX39d7FgAKg9+mxJAmbNo0SI9++yzuvPOOzVq1Cjdeeed2r17t44cOaLMzEwZY7RmzRqPbf/f//63HnjgAfXu3VthYWHasGGD7HZ7ieukpKTo73//u1599VU1bNjQOT06Olrr16/Xnj17JEnfffedevbsqYKCAo/VD6B84cwYgDKnd+/e2rx5s7p166bAwEDVrVtXkydP1ttvv62+ffsqPDxcHTp08Nj2n3jiCU2bNk1vvvmmfHx8dNtttzm/VizOlClTZLPZ9MorrziD26233qrJkydr4sSJeuqpp2SMka+vr9566y3OiAFw4tYWAAAAFuJrSgAAAAsRxgAAACxEGAMAALAQYQwAAMBChDEAAAALEcYAAAAsRBgDAACwEGEMAADAQv8P1nCm4A7QlqAAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 720x432 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Default every subsequent figure to a 10x6 inch canvas.\n",
    "sns.set(rc={'figure.figsize': (10, 6)})\n",
    "# One boxen plot of percent_match per summarizer, drawn from match_percents_df.\n",
    "sns.boxenplot(x='summarizer', y='percent_match', data=match_percents_df)\n",
    "# Save the current figure; savefig returns None, so the cell echoes no repr.\n",
    "plt.savefig('../paper/figs/paraphrasing_untargeted_exact_match.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_percent_overlap(summarizer):\n",
    "    \"\"\"Compute the mean overlap between original and new index arrays for one summarizer.\n",
    "\n",
    "    Reads `no_sample_orig_idxs.parquet` and `no_sample_new_idxs.parquet` from\n",
    "    `../data/paraphrasing/targeted/<summarizer>/`. Both tables must provide `id`\n",
    "    and `idx` columns. For every row of the original table, each `idx` stored\n",
    "    under the same `id` in the new table is scored as\n",
    "    `len(set(new) & set(orig)) / max(len(new), len(orig))`, and the scores for\n",
    "    that row are averaged.\n",
    "\n",
    "    Args:\n",
    "        summarizer: Directory name of the summarizer whose parquet files are read;\n",
    "            also copied into every returned tuple.\n",
    "\n",
    "    Returns:\n",
    "        A list with one `(id, mean_overlap, summarizer)` tuple per row of the\n",
    "        original table. `mean_overlap` is NaN when the new table has no row\n",
    "        with that `id`.\n",
    "\n",
    "    Raises:\n",
    "        ZeroDivisionError: If an original `idx` and a matching new `idx` are both empty.\n",
    "    \"\"\"\n",
    "    # Build the paths from the argument, not from a loop variable of the calling cell.\n",
    "    data_dir = f'../data/paraphrasing/targeted/{summarizer}'\n",
    "    orig_idxs = pd.read_parquet(f'{data_dir}/no_sample_orig_idxs.parquet')\n",
    "    new_idxs = pd.read_parquet(f'{data_dir}/no_sample_new_idxs.parquet')\n",
    "\n",
    "    # Bucket the new idx arrays by id once; filtering new_idxs inside the loop\n",
    "    # rescans the whole table for every original row.\n",
    "    new_idx_by_id = {\n",
    "        key: list(group) for key, group in new_idxs.groupby('id', sort=False)['idx']\n",
    "    }\n",
    "\n",
    "    percent_overlaps = []\n",
    "    for entry_id, orig_idx in tqdm(zip(orig_idxs['id'], orig_idxs['idx']), total=len(orig_idxs)):\n",
    "        # assumes idx values are numpy arrays (they must support .tolist()) - TODO confirm\n",
    "        orig_set = set(orig_idx.tolist())\n",
    "        percents = [\n",
    "            len(set(new_idx.tolist()) & orig_set) / max(len(new_idx), len(orig_idx))\n",
    "            for new_idx in new_idx_by_id.get(entry_id, [])\n",
    "        ]\n",
    "        # Average over every new idx array for this id; NaN marks ids with nothing to compare.\n",
    "        mean_overlap = np.mean(percents) if percents else np.nan\n",
    "        percent_overlaps.append((entry_id, mean_overlap, summarizer))\n",
    "\n",
    "    return percent_overlaps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 79%|███████▉  | 9094/11481 [00:28<00:07, 315.45it/s]\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb Cell 14'\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb#ch0000015?line=0'>1</a>\u001b[0m \u001b[39mfor\u001b[39;00m s \u001b[39min\u001b[39;00m summarizers:\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb#ch0000015?line=1'>2</a>\u001b[0m     percent_overlaps \u001b[39m=\u001b[39m get_percent_overlap(s)\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb#ch0000015?line=2'>3</a>\u001b[0m     overlap_df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame(percent_overlaps, columns\u001b[39m=\u001b[39m[\u001b[39m'\u001b[39m\u001b[39mid\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mpercent_overlap\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39msummarizer\u001b[39m\u001b[39m'\u001b[39m])\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb#ch0000015?line=4'>5</a>\u001b[0m     \u001b[39mif\u001b[39;00m s \u001b[39m!=\u001b[39m summarizers[\u001b[39m0\u001b[39m]:\n",
      "\u001b[1;32m/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb Cell 13'\u001b[0m in \u001b[0;36mget_percent_overlap\u001b[0;34m(summarizer)\u001b[0m\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb#ch0000014?line=7'>8</a>\u001b[0m orig_idx \u001b[39m=\u001b[39m x[\u001b[39m1\u001b[39m]\u001b[39m.\u001b[39midx\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb#ch0000014?line=8'>9</a>\u001b[0m percents \u001b[39m=\u001b[39m []\n\u001b[0;32m---> <a href='vscode-notebook-cell:/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb#ch0000014?line=9'>10</a>\u001b[0m \u001b[39mfor\u001b[39;00m new_idx \u001b[39min\u001b[39;00m new_idxs[new_idxs\u001b[39m.\u001b[39;49mid \u001b[39m==\u001b[39;49m \u001b[39mid\u001b[39;49m]\u001b[39m.\u001b[39midx:\n\u001b[1;32m     <a href='vscode-notebook-cell:/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb#ch0000014?line=10'>11</a>\u001b[0m     overlap \u001b[39m=\u001b[39m \u001b[39mset\u001b[39m(new_idx\u001b[39m.\u001b[39mtolist())\u001b[39m.\u001b[39mintersection(\u001b[39mset\u001b[39m(orig_idx\u001b[39m.\u001b[39mtolist()))\n\u001b[1;32m     <a href='vscode-notebook-cell:/Users/userbrown/nus/summarization-bias/code/paraphrasing_analysis.ipynb#ch0000014?line=11'>12</a>\u001b[0m     percent \u001b[39m=\u001b[39m \u001b[39mlen\u001b[39m(overlap)\u001b[39m/\u001b[39m\u001b[39mmax\u001b[39m(\u001b[39mlen\u001b[39m(new_idx), \u001b[39mlen\u001b[39m(orig_idx))\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/ops/common.py:70\u001b[0m, in \u001b[0;36m_unpack_zerodim_and_defer.<locals>.new_method\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m     <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/ops/common.py?line=65'>66</a>\u001b[0m             \u001b[39mreturn\u001b[39;00m \u001b[39mNotImplemented\u001b[39m\n\u001b[1;32m     <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/ops/common.py?line=67'>68</a>\u001b[0m other \u001b[39m=\u001b[39m item_from_zerodim(other)\n\u001b[0;32m---> <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/ops/common.py?line=69'>70</a>\u001b[0m \u001b[39mreturn\u001b[39;00m method(\u001b[39mself\u001b[39;49m, other)\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/arraylike.py:40\u001b[0m, in \u001b[0;36mOpsMixin.__eq__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m     <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/arraylike.py?line=37'>38</a>\u001b[0m \u001b[39m@unpack_zerodim_and_defer\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39m__eq__\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m     <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/arraylike.py?line=38'>39</a>\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__eq__\u001b[39m(\u001b[39mself\u001b[39m, other):\n\u001b[0;32m---> <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/arraylike.py?line=39'>40</a>\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_cmp_method(other, operator\u001b[39m.\u001b[39;49meq)\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/series.py:5623\u001b[0m, in \u001b[0;36mSeries._cmp_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m   <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/series.py?line=5619'>5620</a>\u001b[0m rvalues \u001b[39m=\u001b[39m extract_array(other, extract_numpy\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m, extract_range\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[1;32m   <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/series.py?line=5621'>5622</a>\u001b[0m \u001b[39mwith\u001b[39;00m np\u001b[39m.\u001b[39merrstate(\u001b[39mall\u001b[39m\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mignore\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[0;32m-> <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/series.py?line=5622'>5623</a>\u001b[0m     res_values \u001b[39m=\u001b[39m ops\u001b[39m.\u001b[39;49mcomparison_op(lvalues, rvalues, op)\n\u001b[1;32m   <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/series.py?line=5624'>5625</a>\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_construct_result(res_values, name\u001b[39m=\u001b[39mres_name)\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/ops/array_ops.py:283\u001b[0m, in \u001b[0;36mcomparison_op\u001b[0;34m(left, right, op)\u001b[0m\n\u001b[1;32m    <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/ops/array_ops.py?line=279'>280</a>\u001b[0m     \u001b[39mreturn\u001b[39;00m invalid_comparison(lvalues, rvalues, op)\n\u001b[1;32m    <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/ops/array_ops.py?line=281'>282</a>\u001b[0m \u001b[39melif\u001b[39;00m is_object_dtype(lvalues\u001b[39m.\u001b[39mdtype) \u001b[39mor\u001b[39;00m \u001b[39misinstance\u001b[39m(rvalues, \u001b[39mstr\u001b[39m):\n\u001b[0;32m--> <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/ops/array_ops.py?line=282'>283</a>\u001b[0m     res_values \u001b[39m=\u001b[39m comp_method_OBJECT_ARRAY(op, lvalues, rvalues)\n\u001b[1;32m    <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/ops/array_ops.py?line=284'>285</a>\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/ops/array_ops.py?line=285'>286</a>\u001b[0m     res_values \u001b[39m=\u001b[39m _na_arithmetic_op(lvalues, rvalues, op, is_cmp\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/ops/array_ops.py:73\u001b[0m, in \u001b[0;36mcomp_method_OBJECT_ARRAY\u001b[0;34m(op, x, y)\u001b[0m\n\u001b[1;32m     <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/ops/array_ops.py?line=70'>71</a>\u001b[0m     result \u001b[39m=\u001b[39m libops\u001b[39m.\u001b[39mvec_compare(x\u001b[39m.\u001b[39mravel(), y\u001b[39m.\u001b[39mravel(), op)\n\u001b[1;32m     <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/ops/array_ops.py?line=71'>72</a>\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m---> <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/ops/array_ops.py?line=72'>73</a>\u001b[0m     result \u001b[39m=\u001b[39m libops\u001b[39m.\u001b[39;49mscalar_compare(x\u001b[39m.\u001b[39;49mravel(), y, op)\n\u001b[1;32m     <a href='file:///Users/userbrown/opt/anaconda3/envs/summarization/lib/python3.9/site-packages/pandas/core/ops/array_ops.py?line=73'>74</a>\u001b[0m \u001b[39mreturn\u001b[39;00m result\u001b[39m.\u001b[39mreshape(x\u001b[39m.\u001b[39mshape)\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "# Per-summarizer result frames are accumulated in memory rather than read back from\n",
    "# the CSV, so ids and scores are not re-parsed from text on every pass.\n",
    "overlap_frames = []\n",
    "for s in summarizers:\n",
    "    percent_overlaps = get_percent_overlap(s)\n",
    "    overlap_frames.append(\n",
    "        pd.DataFrame(percent_overlaps, columns=['id', 'percent_overlap', 'summarizer'])\n",
    "    )\n",
    "    overlap_df = pd.concat(overlap_frames)\n",
    "\n",
    "    # Rewrite the file after each summarizer so an interrupted run keeps the finished ones.\n",
    "    overlap_df.to_csv('../data/paraphrasing/targeted/no_sample_overlap_percent.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the saved overlap scores. index_col is not passed, so the frame gets a fresh\n",
    "# RangeIndex and any index column stored in the file is read as ordinary data.\n",
    "overlap_df = pd.read_csv(\n",
    "    '../data/paraphrasing/targeted/no_sample_overlap_percent.csv'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reorder rows by the value sort_order assigns to each summarizer name.\n",
    "# NOTE(review): sort_order is defined outside this cell; presumably a name -> rank mapping - confirm.\n",
    "overlap_df = overlap_df.sort_values(\n",
    "    by='summarizer',\n",
    "    key=lambda names: names.map(sort_order),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmMAAAF2CAYAAAAiO5/IAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAA7EUlEQVR4nO3dd2AUZf7H8c9uGokEEtJoCtjAQoCfKBE4NALHwYEiqKcIqKfoicChJwIKAirGgoLECvajaJRIFBGplgMCEk9BpQVIqOmFEJKwZX5/cKwgCWw2WSbl/fqH7M48z3zn2ZnZD5PJjMUwDEMAAAAwhdXsAgAAAOozwhgAAICJCGMAAAAmIowBAACYiDAGAABgIsIYAACAiQhjAAAAJvI1u4CqyM8vltPJbdIAAEDNZbVaFBp6XoXTa3UYczoNwhgAAKjV+DUlAACAiQhjAAAAJiKMAQAAmIgwBgAAYCLCGAAAgIkIYwAAACYijAEAAJiIMAYAAGAir4exI0eOqH///tq/f/9p07Zu3arBgwerT58+euKJJ2S3271dDgAAQI3i1TD2888/64477lBaWlq508eNG6fJkyfr66+/lmEYSkhI8GY5AAAANY5Xw1hCQoKmTJmiyMjI06YdOHBApaWl6tixoyRp0KBBWrZsmTfLAQAAqHG8+mzK6dOnVzgtKytLERERrtcRERHKzMz0Zjles2bNSiUnr1V+fr7y8nJlt9tkGIZ8fHwUHh6p0NBQxcR0U2xsL7NLPSdOjEdF8vPzdfhwoeu13W6Xw2GXYRgyjFOfNWqxWGSxWOTj4ytf398310aNGis0NLTCZdTl8f7j+J4YT7vdLpvtmOt9i8UiX18/+fr6njZeNXV8zrbtnM0ft60/OrGtnVDeNvdHJ7bBE/64Lf7R2bbNszHzs2HfNceaNSv10Uf/Vmlpqet7o2/f/vVuHDx18viFh0fopZdeNbukSjPtQeHlHQBPPuC5IyysYXWVUyUpKcnauWuXGjaOVFDjqFOmlTqknbt2yc/PR7fddrNJFZ5bKSnJSkvdpmaNGpQ7PUhSUODJ7/iq0puirVBlWeV/6R46XFqnxzslJVk7d29X4Int3yr5hPjJR34KUGC5bYp1VMWFRyVJJblHauz4pKQka/f2rQr38ezQ5CPprDHI6uNR3y6GIdlsFU/PzdHh3ByPus5x2E39bFJSkpW6M1XBDcMrmMOiBv4hv7/0r/wyjpVKmYcKyp1WdCSnxm6b3pSSkiyn06EWTY+foMjOy1FKSnK9GwdPpaQky2JYFBgQqJycbEVEBJtdUqWZFsaioqKUk/P7ASs7O7vcX2eeSW7uETmdZ/5f7blgsznUsHGkOlx3e7nTf/72I9lsDmVnF53jysxhsznUrFEDPXBtK1OW/9b69Do93jabQ4FhDdXuxk4etd/2+X9r7PjYbA6F+/hqUEgTs0sxRWJBnqmfjc3mUHDDcF0TPdCU5W/cvLjGbpveZLM5FNEkRLf0u0GS9OnS1fVyHDxlszl0QdMWkqTt6ak1ctysVssZTyCZdmuLFi1aKCAgQCkpKZKkxYsXq0ePHmaVAwAAYIpzHsZGjBihLVu2SJJmzJihuLg49e3bVyUlJRo+fPi5LgcAAMBU5+TXlKtXr3b9PHfuXNfP7dq106effnouSgAAAKiRuAM/AACAiQhjAAAAJiKMAQAAmIgwBgAAYCLCGAAAgIkIYwAAACYijAEAAJiIMAYAAGAiwhgAAICJCGMAAAAmIowBAACYiDAGAABgIsIYAACAiQhjAAAAJiKMAQAAmIgwBgAAYCLCGAAAgIkIYwAAACYijAEAAJiIMAYAAGAiwhgAAICJCGMAAAAmIowBAACYiDAGAABgIsIYAACAiQhjAAAAJiKMAQAAmIgwBgAAYCLCGAAAgIkIYwAAACYijAEAAJiIMAYAAGAiwhgAAICJCGMAAAAmIowBAACYiDAGAABgIsIYAACo
ldasWan09DTtzdivvRn7Xe/VNoQxAABQKyUnr5WcTp0f0ULnR7RQYECD4+/VMr5mFwAAAOCp8yNa6F+3PyRJeumj10yuxjOcGQMAADARYQwAAMBEhDEAAAATEcYAAABMxAX8QDVbs2alEhMTPG5fXFyskpKjFU632WwyDEOZrxzyqH/D7lSm5aDuuOPmcqcHBgbpvPPO86hvSRo06DbFxvbyuD1QX6xZs1LJyWuVnp4mh92mT5euliRl5xYor7BYcXHTFBPTjf2pHCePnRxO14X7+7IOSLnWWjd2hDGgmiUnr9XBzIPyCfLzvJOAiidZA6rQ70kcFbx/xHlUR4oqDoNn7POoTcnJa2vNARAwU3LyWu3etUNNQoIlBcjhsEuSmoQ0lCTt3rVDktifypGcvFbpu3erRXgzSZLT6ZQk1+v03bsl1Z6xI4wBXhB6QbhCr29pdhnnXP43+80uAahVmoQEq//1XcqdtuSbDee4mtqlRXgzjR34QLnTZi1+6xxXUzVcMwYAAGAiwhgAAICJCGMAAAAmIowBAACYyKth7IsvvlC/fv3Uu3dvzZ8//7Tpv/76qwYPHqwbb7xRDzzwgA4fPuzNcgAAAGocr4WxzMxMzZw5UwsWLFBSUpI+/vhjpaamnjLP9OnTNWbMGH3++edq06aN3nnnHW+VAwAAUCN5LYytW7dOMTExCgkJUVBQkPr06aNly5adMo/T6VRxcbEkqaSkRA0aNPBWOQAAADWS1+4zlpWVpYiICNfryMhIbd68+ZR5JkyYoHvuuUfPPvusAgMDlZDg+V3Lz6UTd/49IT09TcdsDv387Uflzn+kIEvpxbmKi5vmeq823RkYAFB15X132O3HKryfWG5BkQqPlPHdofLHznA4K7yf2P6cQ7LkW2vN2HktjBmGcdp7FovF9XNpaameeOIJffDBB4qOjtZ7772n8ePHa86cOW4vIyysYbXUWlkpKcnakZqqwOBwSZK1QYgaNJDsjvLvad4gOEyStC+zQJJUUpQjPz8f3XZb+Y+jqe38/HxUVgNqiIgINm3Z9VlVxr6+j53Etmvm+ntbSkqydqVuV2jw8ceNBTfwleQrp738747QhkGSpLzM4zdTzi8qrtPfHWeSkpKstF271aJJU0lS80aRkiTDcXrWkKQWocfns+Uff5rIgbyMGj12XgtjUVFR2rRpk+t1VlaWIiMjXa937NihgIAARUdHS5L+9re/6ZVXXqnUMnJzj8jpLP+D8CabzaHA4HBddPUgj9rv+iFRNptD2dlF1VxZzWCzVfSgnXNbg1njWxPW30xVGfv6PnYS225dPzaGBp+nnl2u9Kj9qg2/1OnxORObzaEWTZpqdN+/e9Q+/qt3TR07q9VyxhNIXrtmrGvXrlq/fr3y8vJUUlKi5cuXq0ePHq7prVq1UkZGhnb/7/lRq1atUvv27b1VDgAAQI3k1TNjDz/8sIYPHy6bzaZbbrlF0dHRGjFihMaMGaP27dsrLi5OY8eOlWEYCgsL07PPPuutcgAAAGokrz4ofMCAARowYMAp782dO9f183XXXafrrrvOmyUAAADUaNyBHwAAwESEMQAAABMRxgAAAExEGAMAADARYQwAAMBEhDEAAAATEcYAAABMRBgDAAAwEWEMAADARIQxAAAAExHGAAAATEQYAwAAMBFhDAAAwESEMQAAABMRxgAAAExEGAMAADARYQwAAMBEhDEAAAATEcYAAABMRBgDAAAwka/ZBQAAUBesWbNSyclrK5yenp4mu61Mqzb84lH/+UXFKipNU1zctArniYnpptjYXh71D/MQxgAAqAbJyWuVunObGp/XoNzpQX6S/ALkcNg96r9RUIAkKftgWrnTC4tLJYkwVgsRxgAAqCaNz2ug7tFtTFn2fzbvMWW5qDquGQMAADARYQwAAMBEhDEAAAATEcYAAABMRBgDAAAwEWEMAADARIQxAAAAExHGAAAATEQYAwAAMBFhDAAAwESEMQAAABPx
bMpyrFmzUsnJayucnp6epjKbXbt+SPSo/5KiHKWXFigublqF88TEdDPtYa9r1qxUYmKCx+1zcrJl2G2atqSg+oqqhIISmywZBRo9+n6P+xg06LYqjb+9oEz53+z3uH1tZS8ok8Kq1sfugny9dfhw9RRUyxQ5HepodhGACdz53jXsDsV/9a5H/R/IOyTLYZ8a+71LGCtHcvJabd+ZKv/zmpQ/g18j+flJdofTo/79gprIKWnPwbxypx8rPv6+WRtFcvJaZWccUEiAZ5tHYx9JPj6S07PxqaqQAB9JTtnyMz1qX1BmV3LyWo/HPyamm0ftTkhPT1OZvVQBTYKq1I8nyvKOKsC3gVq1au1ZB2FVW/+YmG46ePCAx+1zcrLlPGZTIz8/j/uoisM2m6z+fgoPj/CofaCqvv0AtVFy8lql7dql5iFR5U5v1jBckmR4+L1yot9juUfKnX6w4Pj3BWGshvE/r4maXtHflGVn/LrElOWerG2zJrq3U7jZZZjinf/mVKl9bGyvKu3QcXHTlJ6/Vy37tK1SHZ7Y//V2tQq9QBMnTjnny5aqZ+yOpO3R31qcX41Vue/jA/vUsHUb08YPqM2ah0Tpwdjhpiz7jTUfmrLcE7hmDAAAwESEMQAAABMRxgAAAExEGAMAADARYQwAAMBEhDEAAAATuX1ri507d2rt2rWyWq3q0aOHWrdu7cWyAAAA6ge3zowtXLhQw4cP12+//abNmzdryJAhWrp0qbdrAwAAqPPcOjP2/vvva/HixYqKOn4H24MHD+r+++9Xv379vFocAABAXefWmbGGDRu6gpgkNW/eXP7+/l4rCgAAoL5w68xY586dNWXKFN15553y8fFRUlKSWrdurV9//VWSdMUVV3i1SAAAgLrKrTC2YsUKSdL3339/yvujR4+WxWLRqlWrqr8yAACAesCtMLZ69Wpv1wEAAFAvuRXG8vLy9Pnnn6u4uFiGYcjpdCo9PV0vvfSSt+sDAACo09y6gH/s2LFat26dFi1apIyMDC1evFhW69mbfvHFF+rXr5969+6t+fPnnzZ99+7dGjZsmG688Ubde++9KiwsrPwaAAAA1GJuhbGDBw9qzpw56tGjh4YOHaqFCxdq7969Z2yTmZmpmTNnasGCBUpKStLHH3+s1NRU13TDMPTggw9qxIgR+vzzz3XZZZdpzpw5VVsbAACAWsatMBYeHi5Jat26tXbs2KGoqCjZ7fYztlm3bp1iYmIUEhKioKAg9enTR8uWLXNN//XXXxUUFKQePXpIkv7xj3/ozjvv9HQ9AAAAaiW3rhkLCwvT22+/rY4dOyo+Pl4NGzbUkSNHztgmKytLERERrteRkZHavHmz6/XevXsVHh6u8ePH67ffftOll16qyZMne7gap1qzZqWSk9d63D49PU3HjtmU8euSaqmnso4V5yo9/bDi4qZ53EdMTDfFxvaqxqoAoG6rju8O27FS/Wfznmqsyn2FxaU6mp5m2ndHVcYvPT1Nht2hN9Z86FH7qjpYkCnLkRzTxs6tMPbUU0/pyy+/VOfOnXXllVdq9uzZevTRR8/YxjCM096zWCyun+12uzZu3Kh58+apffv2mjVrlp577jk999xzbhcfFtaw3PdTUpK1bcdO+TQIdbuvUwttKGuAZLc7PWtfRdaAUB2TtHNvjkftHaX58vPz0W233exRez8/H5V61LLu8PPzUUREsGnLNpOZ615VZo/diRoYv6rVYNb4paQkK3XHVjUM9Oym5gFWKaCBnxwORzVX5p6GDfwkOZSxb5dH7Y+UHKvSd0dKSrLSUnepWaPISrdtGhgmSTIcp2eHc6FZ8PGay7KKPGp/6HBWlcbO7TNjw4cPlySNGzdO48aNO2ubqKgobdq0yfU6KytLkZG/f0ARERFq1aqV2rdvL0nq37+/xowZU6nic3OPyOk8/YOz2RzyaRCqhhf2rlR/dcWR3StkszmUne3ZRmWzmXMgqUmqMn7VsWwzmbnuVWX22J2o
gfGrWg1m7nsNA/3V+eLKh4m6YFNqVpW/O5o1itT9Xe+o5spqvjnrFp5x7KxWS4UnkKSzhLFOnTqdcjbrj3788ccKp3Xt2lXx8fHKy8tTYGCgli9frqeffvqUvvPy8rRt2za1a9dOq1ev5k7+AACg3jljGFuy5Pg1U9nZ2adc/+WOqKgoPfzwwxo+fLhsNptuueUWRUdHa8SIERozZozat2+v1157TZMmTVJJSYmaNm2qF154wfM1AQAAqIXOGMZatGghSbr33ntP+UtIdw0YMEADBgw45b25c+e6fu7QoYM+/fTTSvcLAABQV7h1a4sWLVroxx9/lNNpzgXtAAAAdZVbF/Dv2rVLQ4YMka+vr/z9/WUYhiwWyxmvGQMAAMDZuRXGynuUEQAAAKrO7V9TbtmyRQkJCWrSpIn++9//uq4nAwAAgOfcCmNz5szRwoULtWzZMpWWlurVV1/Va6+95u3aAAAA6jy3wtiXX36puXPnKjAwUKGhoUpISHDd9gIAAACecyuMnbhw/4RGjRrJ19ety80AAABwBm4lqmbNmumbb76RxWLRsWPH9M4773DNGAAAQDVwK4xNnjxZjz32mLZv366OHTuqQ4cOeumll7xdGwAAQJ3nVhgLCgrSBx98oJKSEjkcDjVsWPHDLgEAAOA+t64Z69mzpx577DH9+uuvBDEAAIBq5FYYW7VqlTp16qTnn39ef/nLX/TOO+8oLy/P27UBAADUeW6FseDgYN1xxx365JNPNGvWLH399de67rrrvF0bAABAnef2/Sl+/fVXffbZZ/rqq6/Uvn17vfLKK96sCwAAoF5wK4wNGDBAJSUlGjx4sBITExUVFeXtugAAAOoFt8LYhAkT1K1bN2/XAgAAUO+4FcY6duyoqVOn6rvvvpPdble3bt30xBNP8JeVAAAAVeTWBfzPPfecjh07ptdee02vv/66LBaLnn76aW/XBgAAUOe5dWbs559/1ueff+56/cwzz+ivf/2r14oCAACoL9w6M+ZwOOR0Ol2vnU6nfHx8vFYUAABAfeHWmbFrr71WY8eO1R133CFJWrhwobp06eLVwgAAAOoDt/+a8o033tDLL78sp9Op7t27a+TIkd6uDQAAoM5zK4z5+vpq9OjRGj169GnThg4dqnnz5lV7YQAAAPWB23fgr8iRI0eqow4AJynLK9H+r7ebslyFnvPFVqvsY2X6+MA+05Zt5g1/1qxZqcTEBI/b5+Rky253alne3Gqsyn0lZUXKzLZq9Oj7Pe5j0KDbFBvby+P2eQWFSt3vcfNaLa+gUE3Pr1ofGblZWrfjh+opqBbJyM1Sq8hgj9tXOYxZLJaqdgHgJDExZ77Bcn5+vg4fLnS9ttvtcjjsv/+RzYl90jAkSVarVT4+vvL1/X13b9SosUJDy0ldoWdffk12ttrT09PkLCtTVFCQR/1nHj0qa0CAWrVqXe70hm7U4E3JyWuVkZGpAL/zPGrvYwmUj59O+YOtc+lE3fm5nv0nv8xWrOTktVUKY4AZqhzGAFSv2NheHn2ZxMVNU87RHPUY+mdJ0nfzlis8KFwTJ06p7hJrrLONXVzcNB3dv09DL23nUf/zdmxTUMvza/SYNo1srehL+pldhik271xa5T6ahDTWxS0jq6Ga2qegtOp9NA2LVNdLr656R7XMLzmpVWrv1q0tAAAA4B2EMQAAABNVOYwZ/7suBQAAAJXnVhi77777TnvvtttukyTNnz+/eisCAACoR854Af+YMWO0Z88e7du3TwMGDHC9b7fbZbUez3HnnefZX+0AAADgLGHsscce04EDBzR58mRNnjzZ9b6Pj48uueQSrxcHAABQ150xjLVs2VItW7bUsmXLXGfCAAAAUH3cus/Y119/rRdffFGFhYUyDEOGYchisejHH3/0dn0AAAB1mlthbNasWZowYYIuv/xy7rgPAABQjdwKY40aNdKf//xnb9cCAABQ77h1IViHDh307bffersWAACAesetM2Pf
fvut5s2bJz8/P/n5+XHNGAAAQDVxK4y9//77Xi4DAACgfnLr15QtWrTQli1blJCQoCZNmui///2vWrRo4e3aAAAA6jy3wticOXO0cOFCLVu2TKWlpXr11Vf12muvebs2AACAOs+tMPbll19q7ty5CgwMVGhoqBISErRkyRJv1wYAAFDnuRXGfH195e/v73rdqFEj+fq6dbkZAAAAzsCtRNWsWTN98803slgsOnbsmN555x2uGQMAAKgGboWxyZMn67HHHtP27dvVsWNHdejQQTNmzPB2bQAAAHWeW2EsKipKr7/+uqxWqxwOh8rKyhQWFubt2gAAAOo8t64ZW7p0qW6++WYFBgYqOztb/fv31+rVq71dGwAAQJ3nVhh788039eGHH0qS2rRpo8TERMXHx3u1MAAAgPrArTDmdDrVtGlT1+tmzZrJ6XR6rSgAAID6wq0w1qRJE3300Uey2+1yOBz69NNPFR4e7u3aAAAA6jy3wthTTz2lhIQEdejQQdHR0UpISNDUqVPP2u6LL75Qv3791Lt3b82fP7/C+b755hvdcMMNbhcNAABQV7j115SpqalKTExUYWGhfHx81LBhw7O2yczM1MyZM5WYmCh/f3/dfvvt6tKliy6++OJT5svJydHzzz/vWfUAAAC1nFtnxmbOnClJaty4sVtBTJLWrVunmJgYhYSEKCgoSH369NGyZctOm2/SpEkaNWpUJUoGAACoO9w6M3bppZfqjTfeUOfOnRUUFOR6/4orrqiwTVZWliIiIlyvIyMjtXnz5lPm+fDDD3X55ZerQ4cOla0bAACgTnArjP3888/6+eef9cknn7jes1gsWrVqVYVtDMM47T2LxeL6eceOHVq+fLnef/99ZWRkVKZml7Cw8s/S+fn5eNRfXeLn56OIiGCP25ZWcz21TVXGzyzlbfe1cT28qTqODTV5TDn2Vf3YV99VdfzKqrme2qQqY+dWGPPkBq9RUVHatGmT63VWVpYiIyNdr5ctW6bs7GwNHjxYNptNWVlZGjJkiBYsWOD2MnJzj8jpPD302WyOStdb19hsDmVnF3nctr6ryviZpbzPrTauhzdVx7Zdk8eUfZdjX1Uxfp4709hZrZYKTyBJbl4zVlxcrKeeekp33XWXCgoK9OSTT6q4uPiMbbp27ar169crLy9PJSUlWr58uXr06OGaPmbMGH399ddKSkrSnDlzFBkZWakgBgAAUBe4FcaeeeYZBQcHKzc3VwEBATpy5IiefPLJM7aJiorSww8/rOHDh2vgwIHq37+/oqOjNWLECG3ZsqVaigcAAKjt3Po15datWxUXF6dvv/1WgYGBmjFjhvr373/WdgMGDNCAAQNOeW/u3LmnzdeyZUuedQkAAOolt86MWa2nzuZwOE57DwAAAJXn1pmxq6++Wi+++KJKS0v1/fffa968eerSpYu3awMAAKjz3Dq99eijjyooKEiGYejpp59Wu3bt9Nhjj3m7NgAAgDrPrTNj+/bt0+rVq7Vv3z4ZhqGUlBTl5uaqefPm3q4PAACgTnPrzNjEiRN16623um7+2qdPHz3xxBPerg0AAKDOcyuMlZSU6Pbbb5efn5/8/f01bNgw5eTkeLs2AACAOs+tMHb++efrxx9/dL3esWOHWrZs6bWiAAAA6gu3rhnLzMzUsGHD1LZtW/n6+uq3335TRESE6x5iX3zxhVeLBAAAqKvcCmP85SQAAIB3uBXGrrnmGm/XAQAAUC9xG30AAAATEcYAAABMRBgDAAAwEWEMAADARG5dwF8bFRXk6NiBX80uwxRlBTnSBeFV6iPjiE3v/Ld+3tg344hNrc0uwkOFmfn6bt5y18/hbaq2HdRFmSVHNW/HNo/btqnmelCzHCk5pk2pWWaXYYojJceq3Mehw1mas25hNVRTuxw6nKXWkcEet6+zYQyei4npVqX2+fn5Ony4sMLpdrtdDofd9drpdEqSLJby5zeM4/9arb+fyPXx8ZWvb8Wbb6NGjRUa
GlqJqn/XWlUfAzPExHTTtm2/KWfv8S+SqKhmtXI9vOmP43FiWy0rK5XhdCrgf9tUmd0ui9WqgIAGp2xLbcrpA3VHVT/b9PQ02Y6VqvF5DaqposopLC6Vn38DtWrV2uM+qjIGVWmbnp4mw+5Q85Aoj/uoioMFmbL4+ng8dq0jg6u0/nU2jAWHhKthiyvMLsMUR8oOVql9bGwvxcb2qqZqzi4ubprsBYc0uu/V5U6P/+oH+YY008SJU85ZTbVRbGwvffjhO3I6nbJarXrhhVlml1TjVLRtx8VNU1lOlh7403WSpLe+/1YB4ZFsc/VMVY99cXHTlH0wTd2jzTl/+p/NexTRvLVp221Vxi8ubpqO5R7Rg7HDq7kq97yx5kP5hzU0bey4ZgwAAMBEhDEAAAATEcYAAABMRBgDAAAwEWEMAADARIQxAAAAExHGAAAATEQYAwAAMBFhDAAAwESEMQAAABMRxgAAAExEGAMAADARYQwAAMBEhDEAAAATEcYAAABMRBgDAAAwEWEMAADARIQxAAAAExHGAAAATEQYAwAAMBFhDAAAwES+ZhcAADXBocJCvfX9t66fW4dHmlyRZ4pL8rR551KzyzBFcUmepCam1lBYXKr/bN5j2rIjTFkyqoowBqDei4nppvT0NB0qLJQkHXM4FBPTzeSqKq+qNaenp8l2zK7ghuHVVFHlFB3JkZ+/r1q1au1hD01M/dzOtuz09DTZbWUKDT7Po/7zi4rl6xdQ4fhEuFEDaibCGIB6Lza2l5KT18pWkCdJOlh4WLGxvUyuqvJiY3tVqe64uGnKPFSga6IHVl9RlbBx82JFNQvRxIlTTFl+VZ1t/OPipikvc796drnSo/5XbfhFTaJa1trxQcW4ZgwAAMBEhDEAAAATEcYAAABMRBgDAAAwEWEMAADARIQxAAAAExHGAAAATEQYAwAAMBFhDAAAwEReDWNffPGF+vXrp969e2v+/PmnTV+5cqVuuukm3XjjjRo5cqQK//coEgAAgPrCa2EsMzNTM2fO1IIFC5SUlKSPP/5YqamprulHjhzR1KlTNWfOHH3++edq27at4uPjvVUOAABAjeS1MLZu3TrFxMQoJCREQUFB6tOnj5YtW+aabrPZNHXqVEVFRUmS2rZtq0OHDnmrHAAAgBrJaw8Kz8rKUkREhOt1ZGSkNm/e7HodGhqqXr2OP1C1tLRUc+bM0bBhw7xVDgAAqMEOFmTqjTUfmrbs1mENTVm25MUwZhjGae9ZLJbT3isqKtLIkSPVrl073XzzzZVaRlgFA+fn51OpfuoiPz8fRUQEm12GW/z8fGR3Y57asj41BeNVOX5+PrL972er1VIvx68mHDvr8r5eHeNbV8end++e+uYM47N79245jtnVokkzj/o/kHdIPv6+uvDCC8udfknTxrr++utNG1uvhbGoqCht2rTJ9TorK0uRkZGnzJOVlaV7771XMTExevzxxyu9jNzcI3I6Tw99Npuj8gXXMTabQ9nZRWaX4RZ3Pq/atD41BeNVOSdvh06nUS/HryYcO+vyvl4d41tXx6dz5+7q3Ll7hdPj4qbJln9Uo/v+3aP+4796V36hQXr00UlnnM9bY2u1Wio8gSR58Zqxrl27av369crLy1NJSYmWL1+uHj16uKY7HA794x//UN++ffXEE0+Ue9YMAACgrvPqmbGHH35Yw4cPl81m0y233KLo6GiNGDFCY8aMUUZGhn777Tc5HA59/fXXkqQrr7xS06dP91ZJAAAANY7XwpgkDRgwQAMGDDjlvblz50qS2rdvr23btnlz8QAAADUed+AHAAAwEWEMAADARIQxAAAAExHGAAAATOTVC/gBAMDv8ouKtWrDLx63bRJVzQXVIgfyMhT/1bset20dWv4NX2sCwhhQh7RufaHS0nardeuae9AB6quYmG6nvE5PT5PdfkxhIeXf9T23oEi+vv5q1aq1JKlJ1Ol91BfljZ3hcKplePl35N+fc0gWH6tr7FqH
Xlijx44wBtQhU6Zwnz6gpoqN7aXY2F6u13Fx01SQe0j9r+9S7vxLvtmgkLBmmjhxyrkqscYqb+zsh0s0duAD5c4/a/Fb8m0UWGvGjmvGAAAATEQYAwAAMBFhDAAAwESEMQAAABMRxgAAAExEGAMAADARYQwAAMBEhDEAAAATEcYAAABMxB34AQAwSV5BkZZ8s6HCaSFh5T/uB3ULYQwAABOceFZienqaHHabIsJCJEnZuQXy8fXThRddWqOfp4jqQxgDAMAEJ563GBc3TUUF2bql3w2SpE+XrlZwSEStea4iqo5rxgAAAExEGAMAADARYQwAAMBEhDEAAAATEcYAAABMRBgDAAAwEWEMAADARIQxAAAAExHGAAAATEQYAwAAMBFhDAAAwESEMQAAABMRxgAAAExEGAMAADARYQwAAMBEhDEAAAATEcYAAABMRBgDAAAwEWEMAADARIQxAAAAExHGAAAATEQYAwAAMBFhDAAAwESEMQAAABMRxgAAAExEGAMAADARYQwAAMBEhDEAAAATEcYAAABMRBgDAAAwEWEMqIPy8vLMLgEA4CbCGFDHbN++VQ8//KB27NhmdikAADd4NYx98cUX6tevn3r37q358+efNn3r1q0aPHiw+vTpoyeeeEJ2u92b5QB1nsPh0Ny5r0uS5s59XQ6Hw+SKAABn47UwlpmZqZkzZ2rBggVKSkrSxx9/rNTU1FPmGTdunCZPnqyvv/5ahmEoISHBW+UA9cKKFct0+HChJKmwsEArV35tckUAgLPx9VbH69atU0xMjEJCQiRJffr00bJlyzRq1ChJ0oEDB1RaWqqOHTtKkgYNGqTZs2dryJAh1bJ8R2m+juxeUS191TaO0nxJ4WaXUSkH8ooU/9UPFU5rFdLsHFdU+xQUFCgx8WOVlZVJksrKyrRo0UeKiemqxo1DzC2uljjwv2vtLD5eOzTWeEVHcrRx82LTlh2lEFOWbbbsvAJ9unS16+fgkAiTK6r5DuQc0qzFb1U4rVWjC89xRZ7z2hEnKytLERG/b0yRkZHavHlzhdMjIiKUmZlZqWWEhTUs9/3evXvKz8+nkhX/Li8vTwUFBRVOt9lsp/xK1TAMGYZxxj4tFossFovrta+vr/z8/CqcPyQkRE2aNHG/6FNE6frrr1dERLCH7c+t3r176ptvfJSXl6eMjAw5HA5ZLBb5+fkpIiJCF7dtWavWxyz/+c9KOZ3OU95zOp365ZcUDRw40JyiapHevXsqMbFQBQUFuuaaa+rl9na2Y+cfj40njoXlHQNPHPP+eKw787EtrF7u671799Tbb+/RgYxs+fj4qGnTpurdu2e9G4fKOPl7Izs7W8eOHZN0fLtr3ry5Lm57Sa3alrwWxsoLJyeHkbNNd0du7hE5naf307lzd3Xu3L1SfdVF2dlFZpfgFnc/r9qyPmZp376zrNYPT3nParXqyiuvYuzc8MftsD6OWU05dta3sa9o3OvbOFRGbfvesFotFZ5Akrx4zVhUVJRycnJcr7OyshQZGVnh9Ozs7FOmA6icxo1DNGjQ3xQQECBJCggI0ODBt/MrSgCo4bwWxrp27ar169crLy9PJSUlWr58uXr06OGa3qJFCwUEBCglJUWStHjx4lOmA6i83r3/4gpfjRuHqFevPuYWBAA4K6+eGXv44Yc1fPhwDRw4UP3791d0dLRGjBihLVu2SJJmzJihuLg49e3bVyUlJRo+fLi3ygHqBR8fH91334OSpBEjRsrHx/NrJwEA54bFONuV5zVYRdeMAfVdXl5eFf4ABABQnUy7ZgyAeQhiAFB7EMYAAABMRBgDAAAwEWEMAADARIQxAAAAExHGAAAATEQYAwAAMBFhDAAAwERee1D4uWC1Vu7B4gAAAOfa2fJKrb4DPwAAQG3HrykBAABMRBgDAAAwEWEMAADARIQxAAAAExHGAAAATEQYAwAAMBFhDAAAwESEMQAAABMRxgAAAExEGDtJUVGRRo4cWel2mzdv
1osvvlgtNSQmJmrChAnV0ldNNHv2bG3atKnC6Rs2bNCwYcPOYUWoTtW5LwDladu2rdklwCRV/X6oyd8thLGTFBYWatu2bZVul5qaqtzcXC9UVPf88MMPcjgcZpcBL2FfAFBTbdy40ewSKlSrHxRe3Z555hllZWXpoYceUu/evfXBBx/I6XTqiiuu0JQpU5SamqoRI0ZoyZIlslqtuvnmm/X6669r9uzZOnr0qN544w1FRUXps88+U0FBgWJjY9W/f389/fTTOnr0qPLy8nTPPfdo+PDhio+PV2ZmptLT03XgwAHdeuutevDBB0+p59lnn1VOTo5efPFF+fj4mDQqp9uwYYPefPNNGYahvXv3qk+fPgoODtbKlSslSXPmzNGyZcuUlJSkkpISWSwWzZo1S1u2bNEvv/yiSZMm6dVXX5XT6dSTTz6p0tJSNW7cWDNmzJAk5eXlacSIEdq7d6/atGmj2bNn69ixY3rkkUeUk5MjSXrooYfUs2dPDRs2TKNGjVKXLl20f/9+DR8+XKtXr9aECRMUGBiolJQUFRUV6fHHH1dSUpK2bdumXr16efXs44YNGxQfHy9fX18dOnRI0dHRevDBBzVy5EiFhoYqICBA77zzjl544QVt3LhRDodDgwYN0t13362MjAw9+uijOnr0qKxWqyZNmqSOHTvqhhtu0IcffqiWLVtqw4YNevXVV/Xvf/9bw4YN02WXXab169ertLRUkyZN0r///W+lpqbq7rvv1t133634+HgdPHhQ27dvV25ursaOHavk5GT9/PPPateunWbOnCmLpeKH2C5evNijfeGP27MZ7Ha7pk6dqp07dyonJ0dt2rRR//799d5770mSnE6nduzYoU8++UQLFizQNddco0GDBkk6fgZm+/btio+P108//aRDhw7pzjvvVPfu3TV16lQVFBSoQYMGmjx5si6//HIzV1NS3dvuZs6cqfXr16uwsFChoaGKj49XRESEJGny5MnavHmzQkND9eyzz6p58+Z677339Nlnn8lqtSo6OlpPPfWUaeM+ffp0LV269LT9JiAgQEuXLtXs2bMVGBioyy+/XA6HQ88995y++uorvffeeyotLVVZWZmeeeYZXX311eWuV2JiojZu3KjnnntOklzHwVatWpX7OdYl+fn5uvfee5WVlaXo6GhNmTJFCQkJp33fXHTRRbrhhhsUHR2trVu36tprr5Uk3Xrrrfrkk09MXotyGHDZt2+fERsba+zYscO44447jNLSUsMwDGPGjBnGa6+9ZhiGYbzyyivG+PHjjUceecR46623DMMwjEWLFhnjx493/dy7d2/DZrMZhmEYzzzzjLFu3TrDMAxj7969RseOHQ3DMIzZs2cbt9xyi1FWVmbk5OQYHTt2NAoLC119zZ492xg9erSrn5okOTnZ6NSpk3Hw4EHj6NGjRseOHY2FCxcahmEYEyZMMN5//33jrrvuMkpKSgzDMIxZs2YZTz31lGEYhjF06FAjOTnZMAzD6Nevn7F69WrDMAxj/vz5xnPPPWckJycbHTt2NPbu3Ws4HA5j8ODBxpo1a4zExERj6tSphmEYRmpqqvHcc8+d1t+Jz88wDGP8+PHGyJEjDcMwjMTEROOqq64ycnJyjKKiIqNTp07G4cOHvTo+7du3N3bt2mU4nU5j9OjRxrvvvmtceumlxr59+wzDMIwFCxYYzz77rGEYhlFWVmYMHTrU+OGHH4z4+Hhj7ty5rn7efvttwzAMIzY21tU2OTnZGDp0qGv9p0+fbhiGYcTHxxu9evUyjh49auzfv9/o3LmzYRjHt7VBgwYZNpvN2LBhg9GuXTtj586dhs1mM3r37m1s3bq1wnWpyr5QE2zcuNG13TgcDmPo0KHGsmXLXNOffvpp1/Tx48cbixYtck279NJLDcM4Pn4nxtswDONvf/ub8euvvxqGYRg7d+40/vznP3t9PdxRl7a7tLQ0Y9SoUYbD4TAMwzDGjRtnvPPOO4ZhHP9c
kpKSDMMwjHnz5hkjR440bDab0aVLF+PYsWOGw+EwnnzySSMjI6P6BvcMyhv3119/vdz9Jjc31+jWrZuRkZFhOBwO46GHHjLGjx9vOBwOY/jw4UZubq5hGIbxySefGA888ECF6/XH/ezEcbCiz7GuSE5ONjp06GDs2bPHcDqdxj//+c8zft/ExsaWu0/XRJwZK8eGDRuUnp6u2267TZJks9lc//N98MEHNXjwYDVo0KDCa2Muv/xy+foeH9oJEybo+++/11tvvaXt27fr6NGjrvm6dOkif39/hYWFKSQkREVFRZKk7777Tnl5efr0009d/dQ0l156qZo1ayZJCg0Ndf2vo3nz5jp8+LBeeuklffnll0pLS9P333+vyy677JT2eXl5ys7OVmxsrCRpyJAhko6Pfbt27XT++edLki666CLl5+erU6dOevnll5WZmanrr79eDz300Flr7NGjh6umSy65RGFhYZKkkJAQFRYWKjg4uBpGonxXX321LrzwQknSTTfdpISEBIWFhally5aSpPXr12vr1q1KTk6WJB09elTbt2/Xtddeq9GjR2vr1q267rrrNHTo0LMu6+T17NChgwIDA9WiRQsdPnzYNU+3bt3k6+ur5s2bKyIiQhdffLEkKSoqSoWFhRX2XdV9wWxXX321QkJCNH/+fO3evVtpaWmuffDTTz/Vb7/9pg8++OCs/URHR0uSiouL9csvv2jixImuaUePHlV+fr5CQ0O9sxKVUFe2u1atWmn8+PH65JNPtGfPHv3000+64IILJEkNGjTQjTfe6FrHWbNmydfXV506ddItt9yinj176s4771RUVFRlh89jfxz30aNHKzQ09LT9ZtOmTerUqZOrtoEDB2rlypWyWq167bXXtHr1au3Zs0cbN26U1Wqt9Hp58jnWNp07d1br1q0lSQMGDFBiYuIZv286dOhgUqWVUzO/6U3mcDjUt29fTZo0SdLxA/CJ65yKiopUXFys4uJiFRQUqEmTJqe1b9CggevnsWPHqlGjRoqNjVW/fv305ZdfuqYFBAS4frZYLDIMQ5LUokULPfzww3rqqaf00UcfyWqteZf2+fn5nfL65F+jHjp0SH/72980dOhQ9ejRQ+Hh4dq6desZ25eVlSkrK0uSTgmgJ8aldevW+uqrr/T9999rzZo1evfdd/XVV19Jkmvc7HZ7hcs416H25PEwDEM+Pj6nbBcOh0Pjxo3Tn//8Z0nHw2lQUJAaNGigL7/8Ut98842WLl2qzz77zPUrtaqsp6djUdV9wWyrVq3S7NmzNXz4cA0aNEj5+fkyDEM//vij3nzzTX300UeusTl5H7TZbKf0c+Kzczqd8vf3V1JSkmtaRkaGQkJCzs0KnUVd2e5++eUX/etf/9Ldd9+tPn36yGq1uuo4+XhoGIar39dff10//fSTvvvuO913332aMWOGrrnmGreXWRV/HPeK9puNGzfK6XSe1r64uFiDBw/WTTfdpKuvvlpt27bV/PnzK1yvk7dV6fft9aqrrqrwc6wrTt6ODMPQ4cOHz/h9c/L3bE1W877lTeTr6yu73a4uXbpoxYoVys3NlWEYmjp1qut/z9OmTdPQoUM1ZMgQTZs2TdLxHfGPB6oT1q5dqzFjxqhXr1764YcfJOmsF7BfdNFFuvXWWxUYGOjaIWuTLVu2qFWrVrr77rvVoUMHfffdd6519vHxkcPhUHBwsJo2baq1a9dKkpKSkvTKK69U2Oe8efMUHx+vvn37asqUKcrLy1NRUZFCQ0OVmpoqSa5r1mqClJQUZWZmyul0avHixa6zCCfExMQoISFBNptNxcXFGjJkiH7++We98MILSkpK0s0336wnn3xSv/32mySdsp6rVq06Z+tRnfuCGdavX6++fftq8ODBCg8P1w8//KCDBw/q0Ucf1csvv6zw8HDXvCEhIWfdloKDg9W6dWtXGFu7dq3uvPNO76+Im+rKdvfDDz/ommuu0R133KGLL75Ya9eudR1D
jh496qpl0aJF6tq1q/Ly8tS3b19deuml+uc//6lu3bpp+/bt56zeP477448/Xu5+83//93/asmWLsrKyZBiGli5dKovForS0NFmtVv3jH/9QTEyM65hZ0XqFhoZq165dMgxD+/btc61rRZ9jXZKSkqKDBw+eso1X9H3zRzXt+HQyzoydJCwsTM2bN9f06dM1atQo3XXXXXI6nbrssst0//33a+nSpdq3b59efvllGYahwYMHa+nSpYqOjtarr76qGTNmuE5VnzB69GgNGTJEjRo1Ups2bdSiRQvt37/frXqmTp2qO+64Q71791bTpk29scpe0b17d23btk39+vWTv7+/oqOjtXPnTknSn/70J02ZMkXPP/+8XnzxRU2dOlUvvPCCQkND9cILL2jPnj3l9jlw4EA98sgjGjBggHx9fTVq1Cg1atRI9913nyZMmKBFixapZ8+e53I1zygyMlKPPfaYMjMz1a1bN3Xt2lVz5sxxTb/99tuVnp6um2++WXa7XYMGDVKXLl10wQUX6F//+pc+++wz+fj4aMqUKZKkMWPG6Omnn9arr76q7t27n7P1aNeuncf7wqOPPnrO6qzIrbfeqkcffVTLli2Tv7+/OnbsqAMHDqi4uFhTp051HbQfeOABDRkyRGPHjtWAAQMUExPjulj8j05st2+//bb8/PzOeiH6uVRXtrt+/fpp1KhRGjBggPz8/NS2bVvXcbNRo0ZauXKlXnnlFUVFRSkuLk5NmjTR7bffrltuuUWBgYFq1qyZbr755nNW7x/HfejQoQoKCjptvwkICNCkSZP097//Xf7+/mrZsqUaNWqkdu3a6bLLLlPfvn3VoEEDXX311Tp48GCF6+Xv769FixbpL3/5i9q0aaOrrrpK0vEL+cv7HOuSiy++WI8//riys7MVExOj22+/XevWrSv3++aPevbsqZtuukmJiYk17oyZxTj5XCeAKjv5r86Ac4XtzhyVGff8/Hz9+9//1qhRo2S1WvXMM8+oVatWNfr+Vzg3ODMG1HN79+7V6NGjy532zDPPqH379ue4ItQH9XG7CwkJ0eHDh9W/f3/5+PjoiiuucF3kj/qNM2MAAAAm4gJ+AAAAExHGAAAATEQYAwAAMBFhDACqYMSIEa57cQGAJ7iAHwAAwETc2gJAjVNcXKyJEycqPT1dVqtVV1xxhf76179q+vTpWrJkiaTj93d6+umntWTJEsXHx2vv3r3at2+fsrKyFB0drW7dumnx4sXav3+/xo0bp/79+7s9X05Ojp588knl5uYqOztbLVq00KxZsxQWFqYbbrhB0dHR2r59ux555BHFxcXplVde0ebNm5WQkOBah127dum+++7T2LFjtXr1ar3xxhuy2Wxq0KCBxo8fr06dOik+Pl4//fSTsrKy1LZtW82YMcOsIQdgIsIYgBpnxYoVKi4uVlJSkhwOh6ZMmXLWJ1ekpKQoKSlJfn5+6tGjh5o2bar58+dr5cqVevHFF9W/f3+35/vyyy/VsWNH3X///TIMQ/fff7+SkpL097//XZJ0ySWXaNasWZKkuLg4SdKdd97pejTSggULtGjRIt1///1KS0vTzJkz9eGHHyo0NFQ7d+7UPffco+XLl0uSDhw4oCVLlpzz56cCqDnY+wHUOFdddZVmzpypYcOGqWvXrrrrrruUl5d3xjZdu3ZVcHCwpOOPp/nTn/4kSbrgggtUUFBQqfnuuusubdq0Se+9957S0tK0c+dOdejQwdVH586dK6xjxYoVevfdd7Vw4UIFBQVp7dq1ysrK0t133+2ax2KxaO/evZKkjh07EsSAeo4jAIAa5/zzz9eKFSu0YcMGJScn65577tHtt9+uky9xtdlsp7Tx9/c/5XVFAced+V588UVt3rxZgwcPVpcuXWS3209ZdlBQULl9p6SkaNq0aXr//fddz7Z0Op269tprXWfSJOnQoUOKjIzUihUrKuwLQP3BX1MCqHEWLFigiRMnqnv37ho3bpy6d++u
nTt36uDBg8rNzZVhGFq5cqXXlv+f//xHd911lwYOHKiwsDCtW7fO9VDxiqSmpuqf//ynXnrpJV188cWu92NiYrR27Vrt2rVLkvTtt9/qxhtvVFlZmdfqB1C7cGYMQI0zcOBAbdy4Uf369VNgYKCaN2+u6dOn680339TgwYMVERGh66+/3mvLf+ihh/TCCy/o9ddfl4+Pj/7v//7P9WvFisTFxclms+n55593Bbcrr7xS06dP11NPPaVHHnlEhmHI19dXb7zxBmfEALhwawsAAAAT8WtKAAAAExHGAAAATEQYAwAAMBFhDAAAwESEMQAAABMRxgAAAExEGAMAADARYQwAAMBE/w9r+7ddWMvwAAAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 720x432 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Apply seaborn's settings with the figure size overridden to 10x6.\n",
    "sns.set(rc={'figure.figsize': (10, 6)})\n",
    "\n",
    "# Boxen plot of the percent_overlap column, grouped by summarizer.\n",
    "sns.boxenplot(\n",
    "    x='summarizer',\n",
    "    y='percent_overlap',\n",
    "    data=overlap_df,\n",
    ")\n",
    "\n",
    "# Write the figure to the paper's figs directory.\n",
    "plt.savefig('../paper/figs/paraphrasing_targeted_overlap_percent.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.7 ('summarization')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "fb9173d43df76b287d1b53052eec4ff84d0ee52790be7057998c9269beecf529"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
