{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1f012abd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "afd8b189",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the CSV at ./decoded_competition_wfst.csv into the DataFrame `df`.\n",
    "df = pd.read_csv(filepath_or_buffer='./decoded_competition_wfst.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "33ffc6c2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "repeated words: [], last word: routine\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: air\n",
      "repeated words: [], last word: car\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: imprint\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: choice\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: m\n",
      "repeated words: [], last word: a\n",
      "repeated words: [], last word: portal\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: [], last word: of\n",
      "repeated words: [], last word: a\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['do', 'you'], last word: own\n",
      "repeated words: [], last word: awe\n",
      "repeated words: ['the'], last word: expense\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['year', 'oh'], last word: oh\n",
      "repeated words: [], last word: show\n",
      "repeated words: [], last word: thing\n",
      "repeated words: ['a'], last word: a\n",
      "repeated words: ['a'], last word: a\n",
      "repeated words: ['of', 'n'], last word: n\n",
      "repeated words: [], last word: to\n",
      "repeated words: [], last word: is\n",
      "repeated words: [], last word: in\n",
      "repeated words: ['o'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['own'], last word: own\n",
      "repeated words: ['own'], last word: own\n",
      "repeated words: ['au'], last word: au\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['own'], last word: own\n",
      "repeated words: ['i', 'own'], last word: own\n",
      "repeated words: ['to', 'o'], last word: o\n",
      "repeated words: ['i', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['own'], last word: own\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['own'], last word: own\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['the', 'oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['to'], last word: do\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: r\n",
      "repeated words: [], last word: them\n",
      "repeated words: ['it'], last word: on\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['own'], last word: own\n",
      "repeated words: ['own'], last word: own\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['own'], last word: own\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: [], last word: on\n",
      "repeated words: ['no'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: [], last word: view\n",
      "repeated words: [], last word: no\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['no', 'o'], last word: o\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['the'], last word: hamptons\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['au'], last word: au\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['own'], last word: own\n",
      "repeated words: ['as', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: oh\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['n'], last word: n\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['the', 'n', 'o'], last word: o\n",
      "repeated words: [], last word: over\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: or\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['have', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: r\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['i'], last word: that\n",
      "repeated words: ['o'], last word: r\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['n'], last word: n\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: known\n",
      "repeated words: [], last word: unknown\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['on', 'and'], last word: on\n",
      "repeated words: [], last word: it\n",
      "repeated words: [], last word: it\n",
      "repeated words: [], last word: decision\n",
      "repeated words: [], last word: point\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: higher\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: [], last word: no\n",
      "repeated words: [], last word: go\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['s', 'o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: boss\n",
      "repeated words: ['au'], last word: au\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: oh\n",
      "repeated words: ['to', 'a'], last word: a\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: oh\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: you\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: ot\n",
      "repeated words: ['in', 'the'], last word: all\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: own\n",
      "repeated words: [], last word: en\n",
      "repeated words: ['as', 'he'], last word: too\n",
      "repeated words: ['that', 'on', 'o'], last word: o\n",
      "repeated words: [], last word: none\n",
      "repeated words: [], last word: r\n",
      "repeated words: ['a'], last word: a\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: ah\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: oh\n",
      "repeated words: ['o'], last word: n\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['au'], last word: au\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['oh'], last word: no\n",
      "repeated words: [], last word: movies\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: oh\n",
      "repeated words: [], last word: all\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: oh\n",
      "repeated words: [], last word: time\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: n\n",
      "repeated words: [], last word: in\n",
      "repeated words: [], last word: gun\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: oh\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['au'], last word: au\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: [], last word: made\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: it\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['a', 'o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['a'], last word: jury\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: off\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: [], last word: another\n",
      "repeated words: ['or', 'on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['the'], last word: placed\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['are', 'o'], last word: o\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: illusion\n",
      "repeated words: [], last word: alone\n",
      "repeated words: [], last word: on\n",
      "repeated words: [], last word: teenager\n",
      "repeated words: [], last word: too\n",
      "repeated words: [], last word: around\n",
      "repeated words: [], last word: mid\n",
      "repeated words: [], last word: occasion\n",
      "repeated words: [], last word: gesture\n",
      "repeated words: [], last word: funny\n",
      "repeated words: [], last word: handle\n",
      "repeated words: [], last word: probably\n",
      "repeated words: [], last word: vacations\n",
      "repeated words: [], last word: time\n",
      "repeated words: [], last word: usual\n",
      "repeated words: [], last word: good\n",
      "repeated words: [], last word: base\n",
      "repeated words: [], last word: somewhere\n",
      "repeated words: [], last word: it\n",
      "repeated words: [], last word: that\n",
      "repeated words: [], last word: yourself\n",
      "repeated words: [], last word: cook\n",
      "repeated words: [], last word: no\n",
      "repeated words: [], last word: dealer\n",
      "repeated words: [], last word: year\n",
      "repeated words: [], last word: that\n",
      "repeated words: [], last word: here\n",
      "repeated words: [], last word: now\n",
      "repeated words: [], last word: make\n",
      "repeated words: [], last word: movies\n",
      "repeated words: [], last word: all\n",
      "repeated words: [], last word: upset\n",
      "repeated words: [], last word: demons\n",
      "repeated words: [], last word: her\n",
      "repeated words: [], last word: revealing\n",
      "repeated words: [], last word: regionally\n",
      "repeated words: ['to'], last word: one\n",
      "repeated words: [], last word: up\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['a'], last word: a\n",
      "repeated words: ['on', 'o'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['of', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['my'], last word: now\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['the'], last word: line\n",
      "repeated words: ['the', 'room', 'on', 'o'], last word: o\n",
      "repeated words: [], last word: own\n",
      "repeated words: [], last word: all\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['the'], last word: prophecy\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: oh\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: [], last word: rights\n",
      "repeated words: ['the', 'on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: own\n",
      "repeated words: [], last word: exploration\n",
      "repeated words: ['i', 'it'], last word: a\n",
      "repeated words: ['the', 'to'], last word: ano\n",
      "repeated words: ['it'], last word: him\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['ho'], last word: ho\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['person', 'ow'], last word: ow\n",
      "repeated words: ['ho'], last word: ho\n",
      "repeated words: ['as', 'o'], last word: o\n",
      "repeated words: ['ho'], last word: ho\n",
      "repeated words: ['ho'], last word: ho\n",
      "repeated words: ['ho'], last word: ho\n",
      "repeated words: ['ho'], last word: ho\n",
      "repeated words: ['ho'], last word: ho\n",
      "repeated words: ['ho'], last word: ho\n",
      "repeated words: [], last word: own\n",
      "repeated words: ['ho'], last word: ho\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['ho'], last word: ho\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['ho'], last word: ho\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: it\n",
      "repeated words: ['the'], last word: summoned\n",
      "repeated words: ['ho'], last word: ho\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: alone\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['the'], last word: both\n",
      "repeated words: [], last word: own\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: [], last word: sunderland\n",
      "repeated words: ['to'], last word: by\n",
      "repeated words: ['the', 'oh'], last word: oh\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['the', 'o'], last word: o\n",
      "repeated words: [], last word: future\n",
      "repeated words: [], last word: it\n",
      "repeated words: [], last word: weekends\n",
      "repeated words: [], last word: shame\n",
      "repeated words: ['as'], last word: in\n",
      "repeated words: ['year'], last word: it\n",
      "repeated words: [], last word: home\n",
      "repeated words: [], last word: studies\n",
      "repeated words: [], last word: it\n",
      "repeated words: [], last word: that\n",
      "repeated words: [], last word: cooking\n",
      "repeated words: [], last word: exciting\n",
      "repeated words: [], last word: similar\n",
      "repeated words: ['was'], last word: child\n",
      "repeated words: [], last word: chance\n",
      "repeated words: [], last word: yet\n",
      "repeated words: [], last word: laugh\n",
      "repeated words: [], last word: there\n",
      "repeated words: [], last word: choice\n",
      "repeated words: [], last word: january\n",
      "repeated words: [], last word: bust\n",
      "repeated words: [], last word: too\n",
      "repeated words: [], last word: money\n",
      "repeated words: [], last word: trembled\n",
      "repeated words: ['go'], last word: late\n",
      "repeated words: [], last word: up\n",
      "repeated words: [], last word: gasoline\n",
      "repeated words: [], last word: watching\n",
      "repeated words: [], last word: life\n",
      "repeated words: [], last word: too\n",
      "repeated words: [], last word: in\n",
      "repeated words: [], last word: now\n",
      "repeated words: ['the'], last word: changes\n",
      "repeated words: [], last word: up\n",
      "repeated words: ['and', 'a'], last word: joy\n",
      "repeated words: [], last word: plus\n",
      "repeated words: [], last word: time\n",
      "repeated words: ['he'], last word: is\n",
      "repeated words: ['the'], last word: configuration\n",
      "repeated words: ['a'], last word: a\n",
      "repeated words: ['on', 'and'], last word: on\n",
      "repeated words: ['a'], last word: a\n",
      "repeated words: [], last word: own\n",
      "repeated words: ['a'], last word: a\n",
      "repeated words: ['a'], last word: a\n",
      "repeated words: ['her'], last word: own\n",
      "repeated words: ['x', 'o'], last word: o\n",
      "repeated words: ['a'], last word: a\n",
      "repeated words: ['the'], last word: alleged\n",
      "repeated words: ['a'], last word: a\n",
      "repeated words: ['of', 'a'], last word: a\n",
      "repeated words: ['the', 'is', 'on', 'and'], last word: on\n",
      "repeated words: ['on', 'and'], last word: on\n",
      "repeated words: ['the'], last word: approach\n",
      "repeated words: ['on', 'and'], last word: on\n",
      "repeated words: ['it'], last word: goal\n",
      "repeated words: [], last word: him\n",
      "repeated words: [], last word: no\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['n'], last word: n\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['a'], last word: year\n",
      "repeated words: ['n'], last word: n\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['n'], last word: n\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['was', 'i', 'own'], last word: own\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['in'], last word: a\n",
      "repeated words: ['n'], last word: n\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: on\n",
      "repeated words: ['your', 'on', 'o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: are\n",
      "repeated words: ['on', 'n'], last word: n\n",
      "repeated words: [], last word: summary\n",
      "repeated words: [], last word: no\n",
      "repeated words: [], last word: sun\n",
      "repeated words: [], last word: minister\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: silly\n",
      "repeated words: ['on'], last word: o\n",
      "repeated words: [], last word: deductions\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['n'], last word: n\n",
      "repeated words: [], last word: a\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on', 'and'], last word: on\n",
      "repeated words: ['n'], last word: n\n",
      "repeated words: [], last word: salt\n",
      "repeated words: ['on', 'n'], last word: n\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on', 'and'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on', 'and'], last word: on\n",
      "repeated words: ['and', 'so'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: point\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['and', 'on'], last word: on\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on', 'and'], last word: on\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['a'], last word: note\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: affairs\n",
      "repeated words: [], last word: productions\n",
      "repeated words: [], last word: nov\n",
      "repeated words: ['i'], last word: blue\n",
      "repeated words: ['on'], last word: own\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: [], last word: family\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['the', 'on'], last word: on\n",
      "repeated words: ['you', 'are', 'on', 'and'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['support', 'our'], last word: own\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: graduating\n",
      "repeated words: ['a'], last word: gun\n",
      "repeated words: ['n'], last word: n\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: car\n",
      "repeated words: ['a'], last word: a\n",
      "repeated words: [], last word: eat\n",
      "repeated words: ['the', 'in'], last word: in\n",
      "repeated words: ['the', 'on'], last word: on\n",
      "repeated words: ['the'], last word: pounds\n",
      "repeated words: ['the'], last word: like\n",
      "repeated words: ['the'], last word: in\n",
      "repeated words: [], last word: along\n",
      "repeated words: [], last word: note\n",
      "repeated words: [], last word: off\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['the'], last word: of\n",
      "repeated words: ['the'], last word: once\n",
      "repeated words: [], last word: kong\n",
      "repeated words: ['the', 'all'], last word: all\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: [], last word: considered\n",
      "repeated words: ['work', 'all'], last word: all\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['hit'], last word: hit\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: [], last word: event\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['of'], last word: it\n",
      "repeated words: ['the', 'all'], last word: all\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['the'], last word: up\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['the', 'of', 'it', 'all'], last word: all\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: [], last word: fighter\n",
      "repeated words: [], last word: us\n",
      "repeated words: ['i'], last word: baby\n",
      "repeated words: [], last word: bit\n",
      "repeated words: ['the'], last word: come\n",
      "repeated words: [], last word: have\n",
      "repeated words: [], last word: ring\n",
      "repeated words: [], last word: hope\n",
      "repeated words: [], last word: him\n",
      "repeated words: ['the'], last word: have\n",
      "repeated words: [], last word: how\n",
      "repeated words: [], last word: ohio\n",
      "repeated words: [], last word: high\n",
      "repeated words: ['the'], last word: high\n",
      "repeated words: [], last word: high\n",
      "repeated words: [], last word: rich\n",
      "repeated words: [], last word: hall\n",
      "repeated words: [], last word: house\n",
      "repeated words: ['the'], last word: whole\n",
      "repeated words: [], last word: huh\n",
      "repeated words: [], last word: x\n",
      "repeated words: ['the'], last word: water\n",
      "repeated words: [], last word: high\n",
      "repeated words: [], last word: sinking\n",
      "repeated words: [], last word: her\n",
      "repeated words: [], last word: games\n",
      "repeated words: [], last word: here\n",
      "repeated words: [], last word: ho\n",
      "repeated words: [], last word: home\n",
      "repeated words: [], last word: home\n",
      "repeated words: [], last word: ho\n",
      "repeated words: [], last word: up\n",
      "repeated words: [], last word: house\n",
      "repeated words: [], last word: how\n",
      "repeated words: [], last word: have\n",
      "repeated words: [], last word: point\n",
      "repeated words: [], last word: garage\n",
      "repeated words: [], last word: years\n",
      "repeated words: [], last word: now\n",
      "repeated words: [], last word: hot\n",
      "repeated words: [], last word: commit\n",
      "repeated words: [], last word: whole\n",
      "repeated words: [], last word: working\n",
      "repeated words: [], last word: entirety\n",
      "repeated words: ['they'], last word: want\n",
      "repeated words: [], last word: life\n",
      "repeated words: [], last word: one\n",
      "repeated words: [], last word: say\n",
      "repeated words: [], last word: permission\n",
      "repeated words: [], last word: girl\n",
      "repeated words: [], last word: pace\n",
      "repeated words: [], last word: deer\n",
      "repeated words: [], last word: them\n",
      "repeated words: [], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['oh'], last word: ah\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['you', 'o'], last word: o\n",
      "repeated words: ['do', 'oh'], last word: oh\n",
      "repeated words: [], last word: no\n",
      "repeated words: [], last word: it\n",
      "repeated words: [], last word: work\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: on\n",
      "repeated words: [], last word: gone\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['ho'], last word: ho\n",
      "repeated words: [], last word: true\n",
      "repeated words: [], last word: u\n",
      "repeated words: [], last word: them\n",
      "repeated words: [], last word: work\n",
      "repeated words: [], last word: true\n",
      "repeated words: [], last word: week\n",
      "repeated words: [], last word: their\n",
      "repeated words: [], last word: know\n",
      "repeated words: [], last word: this\n",
      "repeated words: [], last word: decision\n",
      "repeated words: [], last word: his\n",
      "repeated words: [], last word: me\n",
      "repeated words: [], last word: problems\n",
      "repeated words: [], last word: interesting\n",
      "repeated words: [], last word: around\n",
      "repeated words: [], last word: before\n",
      "repeated words: [], last word: house\n",
      "repeated words: [], last word: away\n",
      "repeated words: [], last word: are\n",
      "repeated words: [], last word: here\n",
      "repeated words: [], last word: need\n",
      "repeated words: [], last word: please\n",
      "repeated words: [], last word: money\n",
      "repeated words: [], last word: me\n",
      "repeated words: [], last word: year\n",
      "repeated words: ['on', 'off'], last word: off\n",
      "repeated words: [], last word: days\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: you\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['oh'], last word: no\n",
      "repeated words: [], last word: good\n",
      "repeated words: [], last word: love\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: [], last word: all\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: or\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: again\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: well\n",
      "repeated words: [], last word: said\n",
      "repeated words: [], last word: gay\n",
      "repeated words: [], last word: usual\n",
      "repeated words: [], last word: laws\n",
      "repeated words: [], last word: coming\n",
      "repeated words: [], last word: without\n",
      "repeated words: [], last word: boyes\n",
      "repeated words: [], last word: well\n",
      "repeated words: [], last word: missing\n",
      "repeated words: [], last word: experience\n",
      "repeated words: [], last word: dallas\n",
      "repeated words: [], last word: goal\n",
      "repeated words: [], last word: women\n",
      "repeated words: [], last word: miles\n",
      "repeated words: [], last word: guy\n",
      "repeated words: [], last word: fast\n",
      "repeated words: [], last word: employees\n",
      "repeated words: [], last word: true\n",
      "repeated words: [], last word: real\n",
      "repeated words: [], last word: lines\n",
      "repeated words: [], last word: books\n",
      "repeated words: [], last word: leave\n",
      "repeated words: [], last word: movies\n",
      "repeated words: [], last word: stick\n",
      "repeated words: [], last word: company\n",
      "repeated words: [], last word: car\n",
      "repeated words: [], last word: game\n",
      "repeated words: [], last word: ago\n",
      "repeated words: ['the'], last word: computer\n",
      "repeated words: [], last word: different\n",
      "repeated words: [], last word: music\n",
      "repeated words: [], last word: eat\n",
      "repeated words: [], last word: there\n",
      "repeated words: [], last word: wonderful\n",
      "repeated words: [], last word: home\n",
      "repeated words: ['she', 'to'], last word: you\n",
      "repeated words: [], last word: please\n",
      "repeated words: [], last word: important\n",
      "repeated words: [], last word: expensive\n",
      "repeated words: [], last word: newspapers\n",
      "repeated words: [], last word: here\n",
      "repeated words: [], last word: please\n",
      "repeated words: [], last word: company\n",
      "repeated words: [], last word: book\n",
      "repeated words: [], last word: make\n",
      "repeated words: [], last word: nice\n",
      "repeated words: [], last word: game\n",
      "repeated words: [], last word: summer\n",
      "repeated words: [], last word: visually\n",
      "repeated words: [], last word: compass\n",
      "repeated words: [], last word: off\n",
      "repeated words: [], last word: line\n",
      "repeated words: [], last word: important\n",
      "repeated words: ['up'], last word: up\n",
      "repeated words: [], last word: difference\n",
      "repeated words: [], last word: depression\n",
      "repeated words: [], last word: stores\n",
      "repeated words: [], last word: increased\n",
      "repeated words: ['on'], last word: gasoline\n",
      "repeated words: [], last word: too\n",
      "repeated words: [], last word: study\n",
      "repeated words: [], last word: it\n",
      "repeated words: [], last word: you\n",
      "repeated words: [], last word: service\n",
      "repeated words: [], last word: it\n",
      "repeated words: [], last word: work\n",
      "repeated words: ['the'], last word: myself\n",
      "repeated words: [], last word: day\n",
      "repeated words: [], last word: way\n",
      "repeated words: [], last word: along\n",
      "repeated words: [], last word: around\n",
      "repeated words: [], last word: expensive\n",
      "repeated words: [], last word: today\n",
      "repeated words: [], last word: sun\n",
      "repeated words: [], last word: season\n",
      "repeated words: [], last word: company\n",
      "repeated words: [], last word: wonderful\n",
      "repeated words: [], last word: game\n",
      "repeated words: [], last word: better\n",
      "repeated words: [], last word: good\n",
      "repeated words: [], last word: cars\n",
      "repeated words: [], last word: states\n",
      "repeated words: [], last word: growing\n",
      "repeated words: [], last word: people\n",
      "repeated words: [], last word: here\n",
      "repeated words: [], last word: final\n",
      "repeated words: [], last word: better\n",
      "repeated words: [], last word: dollars\n",
      "repeated words: [], last word: o\n",
      "repeated words: [], last word: this\n",
      "repeated words: [], last word: season\n",
      "repeated words: [], last word: a\n",
      "repeated words: [], last word: city\n",
      "repeated words: [], last word: help\n",
      "repeated words: [], last word: here\n",
      "repeated words: [], last word: game\n",
      "repeated words: [], last word: living\n",
      "repeated words: [], last word: far\n",
      "repeated words: ['to'], last word: friends\n",
      "repeated words: [], last word: alone\n",
      "repeated words: [], last word: wonderful\n",
      "repeated words: [], last word: season\n",
      "repeated words: [], last word: own\n",
      "repeated words: ['the'], last word: north\n",
      "repeated words: [], last word: this\n",
      "repeated words: [], last word: book\n",
      "repeated words: [], last word: important\n",
      "repeated words: [], last word: room\n",
      "repeated words: [], last word: wife\n",
      "repeated words: [], last word: expensive\n",
      "repeated words: [], last word: you\n",
      "repeated words: [], last word: true\n",
      "repeated words: [], last word: now\n",
      "repeated words: [], last word: card\n",
      "repeated words: [], last word: play\n",
      "repeated words: [], last word: now\n",
      "repeated words: [], last word: own\n",
      "repeated words: [], last word: you\n",
      "repeated words: [], last word: know\n",
      "repeated words: [], last word: own\n",
      "repeated words: ['her'], last word: own\n",
      "repeated words: [], last word: today\n",
      "repeated words: [], last word: me\n",
      "repeated words: [], last word: now\n",
      "repeated words: [], last word: today\n",
      "repeated words: [], last word: own\n",
      "repeated words: ['i'], last word: help\n",
      "repeated words: [], last word: no\n",
      "repeated words: [], last word: them\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['you', 'oh'], last word: oh\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: choice\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: these\n",
      "repeated words: [], last word: ago\n",
      "repeated words: [], last word: is\n",
      "repeated words: [], last word: bad\n",
      "repeated words: ['to'], last word: be\n",
      "repeated words: [], last word: store\n",
      "repeated words: [], last word: true\n",
      "repeated words: [], last word: evening\n",
      "repeated words: [], last word: school\n",
      "repeated words: [], last word: outside\n",
      "repeated words: [], last word: out\n",
      "repeated words: [], last word: country\n",
      "repeated words: [], last word: need\n",
      "repeated words: [], last word: by\n",
      "repeated words: [], last word: all\n",
      "repeated words: [], last word: year\n",
      "repeated words: [], last word: soon\n",
      "repeated words: [], last word: kids\n",
      "repeated words: [], last word: stuff\n",
      "repeated words: ['the'], last word: house\n",
      "repeated words: [], last word: enjoy\n",
      "repeated words: [], last word: this\n",
      "repeated words: [], last word: team\n",
      "repeated words: ['the'], last word: house\n",
      "repeated words: [], last word: heart\n",
      "repeated words: [], last word: what\n",
      "repeated words: [], last word: place\n",
      "repeated words: ['you'], last word: you\n",
      "repeated words: [], last word: unusual\n",
      "repeated words: [], last word: change\n",
      "repeated words: [], last word: tons\n",
      "repeated words: [], last word: put\n",
      "repeated words: [], last word: now\n",
      "repeated words: [], last word: it\n",
      "repeated words: [], last word: still\n",
      "repeated words: [], last word: here\n",
      "repeated words: [], last word: work\n",
      "repeated words: [], last word: oil\n",
      "repeated words: [], last word: picks\n",
      "repeated words: [], last word: thousand\n",
      "repeated words: [], last word: yourself\n",
      "repeated words: [], last word: minute\n",
      "repeated words: [], last word: television\n",
      "repeated words: [], last word: music\n",
      "repeated words: [], last word: back\n",
      "repeated words: [], last word: eyes\n",
      "repeated words: [], last word: problem\n",
      "repeated words: [], last word: areas\n",
      "repeated words: [], last word: way\n",
      "repeated words: [], last word: market\n",
      "repeated words: [], last word: how\n",
      "repeated words: ['to'], last word: care\n",
      "repeated words: [], last word: on\n",
      "repeated words: [], last word: class\n",
      "repeated words: [], last word: no\n",
      "repeated words: [], last word: policy\n",
      "repeated words: [], last word: outside\n",
      "repeated words: [], last word: room\n",
      "repeated words: [], last word: them\n",
      "repeated words: [], last word: interesting\n",
      "repeated words: [], last word: well\n",
      "repeated words: [], last word: now\n",
      "repeated words: [], last word: there\n",
      "repeated words: ['this'], last word: year\n",
      "repeated words: [], last word: tell\n",
      "repeated words: [], last word: month\n",
      "repeated words: [], last word: already\n",
      "repeated words: [], last word: either\n",
      "repeated words: ['the'], last word: back\n",
      "repeated words: [], last word: program\n",
      "repeated words: [], last word: places\n",
      "repeated words: [], last word: please\n",
      "repeated words: [], last word: favorite\n",
      "repeated words: [], last word: lot\n",
      "repeated words: [], last word: morning\n",
      "repeated words: [], last word: joy\n",
      "repeated words: [], last word: difference\n",
      "repeated words: [], last word: you\n",
      "repeated words: [], last word: parents\n",
      "repeated words: [], last word: work\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: [], last word: row\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['a', 'on', 'o'], last word: o\n",
      "repeated words: [], last word: own\n",
      "repeated words: [], last word: own\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: [], last word: country\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: air\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: way\n",
      "repeated words: [], last word: minutes\n",
      "repeated words: [], last word: it\n",
      "repeated words: [], last word: month\n",
      "repeated words: [], last word: them\n",
      "repeated words: [], last word: flavor\n",
      "repeated words: [], last word: it\n",
      "repeated words: [], last word: newspaper\n",
      "repeated words: [], last word: delivers\n",
      "repeated words: [], last word: button\n",
      "repeated words: [], last word: much\n",
      "repeated words: [], last word: mountains\n",
      "repeated words: [], last word: something\n",
      "repeated words: [], last word: level\n",
      "repeated words: [], last word: back\n",
      "repeated words: [], last word: here\n",
      "repeated words: [], last word: explanation\n",
      "repeated words: [], last word: in\n",
      "repeated words: [], last word: it\n",
      "repeated words: [], last word: time\n",
      "repeated words: [], last word: s\n",
      "repeated words: [], last word: you\n",
      "repeated words: [], last word: food\n",
      "repeated words: [], last word: everyone\n",
      "repeated words: [], last word: before\n",
      "repeated words: [], last word: character\n",
      "repeated words: ['the'], last word: court\n",
      "repeated words: [], last word: yourself\n",
      "repeated words: [], last word: book\n",
      "repeated words: [], last word: want\n",
      "repeated words: [], last word: year\n",
      "repeated words: [], last word: today\n",
      "repeated words: [], last word: criminals\n",
      "repeated words: [], last word: decisions\n",
      "repeated words: [], last word: california\n",
      "repeated words: [], last word: weekend\n",
      "repeated words: [], last word: not\n",
      "repeated words: ['the'], last word: spent\n",
      "repeated words: [], last word: music\n",
      "repeated words: [], last word: country\n",
      "repeated words: [], last word: now\n",
      "repeated words: ['my', 'part'], last word: decor\n",
      "repeated words: [], last word: it\n",
      "repeated words: [], last word: here\n",
      "repeated words: [], last word: room\n",
      "repeated words: ['i'], last word: car\n",
      "repeated words: ['and'], last word: thing\n",
      "repeated words: ['i'], last word: year\n",
      "repeated words: [], last word: sorry\n",
      "repeated words: [], last word: course\n",
      "repeated words: ['the'], last word: gates\n",
      "repeated words: [], last word: thing\n",
      "repeated words: [], last word: now\n",
      "repeated words: [], last word: it\n",
      "repeated words: [], last word: game\n",
      "repeated words: [], last word: rate\n",
      "repeated words: [], last word: apart\n",
      "repeated words: [], last word: comments\n",
      "repeated words: [], last word: not\n",
      "repeated words: [], last word: month\n",
      "repeated words: [], last word: programs\n",
      "repeated words: [], last word: that\n",
      "repeated words: ['just'], last word: now\n",
      "repeated words: [], last word: something\n",
      "repeated words: [], last word: this\n",
      "repeated words: [], last word: bets\n",
      "repeated words: [], last word: column\n",
      "repeated words: [], last word: happen\n",
      "repeated words: [], last word: ahead\n",
      "repeated words: [], last word: enjoy\n",
      "repeated words: [], last word: winter\n",
      "repeated words: [], last word: here\n",
      "repeated words: [], last word: years\n",
      "repeated words: ['music'], last word: music\n",
      "repeated words: [], last word: oregon\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: of\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: destroyed\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: each\n",
      "repeated words: ['the', 'on'], last word: on\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['it', 'on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: off\n",
      "repeated words: ['on'], last word: a\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['i', 'on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: known\n",
      "repeated words: [], last word: papers\n",
      "repeated words: ['on'], last word: o\n",
      "repeated words: [], last word: a\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['the'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['the', 'oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: from\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['and', 'of'], last word: home\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['i', 'ow'], last word: ow\n",
      "repeated words: [], last word: time\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['own'], last word: own\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['own'], last word: own\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['was'], last word: no\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: lot\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['on'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: [], last word: nov\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['it'], last word: it\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['that', 'on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['the', 'on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: ago\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: notice\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['the', 'on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['the', 'on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['you', 'on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: time\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: house\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: up\n",
      "repeated words: ['o'], last word: oh\n",
      "repeated words: [], last word: idea\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: no\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: america\n",
      "repeated words: ['au'], last word: au\n",
      "repeated words: [], last word: neighborhood\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['to'], last word: again\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: a\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: class\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['do', 'o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: not\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['to', 'be'], last word: sitter\n",
      "repeated words: [], last word: alone\n",
      "repeated words: ['o'], last word: oh\n",
      "repeated words: [], last word: ago\n",
      "repeated words: ['to'], last word: them\n",
      "repeated words: ['ho'], last word: ho\n",
      "repeated words: [], last word: roll\n",
      "repeated words: [], last word: oct\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: [], last word: in\n",
      "repeated words: ['au'], last word: au\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: la\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: policy\n",
      "repeated words: ['oh'], last word: ah\n",
      "repeated words: ['of'], last word: prison\n",
      "repeated words: [], last word: toxin\n",
      "repeated words: [], last word: a\n",
      "repeated words: ['i'], last word: up\n",
      "repeated words: [], last word: trap\n",
      "repeated words: [], last word: ours\n",
      "repeated words: [], last word: class\n",
      "repeated words: ['old'], last word: old\n",
      "repeated words: [], last word: pain\n",
      "repeated words: [], last word: in\n",
      "repeated words: [], last word: life\n",
      "repeated words: [], last word: going\n",
      "repeated words: [], last word: impressive\n",
      "repeated words: [], last word: whole\n",
      "repeated words: [], last word: done\n",
      "repeated words: [], last word: bad\n",
      "repeated words: [], last word: area\n",
      "repeated words: ['the'], last word: peels\n",
      "repeated words: [], last word: people\n",
      "repeated words: [], last word: to\n",
      "repeated words: [], last word: south\n",
      "repeated words: [], last word: a\n",
      "repeated words: [], last word: away\n",
      "repeated words: [], last word: point\n",
      "repeated words: [], last word: phone\n",
      "repeated words: [], last word: emergencies\n",
      "repeated words: [], last word: afternoon\n",
      "repeated words: [], last word: something\n",
      "repeated words: [], last word: done\n",
      "repeated words: [], last word: system\n",
      "repeated words: [], last word: us\n",
      "repeated words: [], last word: name\n",
      "repeated words: [], last word: research\n",
      "repeated words: [], last word: summer\n",
      "repeated words: [], last word: mean\n",
      "repeated words: [], last word: person\n",
      "repeated words: [], last word: guilty\n",
      "repeated words: [], last word: it\n",
      "repeated words: ['the'], last word: pool\n",
      "repeated words: [], last word: go\n",
      "repeated words: [], last word: whatever\n",
      "repeated words: [], last word: myself\n",
      "repeated words: ['the'], last word: inspection\n",
      "repeated words: [], last word: here\n",
      "repeated words: [], last word: weekend\n",
      "repeated words: [], last word: start\n",
      "repeated words: [], last word: was\n",
      "repeated words: [], last word: fields\n",
      "repeated words: [], last word: area\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['to'], last word: her\n",
      "repeated words: [], last word: nov\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: canal\n",
      "repeated words: ['i'], last word: done\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['it'], last word: summer\n",
      "repeated words: [], last word: nov\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: river\n",
      "repeated words: ['it', 'o'], last word: o\n",
      "repeated words: [], last word: own\n",
      "repeated words: [], last word: complicated\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: no\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: so\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['story', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['your', 'oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['au'], last word: au\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['au'], last word: au\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['au'], last word: au\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['we'], last word: it\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: trash\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: thing\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: [], last word: school\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: inclusion\n",
      "repeated words: [], last word: funny\n",
      "repeated words: [], last word: conduct\n",
      "repeated words: [], last word: s\n",
      "repeated words: [], last word: outside\n",
      "repeated words: [], last word: reasons\n",
      "repeated words: [], last word: them\n",
      "repeated words: [], last word: scale\n",
      "repeated words: [], last word: small\n",
      "repeated words: [], last word: well\n",
      "repeated words: [], last word: first\n",
      "repeated words: [], last word: have\n",
      "repeated words: [], last word: women\n",
      "repeated words: [], last word: explosion\n",
      "repeated words: [], last word: from\n",
      "repeated words: [], last word: consider\n",
      "repeated words: [], last word: inside\n",
      "repeated words: [], last word: out\n",
      "repeated words: [], last word: recent\n",
      "repeated words: [], last word: them\n",
      "repeated words: [], last word: stale\n",
      "repeated words: [], last word: slow\n",
      "repeated words: ['a', 'clip'], last word: wall\n",
      "repeated words: [], last word: russia\n",
      "repeated words: [], last word: off\n",
      "repeated words: [], last word: remember\n",
      "repeated words: ['all'], last word: all\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['a', 'own'], last word: own\n",
      "repeated words: ['no'], last word: no\n",
      "repeated words: ['oh'], last word: no\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['to', 'oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: [], last word: movie\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: [], last word: problem\n",
      "repeated words: ['oh'], last word: oh\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['of', 'o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['a', 'on'], last word: a\n",
      "repeated words: ['that', 'on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['the', 'on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['do', 'on'], last word: on\n",
      "repeated words: ['a', 'on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['music', 'on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['the', 'n'], last word: n\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['is', 'on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: in\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: [], last word: out\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['it', 'on'], last word: it\n",
      "repeated words: ['on'], last word: a\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['or'], last word: questions\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on', 'o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['own'], last word: own\n",
      "repeated words: [], last word: n\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['o'], last word: o\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['ow'], last word: ow\n",
      "repeated words: ['on'], last word: on\n",
      "repeated words: ['on'], last word: on\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>i get tired with the song and dance routine</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>emergency here</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>you create a people surprise</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>i think maybe you look at it on air</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>show that they do have problems in a car</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1445</th>\n",
       "      <td>1445</td>\n",
       "      <td>since they don't have the real me</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1446</th>\n",
       "      <td>1446</td>\n",
       "      <td>that's a lot of new sections in the document</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1447</th>\n",
       "      <td>1447</td>\n",
       "      <td>an hour and a half away in</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1448</th>\n",
       "      <td>1448</td>\n",
       "      <td>she points at the other one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1449</th>\n",
       "      <td>1449</td>\n",
       "      <td>while people that is another factor</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1450 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        id                                          text\n",
       "0        0   i get tired with the song and dance routine\n",
       "1        1                                emergency here\n",
       "2        2                  you create a people surprise\n",
       "3        3           i think maybe you look at it on air\n",
       "4        4      show that they do have problems in a car\n",
       "...    ...                                           ...\n",
       "1445  1445             since they don't have the real me\n",
       "1446  1446  that's a lot of new sections in the document\n",
       "1447  1447                    an hour and a half away in\n",
       "1448  1448                   she points at the other one\n",
       "1449  1449           while people that is another factor\n",
       "\n",
       "[1450 rows x 2 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# for each sentence in pred_sentence if the last word is repeated, remove all the repeated words, including the last one\n",
    "# e.g. \"hello world day day\" -> \"hello world\"\n",
    "\n",
    "def clean_pred_sentence(sentence):\n",
    "    words = sentence.split()\n",
    "    if not words:\n",
    "        return sentence\n",
    "    last_word = words[-1]\n",
    "\n",
    "    # count occurrences of each word\n",
    "    word_counts = {}\n",
    "    for word in words:\n",
    "        if word in word_counts:\n",
    "            word_counts[word] += 1\n",
    "        else:\n",
    "            word_counts[word] = 1\n",
    "\n",
    "    # find repeated words\n",
    "    repeated_words = [word for word, count in word_counts.items() if count > 1]\n",
    "\n",
    "\n",
    "    \n",
    "\n",
    "    # check what are the repeated words\n",
    "\n",
    "    print(f\"repeated words: {repeated_words}, last word: {last_word}\")\n",
    "\n",
    "    if last_word in repeated_words:\n",
    "        cleaned_words = [word for word in words if word != last_word]\n",
    "    \n",
    "\n",
    "    else:\n",
    "        cleaned_words = words\n",
    "    return ' '.join(cleaned_words)\n",
    "\n",
    "df['text'] = df['text'].apply(clean_pred_sentence)\n",
    "# save the cleaned dataframe\n",
    "df\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a3bf7d94",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "fine\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>i get tired with the song and dance routine</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>emergency here</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>you create a people surprise</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>i think maybe you look at it on air</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>show that they do have problems in a car</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1445</th>\n",
       "      <td>1445</td>\n",
       "      <td>since they don't have the real me</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1446</th>\n",
       "      <td>1446</td>\n",
       "      <td>that's a lot of new sections in the document</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1447</th>\n",
       "      <td>1447</td>\n",
       "      <td>an hour and a half away in</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1448</th>\n",
       "      <td>1448</td>\n",
       "      <td>she points at the other one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1449</th>\n",
       "      <td>1449</td>\n",
       "      <td>while people that is another factor</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1450 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        id                                          text\n",
       "0        0   i get tired with the song and dance routine\n",
       "1        1                                emergency here\n",
       "2        2                  you create a people surprise\n",
       "3        3           i think maybe you look at it on air\n",
       "4        4      show that they do have problems in a car\n",
       "...    ...                                           ...\n",
       "1445  1445             since they don't have the real me\n",
       "1446  1446  that's a lot of new sections in the document\n",
       "1447  1447                    an hour and a half away in\n",
       "1448  1448                   she points at the other one\n",
       "1449  1449           while people that is another factor\n",
       "\n",
       "[1450 rows x 2 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "submission_df = pd.DataFrame({\n",
    "    \"id\": np.arange(len(df)),\n",
    "    \"text\": df[\"text\"]})\n",
    "\n",
    "submission_df.to_csv(f\"./submission.csv\", index=False)\n",
    "\n",
    "print(\"fine\")\n",
    "submission_df\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "7e0e76c7",
   "metadata": {},
   "outputs": [
    {
     "ename": "SyntaxError",
     "evalue": "'break' outside loop (668683560.py, line 1)",
     "output_type": "error",
     "traceback": [
      "\u001b[0;36m  Cell \u001b[0;32mIn[5], line 1\u001b[0;36m\u001b[0m\n\u001b[0;31m    break\u001b[0m\n\u001b[0m    ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m 'break' outside loop\n"
     ]
    }
   ],
   "source": [
    "break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e0fa7b7a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import string\n",
    "def preprocess_text(text):\n",
    "    \"\"\"\n",
    "    Remove punctuation, strip, and convert text to lowercase.\n",
    "    \"\"\"\n",
    "    return text.translate(str.maketrans('', '', string.punctuation)).strip().lower()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "be650bb8",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WER: 0.2697\n",
      "BLEU: 66.8566\n",
      "ROUGE-1: 0.8067\n",
      "ROUGE-2: 0.7153\n",
      "ROUGE-L: 0.8064\n",
      "METEOR: 0.7873\n",
      "BERTScore_Precision: 0.6366\n",
      "BERTScore_Recall: 0.6594\n",
      "BERTScore_F1: 0.6468\n"
     ]
    }
   ],
   "source": [
    "import jiwer  # For WER\n",
    "import sacrebleu  # For BLEU\n",
    "from rouge_score import rouge_scorer  # For ROUGE\n",
    "from nltk.translate.meteor_score import meteor_score  # For METEOR\n",
    "import bert_score  # For BERTScore\n",
    "import numpy as np\n",
    "\n",
    "def compute_metrics(text_transcriptions, gpt_decoded):\n",
    "    \"\"\"\n",
    "    Compute various NLP evaluation metrics for text generation.\n",
    "\n",
    "    Args:\n",
    "        text_transcriptions (list): List of ground-truth reference sentences.\n",
    "        gpt_decoded (list): List of model-generated sentences.\n",
    "\n",
    "    Returns:\n",
    "        dict: Dictionary containing all computed metrics.\n",
    "    \"\"\"\n",
    "\n",
    "    #remove punctuation, strip and lower case\n",
    "\n",
    "\n",
    "    text_transcriptions = [preprocess_text(text) for text in text_transcriptions]\n",
    "    gpt_decoded = [preprocess_text(text) for text in gpt_decoded]\n",
    "\n",
    "    results = {}\n",
    "\n",
    "    # WER (Word Error Rate)\n",
    "    wer = jiwer.wer(text_transcriptions, gpt_decoded)\n",
    "    results[\"WER\"] = wer\n",
    "\n",
    "    # BLEU Score\n",
    "    bleu = sacrebleu.corpus_bleu(gpt_decoded, [text_transcriptions]).score\n",
    "    results[\"BLEU\"] = bleu\n",
    "\n",
    "    # ROUGE Scores\n",
    "    rouge = rouge_scorer.RougeScorer([\"rouge1\", \"rouge2\", \"rougeL\"], use_stemmer=True)\n",
    "    rouge_scores = [rouge.score(ref, pred) for ref, pred in zip(text_transcriptions, gpt_decoded)]\n",
    "    results[\"ROUGE-1\"] = np.mean([score[\"rouge1\"].fmeasure for score in rouge_scores])\n",
    "    results[\"ROUGE-2\"] = np.mean([score[\"rouge2\"].fmeasure for score in rouge_scores])\n",
    "    results[\"ROUGE-L\"] = np.mean([score[\"rougeL\"].fmeasure for score in rouge_scores])\n",
    "\n",
    "    ##METEOR\n",
    "    tokenized_references = [ref.split() for ref in text_transcriptions]  # Tokenize reference sentences\n",
    "    tokenized_hypotheses = [pred.split() for pred in gpt_decoded]  # Tokenize predicted sentences\n",
    "\n",
    "    meteor_scores = [meteor_score([ref], pred) for ref, pred in zip(tokenized_references, tokenized_hypotheses)]\n",
    "    results[\"METEOR\"] = np.mean(meteor_scores)\n",
    "    # BERTScore (Semantic Similarity)\n",
    "    P, R, F1 = bert_score.score(gpt_decoded, text_transcriptions, lang=\"en\", rescale_with_baseline=True)\n",
    "    results[\"BERTScore_Precision\"] = P.mean().item()\n",
    "    results[\"BERTScore_Recall\"] = R.mean().item()\n",
    "    results[\"BERTScore_F1\"] = F1.mean().item()\n",
    "\n",
    "    ## save also all values without recomputing when possible\n",
    "    results[\"METEOR_scores\"] = meteor_scores\n",
    "    results[\"ROUGE_scores\"] = rouge_scores\n",
    "\n",
    "    results[\"WER_scores\"] = [jiwer.wer([ref], [pred]) for ref, pred in zip(text_transcriptions, gpt_decoded)]\n",
    "    results[\"BERTScore_F1_scores\"] = F1.cpu().numpy().tolist()\n",
    "    return results\n",
    "\n",
    "\n",
    "sentences = df[\"target_sentence\"].tolist()\n",
    "decoded_sentences = df[\"pred_sentence\"].tolist()\n",
    "\n",
    "metrics = compute_metrics(sentences,decoded_sentences)\n",
    "for metric, score in metrics.items():\n",
    "    if \"scores\" not in metric:\n",
    "        print(f\"{metric}: {score:.4f}\")\n",
    "\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "results_df = pd.DataFrame({\n",
    "    \"target_sentence\": sentences,\n",
    "    \"pred_sentence\": decoded_sentences,\n",
    "})\n",
    "\n",
    "#unfold cer_list\n",
    "# cer_list_unfold = [item for sublist in cer_list for item in sublist]\n",
    "\n",
    "results_df[\"WER_scores\"] = metrics[\"WER_scores\"]\n",
    "results_df[\"METEOR_scores\"] = metrics[\"METEOR_scores\"]\n",
    "results_df[\"ROUGE_scores\"] = metrics[\"ROUGE_scores\"]\n",
    "results_df[\"BERTScore_F1_scores\"] = metrics[\"BERTScore_F1_scores\"]\n",
    "\n",
    "results_df.to_csv(f\"./language_results.csv\", index=False)\n",
    "\n",
    "overall_metrics = {k:v for k,v in metrics.items() if \"scores\" not in k}\n",
    "\n",
    "metrics_df = pd.DataFrame(overall_metrics, index=[0])\n",
    "metrics_df.to_csv(f\"./language_metrics.csv\", index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0912e870",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['You can see the code at this point as well.',\n",
       " 'How does it keep the cost down?',\n",
       " 'Not too controversial.',\n",
       " 'The jury and a judge work together on it.',\n",
       " 'Were quite vocal about it.',\n",
       " 'He said the decision to part ways was mutual.',\n",
       " 'In fact this morning when they were talking.',\n",
       " 'This is like a cruelty joke.',\n",
       " 'Has such a high clay content.',\n",
       " 'Woodworking mastery.',\n",
       " \"Wait a minute we know this thing isn't.\",\n",
       " \"Up in New England where I'm from.\",\n",
       " 'One thing or the other.',\n",
       " \"He's one of the big proponents of that.\",\n",
       " 'I have not gone back lately.',\n",
       " \"To me, it's a treasure.\",\n",
       " 'He is also a member of the Royal Irish Academy.',\n",
       " 'I guess I like to take care of it.',\n",
       " 'Put that back in the saucepan.',\n",
       " 'He does the yard.',\n",
       " \"You just really can't tell what's going to happen.\",\n",
       " 'And who is in charge of making that decision?',\n",
       " 'Not for the job I have now.',\n",
       " \"It's really not too difficult.\",\n",
       " 'Collisions should never happen.',\n",
       " \"I'm trying to think.\",\n",
       " 'Employee controller.',\n",
       " 'Bacon and all that good stuff.',\n",
       " 'If you look back.',\n",
       " 'And it also helps when they were winning.',\n",
       " 'She came last June and watched a game in the sky dome.',\n",
       " 'For me, I had no choice but to move.',\n",
       " \"Then it's twice as hard.\",\n",
       " \"I can't really complain.\",\n",
       " 'When I do recipes I just throw stuff in.',\n",
       " 'They recently released him.',\n",
       " 'One year public service for everybody.',\n",
       " \"It's an eighty seven degree day.\",\n",
       " \"I don't know if they do it all over.\",\n",
       " 'Employee Benefits.',\n",
       " 'How long are we supposed to talk for?',\n",
       " 'You start to take pleasure in it.',\n",
       " 'They had us fill out a long questionnaire.',\n",
       " \"We've had our way of life.\",\n",
       " 'His side of the family.',\n",
       " 'Good to hear from you.',\n",
       " \"I don't know where the answer is.\",\n",
       " \"Where'd you get the car?\",\n",
       " 'And you paint around it.',\n",
       " 'Crime is too much.',\n",
       " 'House robberies.',\n",
       " 'The Grand Canyon.',\n",
       " 'The experience.',\n",
       " \"He said he's been saying one more.\",\n",
       " \"I can't really think of anything else offhand.\",\n",
       " \"I couldn't even think of what you call them.\",\n",
       " 'I grew up water skiing.',\n",
       " 'They waited a couple years.',\n",
       " 'I have a gold one.',\n",
       " \"There's not a whole lot.\",\n",
       " 'There was no word on casualties.',\n",
       " 'It kind of wound down.',\n",
       " \"But my boss wouldn't pay for it.\",\n",
       " 'My mother was complaining last year about that.',\n",
       " 'Very scary to see what they found out.',\n",
       " 'Not too much soy sauce.',\n",
       " 'None of your business.',\n",
       " 'He put up all of his ash trays.',\n",
       " \"He didn't just say.\",\n",
       " 'If there was a credible punishment.',\n",
       " \"I've been pretty successful with that.\",\n",
       " 'In the previous version.',\n",
       " 'Was it third of December?',\n",
       " 'Invasion of Burma.',\n",
       " \"I guess we don't really use that many tin cans.\",\n",
       " \"We're a sit down together family.\",\n",
       " \"I'm originally from Maine.\",\n",
       " 'He had a hot pad.',\n",
       " \"Sometimes they're not very open.\",\n",
       " \"Right now I'm getting about sixty bucks a month.\",\n",
       " 'That kind of gas mileage.',\n",
       " \"He won't do that yet.\",\n",
       " \"I mean it's dying now.\",\n",
       " 'It was written many years ago.',\n",
       " 'Do you go by the ads when you look at them?',\n",
       " 'Do you get cable?',\n",
       " 'And this gets back to capital punishment.',\n",
       " 'Here are some key points from the briefing.',\n",
       " 'Ninety seven cents a week.',\n",
       " 'I think the roles now are less defined.',\n",
       " 'We started taking her.',\n",
       " 'So thank you for not using the exploit.',\n",
       " \"Their economy's a little bit weak.\",\n",
       " 'Employees will not get any severance pay.',\n",
       " 'I really feel bad for the people I see there.',\n",
       " 'Even in my case.',\n",
       " 'According to the judicial system.',\n",
       " 'Afford the payments on a used car.',\n",
       " 'Couple of hundred kids in the band.',\n",
       " 'I found that really helps.',\n",
       " 'That sort of stuff.',\n",
       " \"I know we've had this one lady that was attacked.\",\n",
       " 'We went to Colorado Springs.',\n",
       " \"Now it doesn't bother me at all.\",\n",
       " \"Don't have enough money.\",\n",
       " \"But I haven't told my husband.\",\n",
       " 'I have some neighbors across the field.',\n",
       " 'Have some part of the law.',\n",
       " 'Once the children were grown.',\n",
       " 'Special to the Detroit Free Press.',\n",
       " \"I'm doing all right.\",\n",
       " \"I guess it's close enough.\",\n",
       " 'A good appearance to do a good job where you work.',\n",
       " 'People donate more money.',\n",
       " 'People put them in.',\n",
       " 'Every time I do something.',\n",
       " 'Is there a basis for it?',\n",
       " 'I think because it costs less.',\n",
       " \"It's a weakness and all.\",\n",
       " \"It's a eighty eight.\",\n",
       " 'Things are free enough.',\n",
       " 'At the time you hired on with this company.',\n",
       " 'For the things that we want.',\n",
       " 'Streaming season is opening boys!',\n",
       " \"It's already done for you.\",\n",
       " 'He had been in that position for years.',\n",
       " \"Outside the building it's not bad.\",\n",
       " \"The victims' families and things.\",\n",
       " \"I don't want to do anything.\",\n",
       " \"You know I don't know.\",\n",
       " \"It's difficult to really say why.\",\n",
       " 'There was a voice in her head.',\n",
       " 'Lawmakers passed a measure last year.',\n",
       " \"I'd be curious.\",\n",
       " \"It's like a major production.\",\n",
       " 'We ended up watching it for a couple of hours.',\n",
       " 'You live in Garland.',\n",
       " 'Anything on that.',\n",
       " \"He's never gone away.\",\n",
       " 'What will be a luxury in the future?',\n",
       " 'Fiction books that I really like.',\n",
       " 'I was really working with the middle class.',\n",
       " 'You have to get everything replaced.',\n",
       " 'There would be no way to do it.',\n",
       " 'What is it called?',\n",
       " 'They do take up a lot of your time.',\n",
       " \"I'm still going.\",\n",
       " 'What would you find if you just kept on going?',\n",
       " 'Helps them understand the world.',\n",
       " \"I do study what's going on in the economy.\",\n",
       " 'I like that they run tense.',\n",
       " 'Just like how you said my father was.',\n",
       " \"You're going to get it.\",\n",
       " 'Guided by voices.',\n",
       " 'When was the last time we measured you?',\n",
       " 'I wanted him to win best actor on top of it.',\n",
       " 'I heard this on a Christian program.',\n",
       " 'You look down at your arm.',\n",
       " 'The Bermuda Triangle.',\n",
       " \"He's nine months old.\",\n",
       " 'It is like a country.',\n",
       " 'A male profession.',\n",
       " 'The islanders sleep inside.',\n",
       " 'They are the Detroit delegates.',\n",
       " 'They have coupons.',\n",
       " \"I can't believe they can.\",\n",
       " 'You just have to keep buying them.',\n",
       " 'In a new house everything is white.',\n",
       " 'Easiest way out maybe.',\n",
       " 'That will help solve our problems.',\n",
       " 'What kind of things do you think can be done.',\n",
       " 'I try to make sure.',\n",
       " 'There are also these various disclosures.',\n",
       " 'Enjoy the Holidays.',\n",
       " 'No easy choices.',\n",
       " 'Drunk drivers kill people.',\n",
       " 'On probation or parole and killed someone else.',\n",
       " 'This brings me to the next point.',\n",
       " \"I don't think they're in a conspiracy.\",\n",
       " 'A million dollars a year.',\n",
       " \"Trump's budget is just a proposal at this point.\",\n",
       " \"We've had one as long as I can remember.\",\n",
       " 'However, there is one key point to keep in mind.',\n",
       " \"It's working up to a year now.\",\n",
       " 'A house full of snow.',\n",
       " 'All those European countries.',\n",
       " 'All of those things that one does with kids.',\n",
       " \"That aren't getting taught at home.\",\n",
       " \"That's what they said.\",\n",
       " 'Tax year of eighty one.',\n",
       " \"That's a big concern if you live there.\",\n",
       " 'So the doctor elected to have her.',\n",
       " 'Did they mail that to you?',\n",
       " 'Tim would like to take on other genres.',\n",
       " \"Can't give up on it.\",\n",
       " 'Through the newspaper reviews.',\n",
       " 'We should see to our own lives.',\n",
       " 'What have you seen?',\n",
       " 'What do you like to do this time of year.',\n",
       " \"She didn't announce that to you.\",\n",
       " 'I never even knew that.',\n",
       " 'There was one other person besides myself.',\n",
       " \"You don't have to hide it either.\",\n",
       " \"You can't get all of us.\",\n",
       " 'I think the newsmen.',\n",
       " \"That's understandable.\",\n",
       " 'We see them at least once a week.',\n",
       " 'That was a shocker to me.',\n",
       " 'It came out I guess about a month.',\n",
       " 'I saw all seven games of that.',\n",
       " 'The decision was not even close.',\n",
       " 'We always seem to.',\n",
       " 'The house payment.',\n",
       " 'Tactical decision.',\n",
       " \"That's what's happening here.\",\n",
       " 'They have a lot of cattle in the area.',\n",
       " \"That's one of the biggest ones I've seen.\",\n",
       " 'My name is Pat Johnson and I live in Texas.',\n",
       " 'They can if they want to.',\n",
       " 'A liberal arts school.',\n",
       " 'In the Houston area.',\n",
       " 'What kind of puppy you got?',\n",
       " 'You actually hand quilted it yourself.',\n",
       " 'Got into it when I was young.',\n",
       " \"It's depressing too.\",\n",
       " 'They live with us.',\n",
       " \"He's just goofing off like he always has.\",\n",
       " \"It's nothing like what it used to be.\",\n",
       " 'We try to do one thing once a year.',\n",
       " 'You can come and go as you please.',\n",
       " 'You just call him direct.',\n",
       " 'Owning on them and making payments on them.',\n",
       " 'You can have a variety.',\n",
       " 'You have to fly through.',\n",
       " 'Just the regular channel.',\n",
       " 'He plays pretty regularly.',\n",
       " \"You don't feel super cold.\",\n",
       " 'What are the problems with public education?',\n",
       " 'Oh well, no point in dwelling on the past.',\n",
       " 'You have to pay the interest.',\n",
       " \"I'm old fashioned.\",\n",
       " \"It's just a matter of passing the law.\",\n",
       " 'Zoology department.',\n",
       " 'Not that she will remember them.',\n",
       " \"It didn't matter.\",\n",
       " \"That's still not enough for a total.\",\n",
       " \"I'm not saying that they were.\",\n",
       " 'Pay and exhaustion and you know.',\n",
       " 'All my aunts and uncles.',\n",
       " 'A restaurant that employs minorities.',\n",
       " \"It's just one story.\",\n",
       " 'There was one point I was going to make.',\n",
       " 'I have more energy when I exercise.',\n",
       " 'Probably seventies.',\n",
       " 'The court is yet to make its decision public.',\n",
       " \"He's not that old.\",\n",
       " \"He's an indoor cat.\",\n",
       " \"It's a nightmare.\",\n",
       " 'That kind of threw them off.',\n",
       " \"I'm mostly the easy cooker.\",\n",
       " \"It's not anything like Kansas.\",\n",
       " \"We're within walking distance.\",\n",
       " \"I don't care for that at all.\",\n",
       " \"That's a previous generation.\",\n",
       " 'I get great distance hitting it.',\n",
       " 'The kids like to go out.',\n",
       " 'I got it right after high school.',\n",
       " 'Regime change in Iran.',\n",
       " 'There is some definite snark in her report.',\n",
       " 'As far as that goes.',\n",
       " 'Not in terms of north and south.',\n",
       " 'A few minutes ago.',\n",
       " 'He was a courier.',\n",
       " 'Here in Indiana we pay.',\n",
       " \"We didn't do it very much.\",\n",
       " \"It's not at all anything worth talking about.\",\n",
       " 'They can pretty much get everybody there.',\n",
       " 'Have their bellies rubbed and everything.',\n",
       " 'Ambiance down there.',\n",
       " 'Go over there.',\n",
       " 'Was this his entire family?',\n",
       " 'Is she going to stay home?',\n",
       " 'I look at homemaking as a job also.',\n",
       " \"My family's not very big.\",\n",
       " 'They are tempting at times.',\n",
       " \"You'll have one or two that are good.\",\n",
       " 'Put a cramp in your viewing.',\n",
       " 'The vines are really taking over.',\n",
       " \"Governor, the people of Detroit don't forget.\",\n",
       " 'Seven or something like that.',\n",
       " 'Join the gang.',\n",
       " 'I have a standard.',\n",
       " \"I don't want any interruptions.\",\n",
       " \"And that's kind of the way I was raised.\",\n",
       " 'I love all the windows that they have out now too.',\n",
       " \"You know I don't.\",\n",
       " \"It'll go through the slot.\",\n",
       " 'A fine profession.',\n",
       " 'All the money.',\n",
       " 'The thing that should be.',\n",
       " 'If you are convinced that it is the right choice.',\n",
       " \"Because that's perfectly clean.\",\n",
       " \"Make him feel like he wasn't wanted.\",\n",
       " 'We decided to stay.',\n",
       " \"They're just so suspenseful.\",\n",
       " \"I don't like administration.\",\n",
       " 'Did you have to do that?',\n",
       " 'I really enjoy that team.',\n",
       " \"I can't think of his name now.\",\n",
       " 'In fact the first year we were married.',\n",
       " \"I've got a two year old.\",\n",
       " \"That's what the different types are.\",\n",
       " 'When I came back here.',\n",
       " 'Because she makes clothes.',\n",
       " \"I don't water them or anything.\",\n",
       " 'I did well in school.',\n",
       " 'A producer of movies in Baltimore called.',\n",
       " 'Just all different colors.',\n",
       " 'Barbecues and Mexican food.',\n",
       " 'Congratulations on that.',\n",
       " 'Especially not in some of these big cities.',\n",
       " 'I have my own policy on freeloaders.',\n",
       " 'The nursing home.',\n",
       " 'Snuggle up to you.',\n",
       " \"I don't know if you've heard of her.\",\n",
       " 'But bringing the party together will be easier said than done.',\n",
       " 'I have seen so many who know that God has not given us a spirit of fear.',\n",
       " 'I try to embrace it.',\n",
       " 'We all know that, unfortunately, schools are not always the safest places.',\n",
       " 'No deaths have been reported in the city.',\n",
       " 'He has no criminal convictions.',\n",
       " 'Thank you for signing up.',\n",
       " 'This is the price paid for counting eggs rather than chicken embryos.',\n",
       " 'Grassley grew up on a family farm, where he still works.',\n",
       " 'We decided to ask them.',\n",
       " 'And everyone involved in the case, bar the jury, had been here before.',\n",
       " 'A dog is featured below the text.',\n",
       " 'Lo and behold, they did not.',\n",
       " 'Oh, so you need to put away the milk and cereal?',\n",
       " 'Everything Trump wants to do threatens everything Trudeau wants to do.',\n",
       " 'Peter was in bed when I walked into the bedroom.',\n",
       " 'How does she live with herself?',\n",
       " \"I'm not even disputing the results of the board.\",\n",
       " 'And Hurricane Jose had not turned north yet.',\n",
       " 'The two have since taken some steps toward a rapprochement.',\n",
       " 'But there have often been times when Turkey has been a problematic ally.',\n",
       " 'Check out the new Sales items!',\n",
       " 'I just stood there and soaked it all up.',\n",
       " \"That's a good way to get started.\",\n",
       " 'I was on crutches.',\n",
       " 'When you stop to think of it.',\n",
       " 'It could be used to hassle somebody.',\n",
       " 'America is losing it.',\n",
       " \"I don't know whether you did yours first.\",\n",
       " 'The sticky slide rugs under the carpet.',\n",
       " 'I think the majority are in that.',\n",
       " 'The people I know are from there.',\n",
       " \"I haven't seen too many lately.\",\n",
       " \"I've only seen him in funny stuff.\",\n",
       " 'For a different section.',\n",
       " 'Their mother was sick at times.',\n",
       " 'Wilkinson had no regrets about her decision.',\n",
       " 'My three year old.',\n",
       " 'I really just started.',\n",
       " \"If you're on a trip or something.\",\n",
       " 'She handles it pretty well.',\n",
       " 'We ran into some problems.',\n",
       " 'I just got the new issue.',\n",
       " \"You'll still see people.\",\n",
       " 'To separate the news from the comment.',\n",
       " 'Like the old Hank Williams.',\n",
       " 'I would work in the summer.',\n",
       " 'Taylor attributed this to confirmation bias.',\n",
       " 'We apologize, but this video has failed to load.',\n",
       " \"Go talk to her in the lobby, I'll be right down!\",\n",
       " 'This project has been a real team effort.',\n",
       " 'He can consistently score the damage you need to prowl your hand into play.',\n",
       " 'This is headache powders revisited.',\n",
       " 'Understanding the mobile landscape.',\n",
       " 'Trump has nominated William Barr as the next attorney general.',\n",
       " 'Pasquale thinks a similar argument should be made for search engines.',\n",
       " 'The Hercules and rainbow stag beetles are highlights here.',\n",
       " 'A Visual Comparison of Various Distances',\n",
       " 'I could hear my accelerated breathing.',\n",
       " 'The news and stories that matter, delivered weekday mornings.',\n",
       " 'Which Amiga games impressed you in terms of gameplay or technical tricks?',\n",
       " 'Also, at that time we were starting to hate each other a little bit.',\n",
       " 'Are either or both dogs considered dangerous under Davis County ordinance?',\n",
       " 'Target and Difficulty Calculations',\n",
       " 'What is beef jerky, anyway?',\n",
       " 'The quantity you chose exceeds the quantity available.',\n",
       " 'But even at its most scientific, the concept is simple.',\n",
       " 'Are they the same there?',\n",
       " 'Is it a novelty or a real coin of importance?',\n",
       " 'Water into this bowl.',\n",
       " 'We just left it alone.',\n",
       " 'You can always heat it up again.',\n",
       " 'Trying to find someone at home.',\n",
       " 'Sort of like an exchange program.',\n",
       " \"I don't know how we could make it more fair.\",\n",
       " 'Better than Europe.',\n",
       " 'To go out fishing in a boat.',\n",
       " 'About four hundred showed up.',\n",
       " 'We came from living in a condo for nine years.',\n",
       " 'They were pretty much in good taste.',\n",
       " 'I never quite found a New York fan.',\n",
       " 'Before you realize anything is going on.',\n",
       " \"When I'm up here.\",\n",
       " \"And that's what it falls under.\",\n",
       " 'I never go to the fiction section.',\n",
       " 'Our sleeping bags I guess.',\n",
       " 'See things like that.',\n",
       " \"I don't get that.\",\n",
       " \"We're going to have to do something.\",\n",
       " \"I wasn't really.\",\n",
       " \"We think it's good.\",\n",
       " 'It is in agreement with its provisions.',\n",
       " 'I can really tell the difference.',\n",
       " 'The last book I read.',\n",
       " 'Almost like a tingling.',\n",
       " 'I think that could work as a toy.',\n",
       " 'I noticed those dogs.',\n",
       " 'Those games are fun to watch.',\n",
       " 'Things of that nature which made us feel good.',\n",
       " 'What kind of dog do you have.',\n",
       " 'They are going to get a gun no matter what.',\n",
       " 'A little brief autobiography of themselves.',\n",
       " 'On occasion I can wear jeans.',\n",
       " 'The news was first reported by Deadline Hollywood.',\n",
       " 'Students do not like this response.',\n",
       " 'There are no more vehicle tokens spread across the map.',\n",
       " 'Where is the evidence that they possess consciousness?',\n",
       " 'Nevertheless I roll my eyes as I get up.',\n",
       " 'People will do humiliating, grotesque things for ambition.',\n",
       " 'Several correspondents had their notebooks searched.',\n",
       " 'They send off emissaries to start new cancer colonies.',\n",
       " 'You actually look forward to foxes or raccoons raiding your garbage.',\n",
       " 'For example, the face might be typically painted red, black and white.',\n",
       " 'It was given to you the minute you cast your ballot.',\n",
       " 'Check out eBay, Craigslist and other online sites for deals.',\n",
       " 'Sign up for our daily newsletter of the top stories in Courier country.',\n",
       " 'Such threats are a violation of the UN Charter.',\n",
       " 'Never miss a moment!',\n",
       " 'He is not making any major changes for the rematch.',\n",
       " 'Worse is the implication of blame.',\n",
       " \"I didn't buy a kit.\",\n",
       " 'Once they have children over here.',\n",
       " 'My husband and all the men.',\n",
       " \"To ask automakers for more jobs won't work.\",\n",
       " 'I like just looking at the billboards.',\n",
       " \"That's true of any sport.\",\n",
       " 'I would not mind it.',\n",
       " \"I'm concerned.\",\n",
       " \"The motivation isn't there for a lot of people.\",\n",
       " 'So you enjoy gardening?',\n",
       " 'She wheeled it out on a cart.',\n",
       " 'Thanks for hearing me rap.',\n",
       " \"We're living longer and people are less trusting.\",\n",
       " 'The choice is yours.',\n",
       " 'Take off and leave your group and go explore.',\n",
       " 'Are you recycling?',\n",
       " 'Healthy vision.',\n",
       " 'They was just bad side effects.',\n",
       " 'I just bought a new house.',\n",
       " \"It's like we've lost our values in this country.\",\n",
       " 'It is time we made that decision together.',\n",
       " 'So many of them nowadays.',\n",
       " 'Basically the guy commits.',\n",
       " \"So it's been real fun here to see.\",\n",
       " 'waves born exercising boat grateful',\n",
       " 'shop spraying mathematics developing nashville',\n",
       " 'run bargains hopeless persists meals',\n",
       " 'keeps allowed monarch strap uses',\n",
       " 'casualty communism argue frost receptacle scroll',\n",
       " 'supported according key arrive crush',\n",
       " 'playing traditional used flat',\n",
       " \"turquoise teams dollars concentration wind's stupidly\",\n",
       " 'pops against idol accurate crop steak',\n",
       " 'kept touring shallows hang everyone',\n",
       " 'terms news earning by commitment',\n",
       " 'grandparents engagement throw stage',\n",
       " 'elk diplomatic plates reimbursed names',\n",
       " 'first terror slips done',\n",
       " 'ice hairs blooming coming opinion take',\n",
       " 'semesters plans lean zales fertilizers dementia',\n",
       " 'grade nucleus arrive excess destroying',\n",
       " 'bonsai meeting ghost mainly',\n",
       " 'resale telling serious understanding moral',\n",
       " 'democratically reduces carbon cramped baked',\n",
       " \"We're paying basketball people.\",\n",
       " 'It sounds like you have really strong views on it.',\n",
       " 'On Sunday the snow and ice came in.',\n",
       " 'But drastic times could call for drastic measures.',\n",
       " 'Either savings or investment.',\n",
       " 'Magnified vision, able to see at night.',\n",
       " \"Of course my job was such that I didn't.\",\n",
       " 'Blizzards now have a new visual effect.',\n",
       " \"It's like a joke I heard once.\",\n",
       " 'After we got married we moved.',\n",
       " 'Live without dessert for the most part.',\n",
       " \"I don't cook anymore.\",\n",
       " 'A lot of people complain.',\n",
       " \"She'd just get on the first step and lay down.\",\n",
       " 'It seems like you get hit the worst.',\n",
       " 'They got real fat.',\n",
       " 'I mean just nothing.',\n",
       " 'While my oldest was a year old.',\n",
       " 'So it was really too late to do much.',\n",
       " \"I'm tired of being a pacifist though.\",\n",
       " \"Maybe they're counting on that.\",\n",
       " 'Being able to have a choice.',\n",
       " 'We want to stay forever.',\n",
       " 'Of course they ate a lot of sea food.',\n",
       " 'It is estimated a total of three thousand properties were affected.',\n",
       " 'Not all men pay their employees less?',\n",
       " 'Also known as welfare.',\n",
       " 'Will the game be priced differently during and after Early Access?',\n",
       " 'Then reboot the system.',\n",
       " 'The product I tried was their mini corn dogs which were superb!',\n",
       " 'Vancouver Teacher Faces Disciplinary Action for Harassing Gay Student',\n",
       " 'How To Avoid Being Tracked on The Internet',\n",
       " 'He was that exceptional.',\n",
       " 'Details of this plot were reported two years ago.',\n",
       " 'We are absolutely as excited about this as you!',\n",
       " 'What other questions would you ask?',\n",
       " 'So, what about the enemies of the Assad regime?',\n",
       " 'Its too valuable to just let people stay in, sorry.',\n",
       " \"And if I'm so blessed by the gods, I'll never have to kill again.\",\n",
       " \"Your birthday and age won't be visible to other users.\",\n",
       " 'Please give me a paramedic.',\n",
       " 'A man with a small pension is a ward of the Government.',\n",
       " \"This week's pick is an algorithm that can diagnose a stroke.\",\n",
       " 'Yet the president is not wrong to be exasperated and enraged.',\n",
       " 'The birch canoe slid on the smooth planks.',\n",
       " 'Glue the sheet to the dark blue background.',\n",
       " \"It's easy to tell the depth of a well.\",\n",
       " 'These days a chicken leg is a rare dish.',\n",
       " 'Rice is often served in round bowls.',\n",
       " 'The juice of lemons makes fine punch.',\n",
       " 'The box was thrown beside the parked truck.',\n",
       " 'The hogs were fed chopped corn and garbage.',\n",
       " 'Four hours of steady work faced us.',\n",
       " 'The small pup gnawed a hole in the sock.',\n",
       " 'The fish twisted and turned on the bent hook.',\n",
       " 'Press the pants and sew a button on the vest.',\n",
       " 'The swan dive was far short of perfect.',\n",
       " 'The beauty of the view stunned the young boy.',\n",
       " 'Two blue fish swam in the tank.',\n",
       " 'Her purse was full of useless trash.',\n",
       " 'The colt reared and threw the tall rider.',\n",
       " 'It snowed, rained, and hailed the same morning.',\n",
       " 'Read verse out loud for pleasure.',\n",
       " 'Hoist the load to your left shoulder.',\n",
       " 'A vegetable garden.',\n",
       " 'It really helps those people.',\n",
       " 'I never really thought of it that way.',\n",
       " \"I don't know who looks forward to it more.\",\n",
       " 'A wide variety.',\n",
       " \"We can't find a place that will take everything.\",\n",
       " 'When they cut in and speak.',\n",
       " \"They're grown now.\",\n",
       " 'It comes down to measuring.',\n",
       " 'Very suspenseful.',\n",
       " 'Is that pollution?',\n",
       " 'The productivity and the training costs.',\n",
       " \"They're always willing to help you out.\",\n",
       " 'In the mountains.',\n",
       " 'The air is always cool.',\n",
       " \"That's the way I feel.\",\n",
       " 'What kind of running do you do?',\n",
       " \"I don't think it's quite as green.\",\n",
       " 'How large is Williams?',\n",
       " \"I think we've got to do more with recycling.\",\n",
       " 'I have many a time called him to come get me.',\n",
       " 'He worked hard at it.',\n",
       " 'The frosty air passed through the coat.',\n",
       " 'The crooked maze failed to fool the mouse.',\n",
       " 'Adding fast leads to wrong sums.',\n",
       " 'The show was a flop from the very start.',\n",
       " 'A saw is a tool used for making boards.',\n",
       " 'The wagon moved on well oiled wheels.',\n",
       " 'March the soldiers past the next hill.',\n",
       " 'A cup of sugar makes sweet fudge.',\n",
       " 'Place a rosebush near the porch steps.',\n",
       " 'Both lost their lives in the raging storm.',\n",
       " 'We talked of the side show in the circus.',\n",
       " 'Use a pencil to write the first draft.',\n",
       " 'He ran half way to the hardware store.',\n",
       " 'The clock struck to mark the third period.',\n",
       " 'A small creek cut across the field.',\n",
       " 'Cars and busses stalled in snow drifts.',\n",
       " 'The set of china hit the floor with a crash.',\n",
       " 'This is a grand season for hikes on the road.',\n",
       " 'The dune rose from the edge of the water.',\n",
       " 'Those words were the cue for the actor to leave.',\n",
       " 'A yacht slid around the point into the bay.',\n",
       " 'The two met while playing on the sand.',\n",
       " 'The ink stain dried on the finished page.',\n",
       " 'The walled town was seized without a fight.',\n",
       " 'The lease ran out in sixteen weeks.',\n",
       " \"It's just pocket change to a lot of people.\",\n",
       " 'They told me that this was the topic.',\n",
       " 'Are you involved in any other things?',\n",
       " 'The other thing to do.',\n",
       " 'They both cooperate together.',\n",
       " 'It got too cold up there.',\n",
       " \"So, let's make some conclusions.\",\n",
       " 'Train accidents and everything else.',\n",
       " \"I guess that's about it.\",\n",
       " \"I don't think it's a good idea.\",\n",
       " 'Especially for repeat offenders.',\n",
       " \"Since we've been married, I've stopped going.\",\n",
       " \"You don't want to or you don't have the time?\",\n",
       " 'Others said they were disappointed.',\n",
       " \"Somebody's going to change it.\",\n",
       " 'This is easy for me.',\n",
       " \"It's not as severe.\",\n",
       " \"If you don't refute it.\",\n",
       " 'Thank you for participating.',\n",
       " 'Employers have also voiced concerns.',\n",
       " 'An apartment or a home?',\n",
       " 'Inside the jail there.',\n",
       " 'What should be done to avoid all these problems?',\n",
       " \"They don't register.\",\n",
       " \"It's called reviewing my life.\",\n",
       " 'How are you doing?',\n",
       " 'Do you think this is right?',\n",
       " \"This is good, isn't it?\",\n",
       " 'I am also doing this.',\n",
       " 'I feel that we should help them.',\n",
       " 'What should we do now?',\n",
       " 'How are you?',\n",
       " 'Can you show me the way?',\n",
       " 'Many people will come here.',\n",
       " 'She got this from me.',\n",
       " 'This is really very good.',\n",
       " 'We have worked a lot on this.',\n",
       " 'I guess that is very good.',\n",
       " 'I will make it work.',\n",
       " 'I think we all do, right?',\n",
       " \"I wasn't saying this at all.\",\n",
       " 'I have to pay for four things.',\n",
       " 'Call me once you get here.',\n",
       " 'What part of this is hard?',\n",
       " \"There's always a way out of this.\",\n",
       " 'He came by looking for you.',\n",
       " \"They are very mean, I don't like it.\",\n",
       " 'Is there anything to do for me?',\n",
       " 'What could you do in a few days?',\n",
       " 'Remember to let other people through first.',\n",
       " 'Show me what you have got.',\n",
       " \"I can't believe this is true.\",\n",
       " 'I used their water.',\n",
       " 'I like the last bit of this movie.',\n",
       " 'Be nice to each other.',\n",
       " 'I will stay with my family for a week.',\n",
       " 'I went back to get the kids.',\n",
       " 'Something seems off with her.',\n",
       " 'She gave me a new watch.',\n",
       " 'I like to enjoy my life in the country.',\n",
       " 'Do you still care about your job?',\n",
       " 'She lives in the house right next to me.',\n",
       " 'Are they both still around?',\n",
       " 'I get less time to be with children these days.',\n",
       " 'Years have gone by.',\n",
       " 'Can we use this for something?',\n",
       " 'What is the point of all this?',\n",
       " \"I can't think of a better time.\",\n",
       " 'I hope to see you there.',\n",
       " 'We should at least try this out.',\n",
       " 'I would love to have more of these too.',\n",
       " \"It's great you could join us here.\",\n",
       " 'Keep this with you for now.',\n",
       " 'Someone thought this show was very bad.',\n",
       " \"I don't like this either.\",\n",
       " 'How far do we have to go.',\n",
       " 'Are you able to come with me next week?',\n",
       " \"Can you guess what's in this?\",\n",
       " 'Will you be around next week?',\n",
       " \"What's different about this?\",\n",
       " 'This house looks very big.',\n",
       " \"What's your point?\",\n",
       " 'Some people are quite good at this.',\n",
       " 'What would you like to do first?',\n",
       " 'I would love to be a part of this.',\n",
       " \"crime men turn couldn't morning somewhere.\",\n",
       " \"sports miss companies aren't television.\",\n",
       " \"drug newspaper you'll report baby.\",\n",
       " \"won't choices couldn't thirty cards.\",\n",
       " \"avoid anymore we'd guys oil.\",\n",
       " 'funny past decisions dallas future bring.',\n",
       " 'variety yours originally check second.',\n",
       " 'child boys classical clothes team.',\n",
       " 'store somewhere unusual helps miss.',\n",
       " 'course girl exactly fan watching.',\n",
       " \"you'll middle benefits education lives.\",\n",
       " 'mother testing second easier program number.',\n",
       " 'coming sun york favorite food.',\n",
       " \"grew spend men necessarily you've.\",\n",
       " 'vote month expected fan nursing.',\n",
       " 'thank york catch along set.',\n",
       " 'mother team system amount texas.',\n",
       " 'story noise sounds eight friends.',\n",
       " 'line talked turn depends across weekend.',\n",
       " 'Large number of employees will miss this.',\n",
       " 'This program will help our growing team.',\n",
       " 'My baby grew a lot in the first six months.',\n",
       " 'This policy is important for social interest.',\n",
       " 'I know what the deal was in the past.',\n",
       " 'Quality education will certainly help.',\n",
       " 'This does not appeal to me at all.',\n",
       " 'They gave variety of benefits to their employees.',\n",
       " 'We have a long evening ahead of us.',\n",
       " 'Different choices between past and future.',\n",
       " 'This will fall down soon.',\n",
       " 'American teams are clear this season.',\n",
       " 'Nothing will change my heart on this matter.',\n",
       " 'She was supposed to go with me.',\n",
       " 'He says he was paid today.',\n",
       " 'School gave a card and books to this small child.',\n",
       " 'Parents of small children care about this.',\n",
       " 'She was talking her time to do the work.',\n",
       " 'This policy mostly sounds right to me.',\n",
       " 'You should avoid this course.',\n",
       " 'Especially if you are coming this weekend.',\n",
       " 'We will listen to whatever jury says.',\n",
       " 'I will give you mine for free.',\n",
       " 'How much is your card worth?',\n",
       " 'Please order our regular food for everyone.',\n",
       " 'It pertains to my daily life at the present time.',\n",
       " 'But you have friends that have children.',\n",
       " 'Stones with runes on them served as checkpoints.',\n",
       " 'There was a story of a woman last year.',\n",
       " 'A medical problem.',\n",
       " 'What area of the country do you live in?',\n",
       " 'This happened about a week ago.',\n",
       " 'It seems like you walk quicker.',\n",
       " 'In the district.',\n",
       " 'They started looking into programs.',\n",
       " 'Join us for an upcoming event.',\n",
       " 'Do you believe in the Dallas Cowboys?',\n",
       " 'Do you find yourself funny?',\n",
       " \"To get into the system's very difficult.\",\n",
       " 'Second generation.',\n",
       " 'Real rough type camping.',\n",
       " 'So I made my own version.',\n",
       " 'They said it was really riveting.',\n",
       " 'That would be good.',\n",
       " \"I'm not building any reserves.\",\n",
       " 'Just put your paper in the same place every day.',\n",
       " 'Pull all this weight.',\n",
       " 'Bring the pot to a boil.',\n",
       " \"It's my voice.\",\n",
       " 'What does the American report say?',\n",
       " 'Social states give more benefits to employees.',\n",
       " 'I am taking my car to check this out in the evening.',\n",
       " 'This car is super expensive.',\n",
       " 'Her friend will thank her for the card.',\n",
       " 'Originally, it was a year long nursing program.',\n",
       " 'What happened to the sound?',\n",
       " 'Tell me your education story.',\n",
       " \"There's good music and good food.\",\n",
       " 'What is the cost of each piece you sell?',\n",
       " 'I often listen to this wonderful music.',\n",
       " 'I have to go across the city to see her.',\n",
       " 'My teachers exactly knew my mind.',\n",
       " 'How long have you been married?',\n",
       " 'Your experience is very good for this job.',\n",
       " 'His good luck will help him avoid problems.',\n",
       " 'I am free to make a decision about my college.',\n",
       " 'Avoid capital punishment.',\n",
       " 'Getting married is not a crime.',\n",
       " 'What happened after I left?',\n",
       " \"Let's make a program for kids.\",\n",
       " 'Light clothes are bad.',\n",
       " 'How many miles were you driving today?',\n",
       " 'Government will run from the capital city.',\n",
       " 'They will take up recycling from now on.',\n",
       " 'My yard is very clean today.',\n",
       " 'I am not a fan of this kind of music.',\n",
       " 'Nothing is more important to him than power.',\n",
       " 'I like to work in this small room.',\n",
       " 'Their situation is not as bad.',\n",
       " \"This is teacher's favourite topic.\",\n",
       " 'She has experience in education.',\n",
       " 'This will not matter much in the long run.',\n",
       " 'I tried a lot, but this is still wrong.',\n",
       " 'I paid all my taxes the night before.',\n",
       " 'This is a growing company with many employees.',\n",
       " 'The law gives everyone this one right.',\n",
       " 'Listen to your parents right now.',\n",
       " \"What's the occasion today?\",\n",
       " 'Your mother said this is enough.',\n",
       " 'How many books on law do you have?',\n",
       " 'Which book are you reading to children?',\n",
       " 'This seems like a large catch.',\n",
       " 'Do you have your credit card?',\n",
       " 'The couple got married last week.',\n",
       " 'I have lived in Illinois and Dallas.',\n",
       " 'Television service is awful here.',\n",
       " 'The jury cannot change this fact.',\n",
       " 'I was in the nursing school originally.',\n",
       " 'Recycling law depends on your city.',\n",
       " \"I'm watching my kid's education.\",\n",
       " 'I bought these new clothes today.',\n",
       " 'This may seem important now.',\n",
       " 'Tax season is certainly interesting.',\n",
       " \"There's so much noise in the capital city.\",\n",
       " 'Your story is not clear anymore.',\n",
       " 'This room is quite neat.',\n",
       " 'Children do not like punishment.',\n",
       " 'My wife thinks this is not worth the time.',\n",
       " 'They make less money in this season with snow.',\n",
       " 'Will you spend all that time reading your book?',\n",
       " \"I am here for my friend's company.\",\n",
       " 'You should vote for the future.',\n",
       " 'The morning sun light is white.',\n",
       " 'My teachers are very helpful.',\n",
       " 'Is this noise from the TV?',\n",
       " \"Eat your food, it's getting cold.\",\n",
       " 'Who benefits from this policy?',\n",
       " 'My daughter likes to play outside in the yard.',\n",
       " 'He will understand when I talk to him.',\n",
       " 'How many casualties were there?',\n",
       " 'Luxury does not have to be expensive.',\n",
       " 'I like all this open space around the house.',\n",
       " 'Evening is the best time for reading.',\n",
       " 'Nothing can stop me from doing this.',\n",
       " 'She will sit across the room.',\n",
       " 'Seventy awful years.',\n",
       " 'Appeal of luxury things is something else.',\n",
       " 'This is a special city in the special country.',\n",
       " 'Certain people enjoy this kind of music.',\n",
       " 'Camping is getting more expensive.',\n",
       " 'Who thought of this wonderful idea?',\n",
       " 'Some more time in the morning would be nice.',\n",
       " 'What was the exact line in the play?',\n",
       " 'We must always keep looking forward.',\n",
       " 'Check the oil in the car first.',\n",
       " 'The view from here is not worth it.',\n",
       " 'He is quite a social person.',\n",
       " 'There will be more light somewhere around here.',\n",
       " 'That woman will check this fact.',\n",
       " 'Do not sell your free time or your peace of mind.',\n",
       " 'It all worked out quite well for our family.',\n",
       " 'The joy of an early morning walk.',\n",
       " 'It is easier to go without food.',\n",
       " 'My vision does not amount to anything.',\n",
       " 'The baby will stick its head in the food.',\n",
       " 'How often do you go for a walk with him?',\n",
       " 'We should turn around and pick her up from school.',\n",
       " 'The car will go north from here.',\n",
       " 'They must think if they can live with this decision.',\n",
       " 'I am not sure if this will be interesting to the kids.',\n",
       " 'That was a close call.',\n",
       " 'First of all, remember to be nice.',\n",
       " 'Can you put the book down for a few minutes?',\n",
       " 'Yes, I am talking to you.',\n",
       " 'This will make somebody care for the situation.',\n",
       " 'We talked about this the other day.',\n",
       " 'What was your experience working with him?',\n",
       " 'She was the division head in the college.',\n",
       " 'He wanted to walk for miles.',\n",
       " \"Please, that's enough music for today.\",\n",
       " 'We need to work more on the social part.',\n",
       " 'This married couple will enjoy their visit.',\n",
       " \"I don't see what the problem is.\",\n",
       " 'I guess something has to go.',\n",
       " \"Do you know that I've never seen a monkey?\",\n",
       " \"They've enjoyed the school.\",\n",
       " 'Prince of Persia.',\n",
       " \"Seeing as you've got some older children.\",\n",
       " \"I think it's caused a lot.\",\n",
       " \"If you haven't slowed down.\",\n",
       " 'Keeps to their own turf.',\n",
       " 'We also set aside money for entertainment.',\n",
       " 'I enjoy the news.',\n",
       " 'Everything I learned about ancient Rome.',\n",
       " 'As far as doing things for them.',\n",
       " \"Right now I'm busy chasing my kids.\",\n",
       " \"Wasn't allowed to run a lawn mower.\",\n",
       " 'The coastline was just incredible.',\n",
       " 'Why do you say that actually.',\n",
       " 'You were great.',\n",
       " 'Ten months later he said.',\n",
       " \"I'm glad to hear that.\",\n",
       " 'I had to change the water in that.',\n",
       " \"I mean I've haven't had it that long.\",\n",
       " 'When they fall over.',\n",
       " 'Check out our free versions!',\n",
       " 'I make my living by phone.',\n",
       " \"I'm in charge of raising them.\",\n",
       " 'I think it would work out well.',\n",
       " 'They will not get everything done anyway.',\n",
       " 'People from my college were already there.',\n",
       " 'I will probably say no to this.',\n",
       " 'It is the policy in this country.',\n",
       " 'Having too much power can be difficult.',\n",
       " 'Luxury clothes these days are quite expensive.',\n",
       " 'Is your family from Dallas?',\n",
       " 'There seems to be some confusion about this.',\n",
       " 'There are some awful places in that country.',\n",
       " 'My computer is not working anymore.',\n",
       " 'Employees should do some field work when they join.',\n",
       " 'Where is that awful noise coming from?',\n",
       " 'My American friends are from Texas.',\n",
       " 'The old newspaper will have a report on this.',\n",
       " 'How much do you have to pay for your credit card?',\n",
       " 'We cannot go there without you.',\n",
       " 'Not everyone can exercise in the morning.',\n",
       " 'The work gets easier with experience.',\n",
       " 'We will have to place a large order.',\n",
       " 'I have been working on this since early morning.',\n",
       " 'The air is clear at night.',\n",
       " 'I will probably own a car soon.',\n",
       " 'With their good luck, they will avoid these problems.',\n",
       " 'How many minutes did it take to cook this food?',\n",
       " \"Let's play this new version of my favorite music.\",\n",
       " 'Most of us think we know the story of Detroit, Michigan.',\n",
       " \"That was one thing that's been really nice to have.\",\n",
       " 'They shot him point blank in the face.',\n",
       " 'These are made with a flour tortilla.',\n",
       " 'Did you hear from another person?',\n",
       " 'It has been like fifteen years.',\n",
       " 'Here are my notes from the first draft of the paper.',\n",
       " 'Do you have any pets now?',\n",
       " 'My mom lives like I do.',\n",
       " 'That is a very good point.',\n",
       " 'Who is related to who?',\n",
       " 'How much time do you spend with your children?',\n",
       " 'She is more famous since she did that.',\n",
       " 'They will financially recover from the loan.',\n",
       " \"They don't make them anymore.\",\n",
       " 'There were none left by the year two thousand.',\n",
       " 'Wayne State University is in Detroit, Michigan.',\n",
       " \"It's an invasion of privacy.\",\n",
       " 'There were no reports of casualties.',\n",
       " 'The blue sky looks so pretty.',\n",
       " 'It is impressive in more than just quantity.',\n",
       " 'That is strange to me.',\n",
       " \"I'm out more money than they are.\",\n",
       " 'I am from Argentina.',\n",
       " 'It is like winning the lottery.',\n",
       " 'I did it all on the job.',\n",
       " 'Pets can be a lot of trouble.',\n",
       " 'Whatever you like.',\n",
       " 'The movie is slow burning.',\n",
       " \"We don't listen to any elevator music at my house.\",\n",
       " 'What do you think?',\n",
       " 'He was a good player.',\n",
       " 'You came out on top.',\n",
       " 'That really ticks me off.',\n",
       " 'There were thousands of cows on the farm.',\n",
       " 'Now some people object during primary debates.',\n",
       " 'You have to do the random drug testing.',\n",
       " \"The recipe doesn't require kneading the dough.\",\n",
       " 'They could sell five million of those.',\n",
       " 'The ranch dressing goes along well with that.',\n",
       " 'It used to cost six bucks.',\n",
       " \"In those situations I don't know how to behave.\",\n",
       " 'It would be fun just to win one.',\n",
       " 'The full decision statement is available here.',\n",
       " 'Especially with butter and honey on them.',\n",
       " \"I'm interested in having it in my office.\",\n",
       " 'I do not talk to many people in the military.',\n",
       " 'Who do they need to trade before the deadline?',\n",
       " 'You will gain competence with respect to the material.',\n",
       " \"It's not hard to find a job that's part time.\",\n",
       " 'Do you still drive that old car?',\n",
       " 'They would have to be supported in some way.',\n",
       " 'I miss the intellectual stimulation of taking classes in college.',\n",
       " 'As you get older you will understand.',\n",
       " 'You would know if you lost it or something.',\n",
       " 'Are you a computer hacker?',\n",
       " 'There is a very serious situation across the street.',\n",
       " \"I don't make that much money.\",\n",
       " \"It's actually just twelve years old.\",\n",
       " 'They were not really into sports.',\n",
       " 'Thank you, and please enjoy your stay!',\n",
       " 'My favorite author just released a brand new book.',\n",
       " \"I didn't know there was such a thing.\",\n",
       " 'That is my retirement plan.',\n",
       " 'I would like to see a little bit more of that.',\n",
       " 'She jumped at the sound of the thunder storm.',\n",
       " 'I have to balance work and life.',\n",
       " 'He took care of it.',\n",
       " 'I love to watch cartoons on Saturday morning.',\n",
       " 'She will be a sophomore in high school next year.',\n",
       " 'Some interviews will be a lot more casual.',\n",
       " 'You have a choice of which car to buy.',\n",
       " 'My real concern is this.',\n",
       " \"It's been kind of scary.\",\n",
       " 'They would have never picked it out.',\n",
       " \"That is why they're kind of behind on work.\",\n",
       " 'She can do it.',\n",
       " \"She's like seventy four years old now.\",\n",
       " \"I couldn't understand.\",\n",
       " 'I just want to enjoy myself a little bit.',\n",
       " 'It could be recycled.',\n",
       " 'It costs ten or twenty dollars per ounce.',\n",
       " ...]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the target_sentence column as a plain Python list.\n",
    "df.loc[:, \"target_sentence\"].to_list()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7f56abfb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>target_sentence</th>\n",
       "      <th>pred_sentence</th>\n",
       "      <th>WER_scores</th>\n",
       "      <th>METEOR_scores</th>\n",
       "      <th>ROUGE_scores</th>\n",
       "      <th>BERTScore_F1_scores</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>808</th>\n",
       "      <td>Your story is not clear anymore.</td>\n",
       "      <td>your story is not clear</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.997685</td>\n",
       "      <td>{'rouge1': Score(precision=1.0, recall=1.0, fm...</td>\n",
       "      <td>-2.887432e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>810</th>\n",
       "      <td>Children do not like punishment.</td>\n",
       "      <td>children do not like</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.996000</td>\n",
       "      <td>{'rouge1': Score(precision=1.0, recall=1.0, fm...</td>\n",
       "      <td>-4.925075e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>811</th>\n",
       "      <td>My wife thinks this is not worth the time.</td>\n",
       "      <td>my wife thinks this is not worth the</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.999314</td>\n",
       "      <td>{'rouge1': Score(precision=1.0, recall=1.0, fm...</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>749</th>\n",
       "      <td>Just put your paper in the same place every day.</td>\n",
       "      <td>just put your paper in the same place every</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.999500</td>\n",
       "      <td>{'rouge1': Score(precision=1.0, recall=1.0, fm...</td>\n",
       "      <td>1.502312e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1008</th>\n",
       "      <td>I still like the New York Giants.</td>\n",
       "      <td>i still like the new york</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.998542</td>\n",
       "      <td>{'rouge1': Score(precision=1.0, recall=1.0, fm...</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>322</th>\n",
       "      <td>The nursing home.</td>\n",
       "      <td>the nursing home</td>\n",
       "      <td>10.333333</td>\n",
       "      <td>0.482696</td>\n",
       "      <td>{'rouge1': Score(precision=0.08823529411764706...</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>319</th>\n",
       "      <td>Congratulations on that.</td>\n",
       "      <td>congratulations on that and</td>\n",
       "      <td>10.666667</td>\n",
       "      <td>0.474910</td>\n",
       "      <td>{'rouge1': Score(precision=0.08571428571428572...</td>\n",
       "      <td>-2.255734e+17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562</th>\n",
       "      <td>Very suspenseful.</td>\n",
       "      <td>very successful in</td>\n",
       "      <td>12.500000</td>\n",
       "      <td>0.113636</td>\n",
       "      <td>{'rouge1': Score(precision=0.03846153846153846...</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1395</th>\n",
       "      <td>She doesn't work.</td>\n",
       "      <td>she doesn't walk</td>\n",
       "      <td>12.666667</td>\n",
       "      <td>0.279851</td>\n",
       "      <td>{'rouge1': Score(precision=0.05, recall=0.6666...</td>\n",
       "      <td>-2.273988e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1081</th>\n",
       "      <td>Employment income.</td>\n",
       "      <td>employment com</td>\n",
       "      <td>14.500000</td>\n",
       "      <td>0.104167</td>\n",
       "      <td>{'rouge1': Score(precision=0.03333333333333333...</td>\n",
       "      <td>3.282795e+10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1426 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       target_sentence  \\\n",
       "808                   Your story is not clear anymore.   \n",
       "810                   Children do not like punishment.   \n",
       "811         My wife thinks this is not worth the time.   \n",
       "749   Just put your paper in the same place every day.   \n",
       "1008                 I still like the New York Giants.   \n",
       "...                                                ...   \n",
       "322                                  The nursing home.   \n",
       "319                           Congratulations on that.   \n",
       "562                                  Very suspenseful.   \n",
       "1395                                 She doesn't work.   \n",
       "1081                                Employment income.   \n",
       "\n",
       "                                    pred_sentence  WER_scores  METEOR_scores  \\\n",
       "808                       your story is not clear    0.000000       0.997685   \n",
       "810                          children do not like    0.000000       0.996000   \n",
       "811          my wife thinks this is not worth the    0.000000       0.999314   \n",
       "749   just put your paper in the same place every    0.000000       0.999500   \n",
       "1008                    i still like the new york    0.000000       0.998542   \n",
       "...                                           ...         ...            ...   \n",
       "322                              the nursing home   10.333333       0.482696   \n",
       "319                   congratulations on that and   10.666667       0.474910   \n",
       "562                            very successful in   12.500000       0.113636   \n",
       "1395                             she doesn't walk   12.666667       0.279851   \n",
       "1081                               employment com   14.500000       0.104167   \n",
       "\n",
       "                                           ROUGE_scores  BERTScore_F1_scores  \n",
       "808   {'rouge1': Score(precision=1.0, recall=1.0, fm...        -2.887432e+01  \n",
       "810   {'rouge1': Score(precision=1.0, recall=1.0, fm...        -4.925075e+00  \n",
       "811   {'rouge1': Score(precision=1.0, recall=1.0, fm...         1.000000e+00  \n",
       "749   {'rouge1': Score(precision=1.0, recall=1.0, fm...         1.502312e+01  \n",
       "1008  {'rouge1': Score(precision=1.0, recall=1.0, fm...         1.000000e+00  \n",
       "...                                                 ...                  ...  \n",
       "322   {'rouge1': Score(precision=0.08823529411764706...         1.000000e+00  \n",
       "319   {'rouge1': Score(precision=0.08571428571428572...        -2.255734e+17  \n",
       "562   {'rouge1': Score(precision=0.03846153846153846...         1.000000e+00  \n",
       "1395  {'rouge1': Score(precision=0.05, recall=0.6666...        -2.273988e+04  \n",
       "1081  {'rouge1': Score(precision=0.03333333333333333...         3.282795e+10  \n",
       "\n",
       "[1426 rows x 6 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Order the rows by ascending WER_scores. The sorted frame is rebound to `df`\n",
    "# instead of passing inplace=True, so later cells still see `df` sorted while\n",
    "# the existing frame object is not mutated by this cell.\n",
    "df = df.sort_values(by=\"WER_scores\", ascending=True)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "448463c5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>808</th>\n",
       "      <td>0</td>\n",
       "      <td>your story is not clear</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>810</th>\n",
       "      <td>1</td>\n",
       "      <td>children do not like</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>811</th>\n",
       "      <td>2</td>\n",
       "      <td>my wife thinks this is not worth the</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>749</th>\n",
       "      <td>3</td>\n",
       "      <td>just put your paper in the same place every</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1008</th>\n",
       "      <td>4</td>\n",
       "      <td>i still like the new york</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>322</th>\n",
       "      <td>1421</td>\n",
       "      <td>the nursing home</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>319</th>\n",
       "      <td>1422</td>\n",
       "      <td>congratulations on that and</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562</th>\n",
       "      <td>1423</td>\n",
       "      <td>very successful in</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1395</th>\n",
       "      <td>1424</td>\n",
       "      <td>she doesn't walk</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1081</th>\n",
       "      <td>1425</td>\n",
       "      <td>employment com</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1426 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        id                                         text\n",
       "808      0                      your story is not clear\n",
       "810      1                         children do not like\n",
       "811      2         my wife thinks this is not worth the\n",
       "749      3  just put your paper in the same place every\n",
       "1008     4                    i still like the new york\n",
       "...    ...                                          ...\n",
       "322   1421                             the nursing home\n",
       "319   1422                  congratulations on that and\n",
       "562   1423                           very successful in\n",
       "1395  1424                             she doesn't walk\n",
       "1081  1425                               employment com\n",
       "\n",
       "[1426 rows x 2 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d3a4e15",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "evo",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
