{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import json\n",
    "import pickle\n",
    "import random\n",
    "import re\n",
    "import os\n",
    "from datetime import datetime\n",
    "import evaluate\n",
    "import torch\n",
    "import numpy as np\n",
    "from datasets import load_dataset\n",
    "from tqdm import tqdm\n",
    "\n",
    "from eval import *\n",
    "from llama.metrics import *\n",
    "from llama.generation import Llama\n",
    "from llama.mixed_generation import MixedLlama\n",
    "from llama.tokenizer import Tokenizer\n",
    "from ngrams.ngram_models import make_models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load data\n",
    "with open(\"../gpt-2-output-dataset/data/webtext.test.jsonl\", \"r\") as f:\n",
    "    dataset = [json.loads(line)[\"text\"] for line in f]\n",
    "mixing_options = [\"sample\", \"sample_new_weights_with_score\", \"sample_weights_with_current\"]\n",
    "smoothing_options = [None, \"geom\", \"all\"]\n",
    "\n",
    "# params\n",
    "prompt_len = 15\n",
    "max_gen_len = 10\n",
    "n_drafts = 3\n",
    "n_token_sample = 3 * n_drafts\n",
    "n_token_consider = 32000\n",
    "bsz = 32\n",
    "tokenizer = Tokenizer('./7B/tokenizer.model')\n",
    "mixing_method = mixing_options[1]\n",
    "smoothing = smoothing_options[1]\n",
    "sample_tokens = False\n",
    "sample_beams = False\n",
    "data = dataset\n",
    "n_prompts = len(data)\n",
    "\n",
    "# weighting\n",
    "ckpt_path = None\n",
    "ckpt_path = \"./ckpts-200k\"\n",
    "\n",
    "\n",
    "i_weights = [0.01, 0.04, 0.15, 0.18, 0.12]\n",
    "i_length = [1, 2, 3, 4, 5]\n",
    "alpha = 0.54\n",
    "temp = 0.12\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Making bigram...\n",
      "1310800\n",
      "Making trigram...\n",
      "671088728\n",
      "Making fourgram...\n",
      "2684354648\n"
     ]
    }
   ],
   "source": [
    "if ckpt_path is not None:\n",
    "    ngrams = make_models(ckpt_path, bigram=True, trigram=True, fourgram=True, fivegram=False, sixgram=False, sevengram=False)\n",
    "else:\n",
    "    ngrams = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "mixed_device = torch.device(\"cuda:0\")\n",
    "reg_device = torch.device(\"cuda:1\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Mixed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.environ[\"RANK\"] = \"0\"\n",
    "os.environ[\"WORLD_SIZE\"] = \"1\"\n",
    "os.environ[\"MASTER_ADDR\"] = \"127.0.0.1\"\n",
    "os.environ[\"MASTER_PORT\"] = \"10302\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "> initializing model parallel with size 1\n",
      "> initializing ddp with size 1\n",
      "> initializing pipeline with size 1\n",
      "0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/temp/miniconda3/envs/mixed/lib/python3.11/site-packages/torch/__init__.py:696: UserWarning: torch.set_default_tensor_type() is deprecated as of PyTorch 2.1, please use torch.set_default_dtype() and torch.set_default_device() as alternatives. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:451.)\n",
      "  _C._set_default_tensor_type(t)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded in 6.72 seconds\n",
      "cuda:0\n"
     ]
    }
   ],
   "source": [
    "weight_path = \"./7B/\"\n",
    "model = MixedLlama.build(ckpt_dir=weight_path, \n",
    "                         tokenizer_path='./7B/tokenizer.model', \n",
    "                         max_seq_len=100, \n",
    "                         max_batch_size=32,\n",
    "                         device=mixed_device,\n",
    "                         model_parallel_size=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Smoothing with geom\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████████| 157/157 [03:16<00:00,  1.25s/it]\n"
     ]
    }
   ],
   "source": [
    "print(f\"Smoothing with {smoothing}\")\n",
    "start_time = datetime.now()\n",
    "mixed_sequences, mixed_ppl, time = evaluate_mixed_losses(data=data,\n",
    "                                       model=model,\n",
    "                                       tokenizer=tokenizer,\n",
    "                                       prompt_len=prompt_len,\n",
    "                                       max_gen_len=max_gen_len,\n",
    "                                       alpha=0.55,\n",
    "                                       temp=0.1,\n",
    "                                       n_drafts=n_drafts,\n",
    "                                       n_token_consider=n_token_consider,\n",
    "                                       n_token_sample=n_token_sample,\n",
    "                                       mixing_method=mixing_method,\n",
    "                                       smoothing=smoothing,\n",
    "                                       debug=False,\n",
    "                                       bsz=bsz, # for timing\n",
    "                                       i_weights=i_weights[:3],\n",
    "                                       i_length=i_length[:3],\n",
    "                                       ngrams=ngrams,\n",
    "                                       sample_beams=sample_beams,\n",
    "                                       sample_tokens=sample_tokens,\n",
    "                                       get_time=True,\n",
    "                                       penalty=200,\n",
    "                                       marker=True)\n",
    "finish_time = datetime.now()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([5000, 3, 25])\n",
      "Time: 0:03:16.217468, Average Time: 0:00:00.039243\n",
      "Average Time on Corpus: 0:00:00.029101\n"
     ]
    }
   ],
   "source": [
    "# IF TIMING, NEED BSZ = 1\n",
    "duration = finish_time - start_time\n",
    "print(mixed_sequences.shape)\n",
    "print(f\"Time: {duration}, Average Time: {duration / len(data)}\")\n",
    "\n",
    "print(f\"Average Time on Corpus: {time / len(data)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(f\"owt/p{prompt_len}_d{n_drafts}_ngram4_llama7B_owt.pkl\", \"wb\") as f:\n",
    "    pickle.dump(mixed_sequences, f)    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Nucleus"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "> initializing model parallel with size 1\n",
      "> initializing ddp with size 1\n",
      "> initializing pipeline with size 1\n",
      "0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/temp/miniconda3/envs/mixed/lib/python3.11/site-packages/torch/__init__.py:696: UserWarning: torch.set_default_tensor_type() is deprecated as of PyTorch 2.1, please use torch.set_default_dtype() and torch.set_default_device() as alternatives. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:451.)\n",
      "  _C._set_default_tensor_type(t)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded in 8.41 seconds\n"
     ]
    }
   ],
   "source": [
    "reg_model = Llama.build(ckpt_dir=\"./7B/\", \n",
    "                    tokenizer_path='./7B/tokenizer.model', \n",
    "                    max_seq_len=100, \n",
    "                    max_batch_size=32,\n",
    "                    device=reg_device,\n",
    "                    model_parallel_size=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5000\n"
     ]
    }
   ],
   "source": [
    "# create three times for nucleus data\n",
    "nucleus_data = []\n",
    "# bsz = 1\n",
    "# data = dataset[:250]\n",
    "for s in data:\n",
    "    nucleus_data += [s for i in range(n_drafts)]\n",
    "print(len(nucleus_data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████████| 157/157 [00:42<00:00,  3.68it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Time: 0:00:42.705609, Average Time: 0:00:00.008541\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "start_time = datetime.now()\n",
    "nucleus_sequences, nucleus_ppl = evaluate_nucleus_losses(data=nucleus_data,\n",
    "                                       model=reg_model,\n",
    "                                       tokenizer=tokenizer,\n",
    "                                       prompt_len=prompt_len,\n",
    "                                       max_gen_len=max_gen_len,\n",
    "                                       temp=0,\n",
    "                                       bsz=bsz)\n",
    "finish_time = datetime.now() \n",
    "duration = finish_time - start_time\n",
    "print(f\"Time: {duration}, Average Time: {duration / len(data)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([5000, 1, 25])\n",
      "Time: 0:00:42.705609, Average Time: 0:00:00.008541\n"
     ]
    }
   ],
   "source": [
    "nucleus_ppl = nucleus_ppl.reshape(-1, n_drafts)\n",
    "nucleus_sequences = nucleus_sequences.reshape(len(data), n_drafts, -1)\n",
    "print(nucleus_sequences.shape)\n",
    "duration = finish_time - start_time\n",
    "print(f\"Time: {duration}, Average Time: {duration / len(data)}\")\n",
    "with open(f\"./owt/p{prompt_len}_d{n_drafts}_greedy_owt.pkl\", \"wb\") as f:\n",
    "    pickle.dump(nucleus_sequences, f)    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.set_default_dtype(torch.float32)\n",
    "mauve = evaluate.load('mauve')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def decode(tokenizer, encoding):\n",
    "    \"\"\"\n",
    "    Args:\n",
    "        tokenizer (Any): Tokenizer\n",
    "        encoding (torch.Tensor): Encoding\n",
    "    Returns:\n",
    "        decoding (str)\n",
    "    \"\"\"\n",
    "    eos_locs = (encoding == tokenizer.eos_id).nonzero()\n",
    "    if len(eos_locs > 0):\n",
    "        encoding = encoding[:eos_locs[0]]\n",
    "    return tokenizer.decode(encoding.to(torch.int32).tolist())\n",
    "    \n",
    "def evaluate_mauve(tokenizer, predictions, references, n_drafts=n_drafts, mauve=True, verbose=True):\n",
    "    \"\"\"\n",
    "    Evaluate mauve.\n",
    "    Args:\n",
    "        tokenizer (Any): Tokenizer\n",
    "        predictions (torch.Tensor): Tokens of predicted sequences, flattened to (n_prompts * n_drafts, gen_len)\n",
    "        references (List[str]): References\n",
    "        verbose (bool): Whether to print out examples\n",
    "    Returns:\n",
    "        Mauve score\n",
    "    \"\"\"\n",
    "    decoded_preds = []\n",
    "    prompts = []\n",
    "    count = 0\n",
    "    for i in tqdm(range(len(predictions))):\n",
    "        d = decode(tokenizer, predictions[i])\n",
    "        decoded_preds.append(d)\n",
    "        if verbose and i <= 200:\n",
    "            # first draft of this prompt\n",
    "            if i % n_drafts == 0:\n",
    "                count = 0\n",
    "                print(\"---------------\")\n",
    "                prompt = decode(tokenizer, predictions[i][:prompt_len])\n",
    "                print(f\"prompt: {prompt}\")\n",
    "            print(f\"{count}: {d}\")\n",
    "            count += 1\n",
    "        else: \n",
    "            break\n",
    "    if mauve:\n",
    "        mauve_score = mauve.compute(predictions=decoded_preds, references=references).mauve\n",
    "        print(mauve_score)\n",
    "\n",
    "def evaluate_diversity(predictions, prompt_len):\n",
    "    n_p, n_d, sl = predictions.shape\n",
    "    predictions = predictions[:, :, prompt_len:]\n",
    "    diversity = calculate_diversity(predictions.tolist())\n",
    "    return diversity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tokenize and truncate references\n",
    "mixed_references = prepare_encodings(nucleus_data, tokenizer, prompt_len+max_gen_len)\n",
    "nucleus_references = prepare_encodings(nucleus_data, tokenizer, prompt_len+max_gen_len)\n",
    "mixed_references = tokenizer.decode(mixed_references)\n",
    "nucleus_references = tokenizer.decode(nucleus_references)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████████████████| 120/120 [00:00<00:00, 5902.41it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------\n",
      "prompt: Is this restaurant family-friendly ? Yes No Unsure\n",
      "\n",
      "0: Is this restaurant family-friendly ? Yes No Unsure\n",
      "The restaurant is located in the heart of the city\n",
      "1: Is this restaurant family-friendly ? Yes No Unsure\n",
      "The restaurant is located in the heart of the village\n",
      "2: Is this restaurant family-friendly ? Yes No Unsure\n",
      "The restaurant is located in the center of the city\n",
      "---------------\n",
      "prompt: Clinton talks about her time of 'reflection' during sick\n",
      "0: Clinton talks about her time of 'reflection' during sick leave\n",
      "Clinton talks about her time of\n",
      "1: Clinton talks about her time of 'reflection' during sick leave\n",
      "Clinton talks about her health of\n",
      "2: Clinton talks about her time of 'reflection' during sick leave\n",
      "Clinton talks to her time of\n",
      "---------------\n",
      "prompt: House Majority Whip Steve Scalise has been discharged\n",
      "0: House Majority Whip Steve Scalise has been discharged from the hospital after being shot at a congression\n",
      "1: House Majority Whip Steve Scalise has been discharged from the hospital after being shot in a congression\n",
      "2: House Majority Whip Steve Scalise has been discharged from the hospital after being shot during a congression\n",
      "---------------\n",
      "prompt: Insight Course: Lesson 14\n",
      "\n",
      "Control of\n",
      "0: Insight Course: Lesson 14\n",
      "\n",
      "Control of the Mind\n",
      "\n",
      "The mind is the most important\n",
      "1: Insight Course: Lesson 14\n",
      "\n",
      "Control of the Mind\n",
      "\n",
      "The mind is the most powerful\n",
      "2: Insight Course: Lesson 14\n",
      "\n",
      "Control of the Mind\n",
      "\n",
      "The first is the most important\n",
      "---------------\n",
      "prompt: BY JENNIE MCNULTY\n",
      "\n",
      "Lesbian.\n",
      "0: BY JENNIE MCNULTY\n",
      "\n",
      "Lesbian.\n",
      "\n",
      "A Novel.\n",
      "\n",
      "BY J\n",
      "1: BY JENNIE MCNULTY\n",
      "\n",
      "Lesbian.\n",
      "\n",
      "A Novel.\n",
      "\n",
      "BY\n",
      "\n",
      "2: BY JENNIE MCNULTY\n",
      "\n",
      "Lesbian.\n",
      "\n",
      "A Novel\n",
      "\n",
      "\n",
      "BY J\n",
      "---------------\n",
      "prompt: The Buddha's Teaching As It Is\n",
      "\n",
      "In\n",
      "0: The Buddha's Teaching As It Is\n",
      "\n",
      "In the first Sermon, the Buddha\n",
      "1: The Buddha's Teaching As It Is\n",
      "\n",
      "In the following Sermon, the Buddha\n",
      "2: The Buddha's Teaching As It Is\n",
      "\n",
      "In the Bud Sermon, the Buddha\n",
      "---------------\n",
      "prompt: As part of a broad initiative to combat sexual harassment and\n",
      "0: As part of a broad initiative to combat sexual harassment and assault, the University of California, Berkeley,\n",
      "1: As part of a broad initiative to combat sexual harassment and assault on the University of California, Berkeley,\n",
      "2: As part of a broad initiative to combat sexual harassment and assault, the University of California, Berkeley is\n",
      "---------------\n",
      "prompt: The Atlanta Falcons have started the 2015 season \n",
      "0: The Atlanta Falcons have started the 2015 season 1-3, and the team is in a\n",
      "1: The Atlanta Falcons have started the 2015 season 1-4, and the team is in a\n",
      "2: The Atlanta Falcons have started the 2015 season 1-3, and the team is in the\n",
      "---------------\n",
      "prompt: Front Page Torrents Favorites My Home My Galleries Top\n",
      "0: Front Page Torrents Favorites My Home My Galleries Top 100\n",
      "The 100\n",
      "1: Front Page Torrents Favorites My Home My Galleries Top 100\n",
      "The 200\n",
      "2: Front Page Torrents Favorites My Home My Galleries Top 100\n",
      "My 100\n",
      "---------------\n",
      "prompt: They have changed the phone menu to try to deflect us to email\n",
      "0: They have changed the phone menu to try to deflect us to email.\n",
      "I have been trying to get a hold\n",
      "1: They have changed the phone menu to try to deflect us to email.\n",
      "I have been trying to get a ref\n",
      "2: They have changed the phone menu to try to deflect us to email.\n",
      "I have been trying to get ahold\n",
      "---------------\n",
      "prompt: One Page\n",
      "\n",
      "One Page is a browser extension for automatically displaying multi\n",
      "0: One Page\n",
      "\n",
      "One Page is a browser extension for automatically displaying multi-page web pages in a single page.\n",
      "\n",
      "1: One Page\n",
      "\n",
      "One Page is a browser extension for automatically displaying multi-page web pages on a single page.\n",
      "\n",
      "2: One Page\n",
      "\n",
      "One Page is a browser extension for automatically displaying multi-page webpages on a single page.\n",
      "\n",
      "---------------\n",
      "prompt: Intro \"In his search for food, early man tried all kinds\n",
      "0: Intro \"In his search for food, early man tried all kinds of things. He tried to eat the fish,\n",
      "1: Intro \"In his search for food, early man tried all kinds of things. He tried to catch the animals,\n",
      "2: Intro \"In his search for food, early man tried all kinds of things. He tried to eat the animals,\n",
      "---------------\n",
      "prompt: Having trouble viewing the video? Try disabling any ad blocking extensions\n",
      "0: Having trouble viewing the video? Try disabling any ad blocking extensions in your browser.\n",
      "The 201\n",
      "1: Having trouble viewing the video? Try disabling any ad blocking extensions in your browser.\n",
      "The 101\n",
      "2: Having trouble viewing the video? Try disabling any ad blocking extensions in your browser.\n",
      "The 202\n",
      "---------------\n",
      "prompt: Get Liverpool FC updates directly to your inbox Subscribe Thank you for\n",
      "0: Get Liverpool FC updates directly to your inbox Subscribe Thank you for subscribing! Could not subscribe, try again\n",
      "1: Get Liverpool FC updates directly to your inbox Subscribe Thank you for subscribing! Could do subscribe, try again\n",
      "2: Get Liverpool FC updates directly to your inbox Subscribe Thank you for subscribing! We not subscribe, try again\n",
      "---------------\n",
      "prompt: Super Mario Run will be available on Android devices beginning in March, N\n",
      "0: Super Mario Run will be available on Android devices beginning in March, Nintendo announced today.\n",
      "The game will be available\n",
      "1: Super Mario Run will be available on Android devices beginning in March, Nintendo announced today.\n",
      "The game will be released\n",
      "2: Super Mario Run will be available on Android devices beginning in March, Nintendo announced today.\n",
      "The game will be free\n",
      "---------------\n",
      "prompt: The California-based electric car manufacturer joins Jaguar, Land\n",
      "0: The California-based electric car manufacturer joins Jaguar, Land Rover, and Volvo in the race to\n",
      "1: The California-based electric car manufacturer joins Jaguar, Land Rover, and Volvo in a race to\n",
      "2: The California-based electric car manufacturer joins Jaguar, Land Rover, and Volvo in the growing to\n",
      "---------------\n",
      "prompt: The Owings Mills Mall in Maryland officially closed its doors in\n",
      "0: The Owings Mills Mall in Maryland officially closed its doors in 2015, but the mall\n",
      "1: The Owings Mills Mall in Maryland officially closed its doors in 2017, but the mall\n",
      "2: The Owings Mills Mall in Maryland officially closed its doors in 2016, but the mall\n",
      "---------------\n",
      "prompt: I've easily purchased 25 of these over the last \n",
      "0: I've easily purchased 25 of these over the last 10 years. I've had a few\n",
      "1: I've easily purchased 25 of these over the last 10 years. I've used a few\n",
      "2: I've easily purchased 25 of these over the last 10 years. I've had a couple\n",
      "---------------\n",
      "prompt: Sevilla midfielder Hiroshi Kiyotake\n",
      "0: Sevilla midfielder Hiroshi Kiyotake has been linked with a move to the Premier League\n",
      "1: Sevilla midfielder Hiroshi Kiyotake has been linked with a move to Liverpool Premier League\n",
      "2: Sevilla midfielder Hiroshi Kiyotake has been linked with a move to the J League\n",
      "---------------\n",
      "prompt: There's constantly some sort of plagiarism row going on\n",
      "0: There's constantly some sort of plagiarism row going on in the world of music. It's a\n",
      "1: There's constantly some sort of plagiarism row going on in the world of music. It's not\n",
      "2: There's constantly some sort of plagiarism row going on in the world of music. It's been\n",
      "---------------\n",
      "prompt: Written by and copyright © 2005-\n",
      "0: Written by and copyright © 2005-2019 by the individual contributors.\n",
      "1: Written by and copyright © 2005-2019 by the individual contributors\n",
      "\n",
      "2: Written by and copyright © 2005-2019 by the individual contributors,\n",
      "---------------\n",
      "prompt: The Wolf currently has the former Seminole as his RB3\n",
      "0: The Wolf currently has the former Seminole as his RB3.\n",
      "The 2019 season was\n",
      "1: The Wolf currently has the former Seminole as his RB3.\n",
      "The 2017 season was\n",
      "2: The Wolf currently has the former Seminole as his RB3.\n",
      "The 2018 season was\n",
      "---------------\n",
      "prompt: The Obama administration is being slammed from all sides for its failing\n",
      "0: The Obama administration is being slammed from all sides for its failing to protect the American people from the threat of terror\n",
      "1: The Obama administration is being slammed from all sides for its failing to protect the American people from the threat of Islam\n",
      "2: The Obama administration is being slammed from all sides for its failing to protect the American people from the threat of a\n",
      "---------------\n",
      "prompt: The nonpartisan Congressional Budget Office reinforced what\n",
      "0: The nonpartisan Congressional Budget Office reinforced what we have been saying for years: The Affan\n",
      "1: The nonpartisan Congressional Budget Office reinforced what we have been saying for years: the Republican Party\n",
      "2: The nonpartisan Congressional Budget Office reinforced what we have been saying for years: The Republican Party\n",
      "---------------\n",
      "prompt: \"The attrition rate, even very high up in the draft,\n",
      "0: \"The attrition rate, even very high up in the draft, is not as high as it used to be.\n",
      "1: \"The attrition rate, even very high up in the draft, is not as high as it used to be,\"\n",
      "2: \"The attrition rate, even very high up in the draft, is not as high as it used to be.\"\n",
      "---------------\n",
      "prompt: Major events in the history of SGI and Priesthood\n",
      "\n",
      "0: Major events in the history of SGI and Priesthood\n",
      "The SGI was founded in 19\n",
      "1: Major events in the history of SGI and Priesthood\n",
      "The SGI was established in 19\n",
      "2: Major events in the history of SGI and Priesthood\n",
      "The SGI was founded by 19\n",
      "---------------\n",
      "prompt: When the head of the CIA's torture unit decided to\n",
      "0: When the head of the CIA's torture unit decided to quit, he was asked to stay on the job\n",
      "1: When the head of the CIA's torture unit decided to quit, he was asked to stay on for job\n",
      "2: When the head of the CIA's torture unit decided to quit, he was asked to stay on. job\n",
      "---------------\n",
      "prompt: Laser rangefinders are excellent tools that help you to determine longer\n",
      "0: Laser rangefinders are excellent tools that help you to determine longer distances.\n",
      "Their main purpose is to help\n",
      "1: Laser rangefinders are excellent tools that help you to determine longer distances.\n",
      "Their main purpose is to measure\n",
      "2: Laser rangefinders are excellent tools that help you to determine longer distances.\n",
      "Their main purpose is to determine\n",
      "---------------\n",
      "prompt: The Coalition For Economic Survival (CES) ur\n",
      "0: The Coalition For Economic Survival (CES) urges the Government to reconsider the decision to\n",
      "1: The Coalition For Economic Survival (CES) urges the Government to reconsider its decision to\n",
      "2: The Coalition For Economic Survival (CES) urges the government to reconsider the decision to\n",
      "---------------\n",
      "prompt: If you haven't been paying attention: Republicans have been\n",
      "0: If you haven't been paying attention: Republicans have been trying to get rid of the Affordable Care\n",
      "1: If you haven't been paying attention: Republicans have been trying to get rid of the Affordable Health\n",
      "2: If you haven't been paying attention: Republicans have been trying to get rid of the Affordable care\n",
      "---------------\n",
      "prompt: BW semi-spirit themed tempo/aggro deck.\n",
      "0: BW semi-spirit themed tempo/aggro deck.\n",
      "I've been playing this deck for a\n",
      "1: BW semi-spirit themed tempo/aggro deck.\n",
      "I've been playing a deck for a\n",
      "2: BW semi-spirit themed tempo/aggro deck.\n",
      "I've been playing this deck for about\n",
      "---------------\n",
      "prompt: About \"List of WWE/WWF Finishers\"\n",
      "\n",
      "0: About \"List of WWE/WWF Finishers\"\n",
      "List of WWE/WWF/E Fin\n",
      "1: About \"List of WWE/WWF Finishers\"\n",
      "List of WWE/WWF/W Fin\n",
      "2: About \"List of WWE/WWF Finishers\"\n",
      "List of WWE/WWF\n",
      "E Fin\n",
      "---------------\n",
      "prompt: That's right! What everyone has been taking about! This episode\n",
      "0: That's right! What everyone has been taking about! This episode is a little different.Љ\n",
      "The first episode\n",
      "1: That's right! What everyone has been taking about! This episode is a little different.Ъ\n",
      "The first episode\n",
      "2: That's right! What everyone has been taking about! This episode is a little different.Ћ\n",
      "The first episode\n",
      "---------------\n",
      "prompt: Several people have asked about the techniques we used when cleaning out my\n",
      "0: Several people have asked about the techniques we used when cleaning out my mother's house. I've been meaning\n",
      "1: Several people have asked about the techniques we used when cleaning out my father's house. I've been meaning\n",
      "2: Several people have asked about the techniques we used when cleaning out my mother’s house. I've been meaning\n",
      "---------------\n",
      "prompt: The pace of the race to the top of Fortune's Un\n",
      "0: The pace of the race to the top of Fortune's Unicorn List is accelerating.\n",
      "The list\n",
      "1: The pace of the race to the top of Fortune's Unicorn List is accelerating.\n",
      "The \n",
      "2: The pace of the race to the top of Fortune's Unicorn List has accelerating.\n",
      "The list\n",
      "---------------\n",
      "prompt: source GAIA package: Sx_MilitaryTimes_\n",
      "0: source GAIA package: Sx_MilitaryTimes_2019_01_01\n",
      "1: source GAIA package: Sx_MilitaryTimes_2017_01_01\n",
      "2: source GAIA package: Sx_MilitaryTimes_2018_01_01\n",
      "---------------\n",
      "prompt: Women across Denmark have complained of harassment by migrants\n",
      "0: Women across Denmark have complained of harassment by migrants, including sexual assaults, and the government has\n",
      "1: Women across Denmark have complained of harassment by migrants, including sexual assaults, and the country has\n",
      "2: Women across Denmark have complained of harassment by migrants, including sexual assaults, and the government is\n",
      "---------------\n",
      "prompt: Hebrews 1 New International Version (NIV)\n",
      "\n",
      "\n",
      "0: Hebrews 1 New International Version (NIV)\n",
      "\n",
      "Hebrews 1:1-4\n",
      "1: Hebrews 1 New International Version (NIV)\n",
      "\n",
      "Hebrews 1:1-2\n",
      "2: Hebrews 1 New International Version (NIV)\n",
      "\n",
      "Hebrews 1:1-1\n",
      "---------------\n",
      "prompt: NBA owners approved uniform advertising beginning in the 17-\n",
      "0: NBA owners approved uniform advertising beginning in the 17-18 season.\n",
      "The NBA has the first\n",
      "1: NBA owners approved uniform advertising beginning in the 17-18 season.\n",
      "The NBA is the first\n",
      "2: NBA owners approved uniform advertising beginning in the 17-18 season.\n",
      "The league is the first\n",
      "---------------\n",
      "prompt: Please insert an email address\n",
      "\n",
      "Password must:<br>-\n",
      "0: Please insert an email address\n",
      "\n",
      "Password must:<br>- be at least 8 characters long,<br\n",
      "1: Please insert an email address\n",
      "\n",
      "Password must:<br>- be at least 6 characters long,<br\n",
      "2: Please insert an email address\n",
      "\n",
      "Password must:<br>- be at least 8 characters long\n",
      "<br\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "evaluate_mauve(tokenizer, predictions=mixed_sequences.reshape(len(data) * n_drafts, -1), references=None, mauve=False, verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 3756.91it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------\n",
      "prompt: Is this restaurant family-friendly ? Yes No Unsure\n",
      "\n",
      "0: Is this restaurant family-friendly ? Yes No Unsure\n",
      "Is this restaurant good for lunch ? Yes No\n",
      "---------------\n",
      "prompt: Clinton talks about her time of 'reflection' during sick\n",
      "0: Clinton talks about her time of 'reflection' during sick leave\n",
      "Clinton talks about her time of\n",
      "---------------\n",
      "prompt: House Majority Whip Steve Scalise has been discharged\n",
      "0: House Majority Whip Steve Scalise has been discharged from the hospital after being shot at a congression\n",
      "---------------\n",
      "prompt: Insight Course: Lesson 14\n",
      "\n",
      "Control of\n",
      "0: Insight Course: Lesson 14\n",
      "\n",
      "Control of a Robot Arm\n",
      "\n",
      "### Goals\n",
      "---------------\n",
      "prompt: BY JENNIE MCNULTY\n",
      "\n",
      "Lesbian.\n",
      "0: BY JENNIE MCNULTY\n",
      "\n",
      "Lesbian.\n",
      "\n",
      "1.\n",
      "\n",
      "I\n",
      "\n",
      "Les\n",
      "---------------\n",
      "prompt: The Buddha's Teaching As It Is\n",
      "\n",
      "In\n",
      "0: The Buddha's Teaching As It Is\n",
      "\n",
      "In 1959, the Venerable\n",
      "---------------\n",
      "prompt: As part of a broad initiative to combat sexual harassment and\n",
      "0: As part of a broad initiative to combat sexual harassment and assault in the entertainment industry, Time's\n",
      "---------------\n",
      "prompt: The Atlanta Falcons have started the 2015 season \n",
      "0: The Atlanta Falcons have started the 2015 season 2-1, and are coming off a win\n",
      "---------------\n",
      "prompt: Front Page Torrents Favorites My Home My Galleries Top\n",
      "0: Front Page Torrents Favorites My Home My Galleries Top Torrents\n",
      "Pornography, or por\n",
      "---------------\n",
      "prompt: They have changed the phone menu to try to deflect us to email\n",
      "0: They have changed the phone menu to try to deflect us to email. They have a very poor selection of shows and\n",
      "---------------\n",
      "prompt: One Page\n",
      "\n",
      "One Page is a browser extension for automatically displaying multi\n",
      "0: One Page\n",
      "\n",
      "One Page is a browser extension for automatically displaying multi-page web content on one page.\n",
      "\n",
      "\n",
      "---------------\n",
      "prompt: Intro \"In his search for food, early man tried all kinds\n",
      "0: Intro \"In his search for food, early man tried all kinds of things. It was not until the development of\n",
      "---------------\n",
      "prompt: Having trouble viewing the video? Try disabling any ad blocking extensions\n",
      "0: Having trouble viewing the video? Try disabling any ad blocking extensions in your browser.\n",
      "WATCH: A video\n",
      "---------------\n",
      "prompt: Get Liverpool FC updates directly to your inbox Subscribe Thank you for\n",
      "0: Get Liverpool FC updates directly to your inbox Subscribe Thank you for subscribingWe have more newslettersShow me\n",
      "---------------\n",
      "prompt: Super Mario Run will be available on Android devices beginning in March, N\n",
      "0: Super Mario Run will be available on Android devices beginning in March, Nintendo announced today.\n",
      "The game will be available\n",
      "---------------\n",
      "prompt: The California-based electric car manufacturer joins Jaguar, Land\n",
      "0: The California-based electric car manufacturer joins Jaguar, Land Rover, and Volvo in a bid to\n",
      "---------------\n",
      "prompt: The Owings Mills Mall in Maryland officially closed its doors in\n",
      "0: The Owings Mills Mall in Maryland officially closed its doors in 2014, but the building that\n",
      "---------------\n",
      "prompt: I've easily purchased 25 of these over the last \n",
      "0: I've easily purchased 25 of these over the last 3 years. I am a HUGE fan\n",
      "---------------\n",
      "prompt: Sevilla midfielder Hiroshi Kiyotake\n",
      "0: Sevilla midfielder Hiroshi Kiyotake has revealed that he has not yet decided whether to\n",
      "---------------\n",
      "prompt: There's constantly some sort of plagiarism row going on\n",
      "0: There's constantly some sort of plagiarism row going on with the news. It's a constant battle\n",
      "---------------\n",
      "prompt: Written by and copyright © 2005-\n",
      "0: Written by and copyright © 2005-2019 by Charles E. Cran\n",
      "---------------\n",
      "prompt: The Wolf currently has the former Seminole as his RB3\n",
      "0: The Wolf currently has the former Seminole as his RB3 and the former Nittany Lion as his R\n",
      "---------------\n",
      "prompt: The Obama administration is being slammed from all sides for its failing\n",
      "0: The Obama administration is being slammed from all sides for its failing foreign policy, especially in the Middle East. The\n",
      "---------------\n",
      "prompt: The nonpartisan Congressional Budget Office reinforced what\n",
      "0: The nonpartisan Congressional Budget Office reinforced what we already know: the Republican tax plan is a\n",
      "---------------\n",
      "prompt: \"The attrition rate, even very high up in the draft,\n",
      "0: \"The attrition rate, even very high up in the draft, is probably 30 percent,\" said Mark Rog\n",
      "---------------\n",
      "prompt: Major events in the history of SGI and Priesthood\n",
      "\n",
      "0: Major events in the history of SGI and Priesthood\n",
      "The first SGI-USA Young Women’\n",
      "---------------\n",
      "prompt: When the head of the CIA's torture unit decided to\n",
      "0: When the head of the CIA's torture unit decided to retire, he had to make sure the secre\n",
      "---------------\n",
      "prompt: Laser rangefinders are excellent tools that help you to determine longer\n",
      "0: Laser rangefinders are excellent tools that help you to determine longer distances with ease. They are a good option for\n",
      "---------------\n",
      "prompt: The Coalition For Economic Survival (CES) ur\n",
      "0: The Coalition For Economic Survival (CES) urges you to support the campaign to save the T\n",
      "---------------\n",
      "prompt: If you haven't been paying attention: Republicans have been\n",
      "0: If you haven't been paying attention: Republicans have been trying to do away with the Department of Education.\n",
      "---------------\n",
      "prompt: BW semi-spirit themed tempo/aggro deck.\n",
      "0: BW semi-spirit themed tempo/aggro deck.\n",
      "This is a very fun deck to play.\n",
      "---------------\n",
      "prompt: About \"List of WWE/WWF Finishers\"\n",
      "\n",
      "0: About \"List of WWE/WWF Finishers\"\n",
      "List of WWE/WWF Finishers\n",
      "---------------\n",
      "prompt: That's right! What everyone has been taking about! This episode\n",
      "0: That's right! What everyone has been taking about! This episode is all about the mysterious M.H.\n",
      "---------------\n",
      "prompt: Several people have asked about the techniques we used when cleaning out my\n",
      "0: Several people have asked about the techniques we used when cleaning out my mother-in-law's house. The\n",
      "---------------\n",
      "prompt: The pace of the race to the top of Fortune's Un\n",
      "0: The pace of the race to the top of Fortune's Unstoppable Companies list was set by\n",
      "---------------\n",
      "prompt: source GAIA package: Sx_MilitaryTimes_\n",
      "0: source GAIA package: Sx_MilitaryTimes_2021-05-17\n",
      "---------------\n",
      "prompt: Women across Denmark have complained of harassment by migrants\n",
      "0: Women across Denmark have complained of harassment by migrants in public spaces.\n",
      "A recent survey by the\n",
      "---------------\n",
      "prompt: Hebrews 1 New International Version (NIV)\n",
      "\n",
      "\n",
      "0: Hebrews 1 New International Version (NIV)\n",
      "\n",
      "Hebrews 1:1-1\n",
      "---------------\n",
      "prompt: NBA owners approved uniform advertising beginning in the 17-\n",
      "0: NBA owners approved uniform advertising beginning in the 17-18 season.\n",
      "The NBA is going to\n",
      "---------------\n",
      "prompt: Please insert an email address\n",
      "\n",
      "Password must:<br>-\n",
      "0: Please insert an email address\n",
      "\n",
      "Password must:<br>- be at least 8 characters long<br>\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "evaluate_mauve(tokenizer, predictions=nucleus_sequences.reshape(len(data) * 1, -1), references=None, n_drafts=1, mauve=False, verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
