{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "178d7b49",
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import pickle\n",
    "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
    "\n",
    "import os\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import glob\n",
    "import numpy as np\n",
    "from IPython.display import clear_output\n",
    "from collections import Counter\n",
    "os.chdir(\"../\")\n",
    "\n",
    "from src import utils\n",
    "from matplotlib.lines import Line2D\n",
    "from matplotlib.patches import Patch\n",
    "from matplotlib.ticker import LogLocator\n",
    "import torch\n",
    "from termcolor import colored  # Use termcolor for colored output in the terminal\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8ef8f08a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split and verify functions\n",
    "def verify_sampling_conditions(tokens, prompt_length, top_k=None, top_p=None, model=None, tokenizer=None, temp = 1.0):\n",
    "    # Convert tokens to tensor and run the model\n",
    "    input_ids = torch.tensor([tokens]).to(\"cpu\")\n",
    "    with torch.no_grad():\n",
    "        outputs = model(input_ids)\n",
    "    \n",
    "    logits = outputs.logits\n",
    "\n",
    "    all_top_k_met = True\n",
    "    all_top_p_met = True\n",
    "\n",
    "    # Evaluate only on tokens after the prompt\n",
    "    for i in range(prompt_length, len(tokens)):  # Start from tokens after the prompt\n",
    "        previous_logits = logits[0, i - 1]  # Logits for predicting the current token\n",
    "        probabilities = torch.softmax(previous_logits / temp, dim=-1)  # Convert logits to probabilities\n",
    "\n",
    "        # Get current token\n",
    "        \n",
    "        \n",
    "        current_token = tokens[i]\n",
    "        token_probability = probabilities[current_token].item()\n",
    "\n",
    "        # Check top-k condition\n",
    "        top_k_condition = False\n",
    "        if top_k is not None:\n",
    "            top_k_indices = torch.topk(probabilities, k=top_k).indices\n",
    "            top_k_condition = current_token in top_k_indices.tolist()\n",
    "            all_top_k_met = all_top_k_met and top_k_condition  # Update overall status\n",
    "\n",
    "        # Check top-p condition\n",
    "        top_p_condition = False\n",
    "        if top_p is not None:\n",
    "            sorted_probs, sorted_indices = torch.sort(probabilities, descending=True)\n",
    "            cumulative_probs = torch.cumsum(sorted_probs, dim=-1)\n",
    "            top_p_indices = sorted_indices[cumulative_probs <= top_p]\n",
    "            # Include the first token that pushes cumulative probability over top_p\n",
    "            if len(top_p_indices) < len(sorted_probs):\n",
    "                top_p_indices = torch.cat([top_p_indices, sorted_indices[len(top_p_indices):len(top_p_indices) + 1]])\n",
    "            top_p_condition = current_token in top_p_indices.tolist()\n",
    "            all_top_p_met = all_top_p_met and top_p_condition  # Update overall status\n",
    "\n",
    "\n",
    "\n",
    "    return {\n",
    "        \"all_top_k_met\": all_top_k_met if top_k is not None else None,\n",
    "        \"all_top_p_met\": all_top_p_met if top_p is not None else None,\n",
    "    }\n",
    "\n",
    "\n",
    "\n",
    "def split_token(sequence, tokenizer, vocab):\n",
    "    \"\"\"\n",
    "    Heuristic function to splits a token into two subtokens based on the sum of their indices in the vocabulary.\n",
    "    \"\"\"\n",
    "    \n",
    "    # Reverse mapping: ID -> Token\n",
    "    id_to_token = {v: k for k, v in vocab.items()}\n",
    "\n",
    "    # Select a token ID to split (heuristic: pick the lowest ID token)\n",
    "    \n",
    "    #Get all token IDs in the sequence that have at least two characters\n",
    "    valid_ids = [token_id for token_id in sequence if len(tokenizer.decode([token_id])) > 1]\n",
    "    if len(valid_ids) == 0:\n",
    "        print(\"No valid token IDs found, returning original sequence\", sequence)\n",
    "        return sequence\n",
    "    \n",
    "    token_id_to_split = max(valid_ids)\n",
    "\n",
    "    # Get the token corresponding to the selected ID\n",
    "    token_to_split = id_to_token[token_id_to_split]\n",
    "\n",
    "    \n",
    "    \n",
    "    # Initialize variables to store the best split\n",
    "    best_split = None\n",
    "    \n",
    "    \n",
    "    max_index = -float('inf')  # Start with a very low number for comparison\n",
    "\n",
    "    # Try all possible splits and calculate the sum of the indices for each part\n",
    "    for mid_index in range(1, len(token_to_split)):  # Split at various points\n",
    "        Y = token_to_split[:mid_index]\n",
    "        Z = token_to_split[mid_index:]\n",
    "        \n",
    "        # Get the token IDs for Y and Z\n",
    "        Y_id = vocab.get(Y)  # No default value; will return None if Y isn't valid\n",
    "        Z_id = vocab.get(Z)  # No default value; will return None if Z isn't valid\n",
    "\n",
    "\n",
    "        # Skip this split if either Y or Z is invalid\n",
    "        if Y_id is None or Z_id is None:\n",
    "            continue\n",
    "\n",
    "        # Calculate the sum of the indices\n",
    "        index_min = min(Y_id, Z_id)\n",
    "\n",
    "        # If the sum of the indices is the largest found so far, update best split\n",
    "        if index_min > max_index:\n",
    "            best_split = (Y, Z)\n",
    "            max_index = index_min\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "    # If no valid split was found, return the original sequence\n",
    "    if best_split is None:\n",
    "        return sequence\n",
    "\n",
    "    # Replace the token X with its split subtokens Y and Z in the sequence\n",
    "    new_sequence = []\n",
    "    updated = False\n",
    "    for token_id in sequence:\n",
    "        if token_id == token_id_to_split and not updated:\n",
    "            # Replace token X with subtokens Y and Z\n",
    "            new_sequence.extend([vocab[best_split[0]], vocab[best_split[1]]])\n",
    "            updated = True\n",
    "        else:\n",
    "            new_sequence.append(token_id)\n",
    "\n",
    "    return new_sequence\n",
    "\n",
    "\n",
    "\n",
    "def print_tokens(tokenizer, token_ids, separator=\"|\"):\n",
    "    \"\"\"\n",
    "    Decodes a sequence of token IDs into strings, separated by a specified symbol.\n",
    "\n",
    "    Args:\n",
    "        tokenizer: A tokenizer from the transformers library.\n",
    "        token_ids: A list of token IDs.\n",
    "        separator: A string to separate decoded tokens (default is '|').\n",
    "\n",
    "    Returns:\n",
    "        A single string with each token's decoded representation separated by the separator.\n",
    "    \"\"\"\n",
    "    # Decode each token ID into its corresponding string\n",
    "    token_strings = [tokenizer.decode([token_id], clean_up_tokenization_spaces=False) for token_id in token_ids]\n",
    "    # Join the decoded strings with the separator\n",
    "    result = separator.join(token_strings)\n",
    "    return result\n",
    "\n",
    "\n",
    "def print_tokens_with_reference(tokenizer, token_ids, reference_ids, separator=\"|\"):\n",
    "    \"\"\"\n",
    "    Decodes a sequence of token IDs into strings, separated by a specified symbol.\n",
    "    Highlights tokens not in the reference sequence in red.\n",
    "\n",
    "    Args:\n",
    "        tokenizer: A tokenizer from the transformers library.\n",
    "        token_ids: A list of token IDs.\n",
    "        reference_ids: A reference list of token IDs to compare against.\n",
    "        separator: A string to separate decoded tokens (default is '|').\n",
    "\n",
    "    Returns:\n",
    "        A single string with each token's decoded representation, \n",
    "        new tokens highlighted in red, separated by the separator.\n",
    "    \"\"\"\n",
    "    # Decode the reference token IDs into strings\n",
    "    reference_strings = set(\n",
    "        tokenizer.decode([token_id], clean_up_tokenization_spaces=False) for token_id in reference_ids\n",
    "    )\n",
    "    \n",
    "    # Decode the target token IDs and mark new tokens\n",
    "    token_strings = []\n",
    "    for token_id in token_ids:\n",
    "        token_string = tokenizer.decode([token_id], clean_up_tokenization_spaces=False)\n",
    "        if token_string not in reference_strings:\n",
    "            token_strings.append(colored(token_string, \"red\"))\n",
    "        else:\n",
    "            token_strings.append(token_string)\n",
    "    \n",
    "    # Join the tokens with the separator\n",
    "    result = separator.join(token_strings)\n",
    "    #print(result)\n",
    "    return result\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "71ce2d40",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_name = \"meta-llama/Llama-3.2-1B-Instruct\"\n",
    "model_cache = \"../models\"\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=model_cache)\n",
    "model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=model_cache)\n",
    "\n",
    "#Load the LMSYS outputs produced by a model\n",
    "#Use the LMSYS_generate.py script to generate the outputs\n",
    "#Note that the outputs are gitignored since their size is too large, >100MB \n",
    "with open(\"../outputs/cpt/factual_modelLlama-3.2-3B-Instruct_p1.0_kNone_numseq3_numprompts100_maxoutlen200_temp1.3_idare you .pkl\", \"rb\") as f:\n",
    "    data = pickle.load(f)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "579c19b7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
      "?\n",
      "|The| easiest| way| to| invest| in| property| with| the| capital| is| still| considered| for| |3| or| more| properties| or| a| real| estate| mutual| fund|.\n",
      "|Real| estate| investment| trusts| or| RE|IT|s|,| real| estate| mutual| funds| may| be| the| easiest|.| One| can| not| simply| buy| into| a| building|,| but| you| can| gain| an| interest| or| gain| into| something| that| the| others| are| making| that| investment| profitable|.| There| are| many| options| for| acquiring| income| such| as| ground| level| rental| or| owning| a| building| through| a| partnership|.\n",
      "|The| highest| performing| investing| may| remain| a| gamble| and| have| no| guarantee|.| The| next| highest| would| have| to| be| investing| in| stocks| and| bonds|,| the| old| main|stay|.| Div|idend| paying| and| bonds| both| have| higher| reliability| from| being| the| \"|standard|\".\n",
      "\n",
      "|Note|:| the| previous| responses| and| answers| have| been| simplified| for| ease| of| use| as| not| wanting| any| information| and| not| looking| over|.| While| my| goal| is| being| simple| to| understand|.\n",
      "\n",
      "|I| was| not| in| contact| with| my| broker| and| was| on| vacation| but| had| read| what| a| high| return| stock| trading| investment| with| the| same| amount| was| that\n",
      "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
      "?\n",
      "|The| easiest| way| to| invest| in| property| with| the| capital| is| still| considered| for| |3| or| more| properties| or| a| real| estate| mutual| fund|.\n",
      "|Real| estate| investment| trusts| or| RE|IT|s|,| real| estate| mutual| funds| may| be| the| easiest|.| One| can| not| simply| buy| into| a| building|,| but| you| can| gain| an| interest| or| gain| into| something| that| the| others| are| making| that| investment| profitable|.| There| are| many| options| for| acquiring| income| such| as| ground| level| rental| or| owning| a| building| through| a| partnership|.\n",
      "|The| highest| performing| investing| may| remain| a| gamble| and| have| no| guarantee|.| The| next| highest| would| have| to| be| investing| in| stocks| and| bonds|,| the| old| main|stay|.| Div|\u001b[31mid\u001b[0m|\u001b[31mend\u001b[0m| paying| and| bonds| both| have| higher| reliability| from| being| the| \"|standard|\".\n",
      "\n",
      "|Note|:| the| previous| responses| and| answers| have| been| simplified| for| ease| of| use| as| not| wanting| any| information| and| not| looking| over|.| While| my| goal| is| being| simple| to| understand|.\n",
      "\n",
      "|I| was| not| in| contact| with| my| broker| and| was| on| vacation| but| had| read| what| a| high| return| stock| trading| investment| with| the| same| amount| was| that\n",
      "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
      "?\n",
      "|The| easiest| way| to| invest| in| property| with| the| capital| is| still| considered| for| |3| or| more| properties| or| a| real| estate| mutual| fund|.\n",
      "|Real| estate| investment|\u001b[31m trust\u001b[0m|s| or| RE|IT|s|,| real| estate| mutual| funds| may| be| the| easiest|.| One| can| not| simply| buy| into| a| building|,| but| you| can| gain| an| interest| or| gain| into| something| that| the| others| are| making| that| investment| profitable|.| There| are| many| options| for| acquiring| income| such| as| ground| level| rental| or| owning| a| building| through| a| partnership|.\n",
      "|The| highest| performing| investing| may| remain| a| gamble| and| have| no| guarantee|.| The| next| highest| would| have| to| be| investing| in| stocks| and| bonds|,| the| old| main|stay|.| Div|\u001b[31mid\u001b[0m|\u001b[31mend\u001b[0m| paying| and| bonds| both| have| higher| reliability| from| being| the| \"|standard|\".\n",
      "\n",
      "|Note|:| the| previous| responses| and| answers| have| been| simplified| for| ease| of| use| as| not| wanting| any| information| and| not| looking| over|.| While| my| goal| is| being| simple| to| understand|.\n",
      "\n",
      "|I| was| not| in| contact| with| my| broker| and| was| on| vacation| but| had| read| what| a| high| return| stock| trading| investment| with| the| same| amount| was| that\n",
      "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
      "?\n",
      "|The| easiest| way| to| invest| in| property| with| the| capital| is| still| considered| for| |3| or| more| properties| or| a| real| estate| mutual| fund|.\n",
      "|Real| estate| investment|\u001b[31m trust\u001b[0m|s| or| RE|IT|s|,| real| estate| mutual| funds| may| be| the| easiest|.| One| can| not| simply| buy| into| a| building|,| but| you| can| gain| an| interest| or| gain| into| something| that| the| others| are| making| that| investment| profitable|.| There| are| many| options| for| acquiring| income| such| as| ground| level| rental| or| owning| a| building| through| a| partnership|.\n",
      "|The| highest| performing| investing| may| remain| a|\u001b[31m gam\u001b[0m|\u001b[31mble\u001b[0m| and| have| no| guarantee|.| The| next| highest| would| have| to| be| investing| in| stocks| and| bonds|,| the| old| main|stay|.| Div|\u001b[31mid\u001b[0m|\u001b[31mend\u001b[0m| paying| and| bonds| both| have| higher| reliability| from| being| the| \"|standard|\".\n",
      "\n",
      "|Note|:| the| previous| responses| and| answers| have| been| simplified| for| ease| of| use| as| not| wanting| any| information| and| not| looking| over|.| While| my| goal| is| being| simple| to| understand|.\n",
      "\n",
      "|I| was| not| in| contact| with| my| broker| and| was| on| vacation| but| had| read| what| a| high| return| stock| trading| investment| with| the| same| amount| was| that\n",
      "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
      "?\n",
      "|The| easiest| way| to| invest| in| property| with| the| capital| is| still| considered| for| |3| or| more| properties| or| a| real| estate| mutual| fund|.\n",
      "|Real| estate| investment|\u001b[31m trust\u001b[0m|s| or| RE|IT|s|,| real| estate| mutual| funds| may| be| the| easiest|.| One| can| not| simply| buy| into| a| building|,| but| you| can| gain| an| interest| or| gain| into| something| that| the| others| are| making| that| investment| profitable|.| There| are| many| options| for| acquiring| income| such| as| ground| level| rental| or| owning| a| building| through| a| partnership|.\n",
      "|The| highest| performing| investing| may| remain| a|\u001b[31m gam\u001b[0m|\u001b[31mble\u001b[0m| and| have| no| guarantee|.| The| next| highest| would| have| to| be| investing| in| stocks| and| bonds|,| the| old| main|\u001b[31mst\u001b[0m|\u001b[31may\u001b[0m|.| Div|\u001b[31mid\u001b[0m|\u001b[31mend\u001b[0m| paying| and| bonds| both| have| higher| reliability| from| being| the| \"|standard|\".\n",
      "\n",
      "|Note|:| the| previous| responses| and| answers| have| been| simplified| for| ease| of| use| as| not| wanting| any| information| and| not| looking| over|.| While| my| goal| is| being| simple| to| understand|.\n",
      "\n",
      "|I| was| not| in| contact| with| my| broker| and| was| on| vacation| but| had| read| what| a| high| return| stock| trading| investment| with| the| same| amount| was| that\n",
      "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
      "?\n",
      "|The| easiest| way| to| invest| in| property| with| the| capital| is| still| considered| for| |3| or| more| properties| or| a| real| estate| mutual| fund|.\n",
      "|Real| estate| investment|\u001b[31m trust\u001b[0m|s| or| RE|IT|s|,| real| estate| mutual| funds| may| be| the| easiest|.| One| can| not| simply| buy| into| a| building|,| but| you| can| gain| an| interest| or| gain| into| something| that| the| others| are| making| that| investment| profitable|.| There| are| many| options| for| acquiring| income| such| as| ground| level| rental| or| owning| a| building| through| a| partnership|.\n",
      "|The| highest| performing| investing| may| remain| a|\u001b[31m gam\u001b[0m|\u001b[31mble\u001b[0m| and| have| no| guarantee|.| The| next| highest| would| have| to| be| investing| in| stocks| and| bonds|,| the| old| main|\u001b[31mst\u001b[0m|\u001b[31may\u001b[0m|.| Div|\u001b[31mid\u001b[0m|\u001b[31mend\u001b[0m| paying| and| bonds| both| have| higher| reliability| from| being| the| \"|standard|\".\n",
      "\n",
      "|Note|:| the| previous| responses| and| answers| have| been|\u001b[31m simpl\u001b[0m|\u001b[31mified\u001b[0m| for| ease| of| use| as| not| wanting| any| information| and| not| looking| over|.| While| my| goal| is| being| simple| to| understand|.\n",
      "\n",
      "|I| was| not| in| contact| with| my| broker| and| was| on| vacation| but| had| read| what| a| high| return| stock| trading| investment| with| the| same| amount| was| that\n",
      "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
      "?\n",
      "|The| easiest| way| to| invest| in| property| with| the| capital| is| still| considered| for| |3| or| more| properties| or| a| real| estate| mutual| fund|.\n",
      "|Real| estate| investment|\u001b[31m trust\u001b[0m|s| or| RE|IT|s|,| real| estate| mutual| funds| may| be| the| easiest|.| One| can| not| simply| buy| into| a| building|,| but| you| can| gain| an| interest| or| gain| into| something| that| the| others| are| making| that| investment| profitable|.| There| are| many| options| for|\u001b[31m acqu\u001b[0m|\u001b[31miring\u001b[0m| income| such| as| ground| level| rental| or| owning| a| building| through| a| partnership|.\n",
      "|The| highest| performing| investing| may| remain| a|\u001b[31m gam\u001b[0m|\u001b[31mble\u001b[0m| and| have| no| guarantee|.| The| next| highest| would| have| to| be| investing| in| stocks| and| bonds|,| the| old| main|\u001b[31mst\u001b[0m|\u001b[31may\u001b[0m|.| Div|\u001b[31mid\u001b[0m|\u001b[31mend\u001b[0m| paying| and| bonds| both| have| higher| reliability| from| being| the| \"|standard|\".\n",
      "\n",
      "|Note|:| the| previous| responses| and| answers| have| been|\u001b[31m simpl\u001b[0m|\u001b[31mified\u001b[0m| for| ease| of| use| as| not| wanting| any| information| and| not| looking| over|.| While| my| goal| is| being| simple| to| understand|.\n",
      "\n",
      "|I| was| not| in| contact| with| my| broker| and| was| on| vacation| but| had| read| what| a| high| return| stock| trading| investment| with| the| same| amount| was| that\n",
      "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
      "?\n",
      "|The| easiest| way| to| invest| in| property| with| the| capital| is| still| considered| for| |3| or| more| properties| or| a| real| estate| mutual| fund|.\n",
      "|Real| estate| investment|\u001b[31m trust\u001b[0m|s| or| RE|IT|s|,| real| estate| mutual| funds| may| be| the| easiest|.| One| can| not| simply| buy| into| a| building|,| but| you| can| gain| an| interest| or| gain| into| something| that| the| others| are| making| that| investment| profitable|.| There| are| many| options| for|\u001b[31m acqu\u001b[0m|\u001b[31miring\u001b[0m| income| such| as| ground| level| rental| or|\u001b[31m ow\u001b[0m|\u001b[31mning\u001b[0m| a| building| through| a| partnership|.\n",
      "|The| highest| performing| investing| may| remain| a|\u001b[31m gam\u001b[0m|\u001b[31mble\u001b[0m| and| have| no| guarantee|.| The| next| highest| would| have| to| be| investing| in| stocks| and| bonds|,| the| old| main|\u001b[31mst\u001b[0m|\u001b[31may\u001b[0m|.| Div|\u001b[31mid\u001b[0m|\u001b[31mend\u001b[0m| paying| and| bonds| both| have| higher| reliability| from| being| the| \"|standard|\".\n",
      "\n",
      "|Note|:| the| previous| responses| and| answers| have| been|\u001b[31m simpl\u001b[0m|\u001b[31mified\u001b[0m| for| ease| of| use| as| not| wanting| any| information| and| not| looking| over|.| While| my| goal| is| being| simple| to| understand|.\n",
      "\n",
      "|I| was| not| in| contact| with| my| broker| and| was| on| vacation| but| had| read| what| a| high| return| stock| trading| investment| with| the| same| amount| was| that\n",
      "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
      "?\n",
      "|The| easiest| way| to| invest| in| property| with| the| capital| is| still| considered| for| |3| or| more| properties| or| a| real| estate| mutual| fund|.\n",
      "|Real| estate| investment|\u001b[31m trust\u001b[0m|s| or| RE|IT|s|,| real| estate| mutual| funds| may| be| the| easiest|.| One| can| not| simply| buy| into| a| building|,| but| you| can| gain| an| interest| or| gain| into| something| that| the| others| are| making| that| investment|\u001b[31m prof\u001b[0m|\u001b[31mitable\u001b[0m|.| There| are| many| options| for|\u001b[31m acqu\u001b[0m|\u001b[31miring\u001b[0m| income| such| as| ground| level| rental| or|\u001b[31m ow\u001b[0m|\u001b[31mning\u001b[0m| a| building| through| a| partnership|.\n",
      "|The| highest| performing| investing| may| remain| a|\u001b[31m gam\u001b[0m|\u001b[31mble\u001b[0m| and| have| no| guarantee|.| The| next| highest| would| have| to| be| investing| in| stocks| and| bonds|,| the| old| main|\u001b[31mst\u001b[0m|\u001b[31may\u001b[0m|.| Div|\u001b[31mid\u001b[0m|\u001b[31mend\u001b[0m| paying| and| bonds| both| have| higher| reliability| from| being| the| \"|standard|\".\n",
      "\n",
      "|Note|:| the| previous| responses| and| answers| have| been|\u001b[31m simpl\u001b[0m|\u001b[31mified\u001b[0m| for| ease| of| use| as| not| wanting| any| information| and| not| looking| over|.| While| my| goal| is| being| simple| to| understand|.\n",
      "\n",
      "|I| was| not| in| contact| with| my| broker| and| was| on| vacation| but| had| read| what| a| high| return| stock| trading| investment| with| the| same| amount| was| that\n",
      "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
      "?\n",
      "|The| easiest| way| to| invest| in| property| with| the| capital| is| still| considered| for| |3| or| more| properties| or| a| real| estate| mutual| fund|.\n",
      "|Real| estate| investment|\u001b[31m trust\u001b[0m|s| or| RE|IT|s|,| real| estate| mutual| funds| may| be| the| easiest|.| One| can| not| simply| buy| into| a| building|,| but| you| can| gain| an| interest| or| gain| into| something| that| the| others| are| making| that| investment|\u001b[31m prof\u001b[0m|\u001b[31mitable\u001b[0m|.| There| are| many| options| for|\u001b[31m acqu\u001b[0m|\u001b[31miring\u001b[0m| income| such| as| ground| level| rental| or|\u001b[31m ow\u001b[0m|\u001b[31mning\u001b[0m| a| building| through| a| partnership|.\n",
      "|The| highest| performing| investing| may| remain| a|\u001b[31m gam\u001b[0m|\u001b[31mble\u001b[0m| and| have| no| guarantee|.| The| next| highest| would| have| to| be| investing| in| stocks| and| bonds|,| the| old| main|\u001b[31mst\u001b[0m|\u001b[31may\u001b[0m|.| Div|\u001b[31mid\u001b[0m|\u001b[31mend\u001b[0m| paying| and| bonds| both| have| higher|\u001b[31m reli\u001b[0m|\u001b[31mability\u001b[0m| from| being| the| \"|standard|\".\n",
      "\n",
      "|Note|:| the| previous| responses| and| answers| have| been|\u001b[31m simpl\u001b[0m|\u001b[31mified\u001b[0m| for| ease| of| use| as| not| wanting| any| information| and| not| looking| over|.| While| my| goal| is| being| simple| to| understand|.\n",
      "\n",
      "|I| was| not| in| contact| with| my| broker| and| was| on| vacation| but| had| read| what| a| high| return| stock| trading| investment| with| the| same| amount| was| that\n",
      "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
      "?\n",
      "|The|\u001b[31m eas\u001b[0m|\u001b[31miest\u001b[0m| way| to| invest| in| property| with| the| capital| is| still| considered| for| |3| or| more| properties| or| a| real| estate| mutual| fund|.\n",
      "|Real| estate| investment|\u001b[31m trust\u001b[0m|s| or| RE|IT|s|,| real| estate| mutual| funds| may| be| the| easiest|.| One| can| not| simply| buy| into| a| building|,| but| you| can| gain| an| interest| or| gain| into| something| that| the| others| are| making| that| investment|\u001b[31m prof\u001b[0m|\u001b[31mitable\u001b[0m|.| There| are| many| options| for|\u001b[31m acqu\u001b[0m|\u001b[31miring\u001b[0m| income| such| as| ground| level| rental| or|\u001b[31m ow\u001b[0m|\u001b[31mning\u001b[0m| a| building| through| a| partnership|.\n",
      "|The| highest| performing| investing| may| remain| a|\u001b[31m gam\u001b[0m|\u001b[31mble\u001b[0m| and| have| no| guarantee|.| The| next| highest| would| have| to| be| investing| in| stocks| and| bonds|,| the| old| main|\u001b[31mst\u001b[0m|\u001b[31may\u001b[0m|.| Div|\u001b[31mid\u001b[0m|\u001b[31mend\u001b[0m| paying| and| bonds| both| have| higher|\u001b[31m reli\u001b[0m|\u001b[31mability\u001b[0m| from| being| the| \"|standard|\".\n",
      "\n",
      "|Note|:| the| previous| responses| and| answers| have| been|\u001b[31m simpl\u001b[0m|\u001b[31mified\u001b[0m| for| ease| of| use| as| not| wanting| any| information| and| not| looking| over|.| While| my| goal| is| being| simple| to| understand|.\n",
      "\n",
      "|I| was| not| in| contact| with| my| broker| and| was| on| vacation| but| had| read| what| a| high| return| stock| trading| investment| with| the| same| amount| was| that\n"
     ]
    }
   ],
   "source": [
    "#First example prompt_id = 28, seq_id = 0, min_tok, max_tok = 0, 100temperature = 1.3, top_p=0.95\n",
    "#Second example prompt_id = 36, seq_id = 0, min_tok, max_tok = 20, 150temperature = 1.3, top_p=0.95\n",
    "#Third example prompt_id = 34, seq_id = 1, min_tok, max_tok = 0, 200temperature = 1.3, top_p=0.95\n",
    "\n",
    "prompt_id = 36\n",
    "seq_id = 2\n",
    "min_tok, max_tok = 20, 150\n",
    "prompt = data[prompt_id][\"prompt\"]\n",
    "prompt_tokens = tokenizer.encode(prompt, add_special_tokens=False)\n",
    "max_split = 20\n",
    "\n",
    "output_sequence_original = [ token.item() for token in data[prompt_id][\"output\"][seq_id] ][min_tok: max_tok ]\n",
    "\n",
    "splits = []\n",
    "\n",
    "for m in range(max_split):\n",
    "    output_sequence = output_sequence_original\n",
    "\n",
    "    for _ in range(m):\n",
    "                        \n",
    "        output_sequence = split_token(output_sequence, tokenizer, tokenizer.vocab)\n",
    "    \n",
    "    splits.append(output_sequence)\n",
    "\n",
    "sampling_conditions = []\n",
    "temperature = 1.3\n",
    "top_p=0.95\n",
    "\n",
    "\n",
    "\n",
    "for m in range(max_split):\n",
    "    \n",
    "    \n",
    "    sampling_conditions.append(\n",
    "        verify_sampling_conditions(prompt_tokens + splits[m], len(prompt_tokens) , top_k=None, top_p=top_p, model=model, tokenizer=tokenizer, temp = temperature)[\"all_top_p_met\"]\n",
    "        \n",
    "        \n",
    "    )\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "print(\"----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\")\n",
    "print(   print_tokens_with_reference(tokenizer, splits[0], splits[0])   )\n",
    "print(\"----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\")\n",
    "print(   print_tokens_with_reference(tokenizer, splits[1],splits[0])   )\n",
    "print(\"----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\")\n",
    "print(   print_tokens_with_reference(tokenizer, splits[2],splits[0])   )\n",
    "print(\"----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\")\n",
    "print(   print_tokens_with_reference(tokenizer, splits[3],splits[0])   )\n",
    "print(\"----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\")\n",
    "print(   print_tokens_with_reference(tokenizer, splits[4],splits[0])   )\n",
    "print(\"----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\")\n",
    "print(   print_tokens_with_reference(tokenizer, splits[5],splits[0])   )\n",
    "print(\"----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\")\n",
    "print(   print_tokens_with_reference(tokenizer, splits[6],splits[0])   )\n",
    "print(\"----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\")\n",
    "print(   print_tokens_with_reference(tokenizer, splits[7],splits[0])   )\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
