{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import json\n",
    "import os\n",
    "import re\n",
    "from tqdm.auto import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model identifiers in org/name form. When the data path is built, '/' is\n",
    "# replaced with '_' to give the per-model directory name under ./data.\n",
    "models = [\n",
    "    \"nomic-ai/nomic-embed-text-v1.5\",\n",
    "    \"jinaai/jina-embeddings-v2-base-en\",\n",
    "    \"BAAI/bge-m3\",\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset names. Each one is the file-name prefix of a pickled frame,\n",
    "# i.e. ./data/<model dir>/<dataset>_<mode>.pkl.\n",
    "datasets = [\n",
    "    \"amazon_polarity\",\n",
    "    \"arguana\",\n",
    "    \"paul_graham\",\n",
    "    \"reddit\",\n",
    "    \"scientific_papers\",\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Suffix of the result files to load: ./data/<model dir>/<dataset>_<mode>.pkl\n",
    "mode = \"insert\"\n",
    "\n",
    "# Load a single result frame for inspection instead of looping.\n",
    "# A nested loop whose `break` only exits the inner loop reads the first\n",
    "# dataset of every model and keeps just the last frame. Selecting that same\n",
    "# (last model, first dataset) pair directly yields the same `df` without the\n",
    "# discarded reads.\n",
    "# NOTE(review): confirm this is the pair you want to preview.\n",
    "model, dataset = models[-1], datasets[0]\n",
    "\n",
    "# NOTE(review): unpickling can execute arbitrary code; only load files that\n",
    "# were produced by a trusted step of this project.\n",
    "df = pd.read_pickle(f\"./data/{model.replace('/', '_')}/{dataset}_{mode}.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>original</th>\n",
       "      <th>text_needle_insert_lorem_0pos_0.05sz</th>\n",
       "      <th>text_needle_insert_lorem_0.5pos_0.05sz</th>\n",
       "      <th>text_needle_insert_lorem_1pos_0.05sz</th>\n",
       "      <th>text_needle_insert_lorem_0pos_0.1sz</th>\n",
       "      <th>text_needle_insert_lorem_0.5pos_0.1sz</th>\n",
       "      <th>text_needle_insert_lorem_1pos_0.1sz</th>\n",
       "      <th>text_needle_insert_lorem_0pos_0.2sz</th>\n",
       "      <th>text_needle_insert_lorem_0.5pos_0.2sz</th>\n",
       "      <th>text_needle_insert_lorem_1pos_0.2sz</th>\n",
       "      <th>...</th>\n",
       "      <th>cosine_similarity_needle_insert_lorem_1pos_0.1sz</th>\n",
       "      <th>cosine_similarity_needle_insert_lorem_0pos_0.2sz</th>\n",
       "      <th>cosine_similarity_needle_insert_lorem_0.5pos_0.2sz</th>\n",
       "      <th>cosine_similarity_needle_insert_lorem_1pos_0.2sz</th>\n",
       "      <th>cosine_similarity_needle_insert_lorem_0pos_0.5sz</th>\n",
       "      <th>cosine_similarity_needle_insert_lorem_0.5pos_0.5sz</th>\n",
       "      <th>cosine_similarity_needle_insert_lorem_1pos_0.5sz</th>\n",
       "      <th>cosine_similarity_needle_insert_lorem_0pos_1sz</th>\n",
       "      <th>cosine_similarity_needle_insert_lorem_0.5pos_1sz</th>\n",
       "      <th>cosine_similarity_needle_insert_lorem_1pos_1sz</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>This sound track was beautiful! It paints the ...</td>\n",
       "      <td>Lorem ipsum dolor sitThis sound track was beau...</td>\n",
       "      <td>This sound track was beautiful! It paints the ...</td>\n",
       "      <td>This sound track was beautiful! It paints the ...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>This sound track was beautiful! It paints the ...</td>\n",
       "      <td>This sound track was beautiful! It paints the ...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>This sound track was beautiful! It paints the ...</td>\n",
       "      <td>This sound track was beautiful! It paints the ...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.950111</td>\n",
       "      <td>0.860392</td>\n",
       "      <td>0.891269</td>\n",
       "      <td>0.906543</td>\n",
       "      <td>0.737501</td>\n",
       "      <td>0.817572</td>\n",
       "      <td>0.814055</td>\n",
       "      <td>0.677161</td>\n",
       "      <td>0.785201</td>\n",
       "      <td>0.786855</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>I'm reading a lot of reviews saying that this ...</td>\n",
       "      <td>Lorem ipsum dolor sit ametI'm reading a lot of...</td>\n",
       "      <td>I'm reading a lot of reviews saying that this ...</td>\n",
       "      <td>I'm reading a lot of reviews saying that this ...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>I'm reading a lot of reviews saying that this ...</td>\n",
       "      <td>I'm reading a lot of reviews saying that this ...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>I'm reading a lot of reviews saying that this ...</td>\n",
       "      <td>I'm reading a lot of reviews saying that this ...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.973223</td>\n",
       "      <td>0.856864</td>\n",
       "      <td>0.962597</td>\n",
       "      <td>0.947977</td>\n",
       "      <td>0.776059</td>\n",
       "      <td>0.869828</td>\n",
       "      <td>0.869973</td>\n",
       "      <td>0.694085</td>\n",
       "      <td>0.819481</td>\n",
       "      <td>0.828308</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>This soundtrack is my favorite music of all ti...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>This soundtrack is my favorite music of all ti...</td>\n",
       "      <td>This soundtrack is my favorite music of all ti...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>This soundtrack is my favorite music of all ti...</td>\n",
       "      <td>This soundtrack is my favorite music of all ti...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>This soundtrack is my favorite music of all ti...</td>\n",
       "      <td>This soundtrack is my favorite music of all ti...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.986278</td>\n",
       "      <td>0.872955</td>\n",
       "      <td>0.938253</td>\n",
       "      <td>0.943580</td>\n",
       "      <td>0.822633</td>\n",
       "      <td>0.924585</td>\n",
       "      <td>0.940297</td>\n",
       "      <td>0.768543</td>\n",
       "      <td>0.910249</td>\n",
       "      <td>0.929751</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>I truly like this soundtrack and I enjoy video...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>I truly like this soundtrack and I enjoy video...</td>\n",
       "      <td>I truly like this soundtrack and I enjoy video...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>I truly like this soundtrack and I enjoy video...</td>\n",
       "      <td>I truly like this soundtrack and I enjoy video...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>I truly like this soundtrack and I enjoy video...</td>\n",
       "      <td>I truly like this soundtrack and I enjoy video...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.990826</td>\n",
       "      <td>0.847621</td>\n",
       "      <td>0.945417</td>\n",
       "      <td>0.948462</td>\n",
       "      <td>0.755354</td>\n",
       "      <td>0.923924</td>\n",
       "      <td>0.932174</td>\n",
       "      <td>0.708627</td>\n",
       "      <td>0.898113</td>\n",
       "      <td>0.928615</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>If you've played the game, you know how divine...</td>\n",
       "      <td>Lorem ipsum dolor sit ametIf you've played the...</td>\n",
       "      <td>If you've played the game, you know how divine...</td>\n",
       "      <td>If you've played the game, you know how divine...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>If you've played the game, you know how divine...</td>\n",
       "      <td>If you've played the game, you know how divine...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>If you've played the game, you know how divine...</td>\n",
       "      <td>If you've played the game, you know how divine...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.960305</td>\n",
       "      <td>0.900816</td>\n",
       "      <td>0.963921</td>\n",
       "      <td>0.949138</td>\n",
       "      <td>0.787718</td>\n",
       "      <td>0.870125</td>\n",
       "      <td>0.893615</td>\n",
       "      <td>0.699624</td>\n",
       "      <td>0.788773</td>\n",
       "      <td>0.863809</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>995</th>\n",
       "      <td>I have only read the first 44 pages of this bo...</td>\n",
       "      <td>Lorem ipsum dolor sit amet,I have only read th...</td>\n",
       "      <td>I have only read the first 44 pages of this bo...</td>\n",
       "      <td>I have only read the first 44 pages of this bo...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>I have only read the first 44 pages of this bo...</td>\n",
       "      <td>I have only read the first 44 pages of this bo...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>I have only read the first 44 pages of this bo...</td>\n",
       "      <td>I have only read the first 44 pages of this bo...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.962324</td>\n",
       "      <td>0.890096</td>\n",
       "      <td>0.931806</td>\n",
       "      <td>0.950038</td>\n",
       "      <td>0.798512</td>\n",
       "      <td>0.869520</td>\n",
       "      <td>0.900035</td>\n",
       "      <td>0.752651</td>\n",
       "      <td>0.829703</td>\n",
       "      <td>0.874337</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>996</th>\n",
       "      <td>I am currently reading the Kindle version of t...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consecteturI am cu...</td>\n",
       "      <td>I am currently reading the Kindle version of t...</td>\n",
       "      <td>I am currently reading the Kindle version of t...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>I am currently reading the Kindle version of t...</td>\n",
       "      <td>I am currently reading the Kindle version of t...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>I am currently reading the Kindle version of t...</td>\n",
       "      <td>I am currently reading the Kindle version of t...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.943312</td>\n",
       "      <td>0.797737</td>\n",
       "      <td>0.847413</td>\n",
       "      <td>0.858597</td>\n",
       "      <td>0.718440</td>\n",
       "      <td>0.828096</td>\n",
       "      <td>0.855546</td>\n",
       "      <td>0.639152</td>\n",
       "      <td>0.772391</td>\n",
       "      <td>0.837774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>997</th>\n",
       "      <td>This is a text book I ordered for my classes f...</td>\n",
       "      <td>Lorem ipsum dolor sit ametThis is a text book ...</td>\n",
       "      <td>This is a text book I ordered for my classes f...</td>\n",
       "      <td>This is a text book I ordered for my classes f...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>This is a text book I ordered for my classes f...</td>\n",
       "      <td>This is a text book I ordered for my classes f...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>This is a text book I ordered for my classes f...</td>\n",
       "      <td>This is a text book I ordered for my classes f...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.959729</td>\n",
       "      <td>0.800550</td>\n",
       "      <td>0.903573</td>\n",
       "      <td>0.939190</td>\n",
       "      <td>0.690732</td>\n",
       "      <td>0.845811</td>\n",
       "      <td>0.795806</td>\n",
       "      <td>0.639489</td>\n",
       "      <td>0.774844</td>\n",
       "      <td>0.780497</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>998</th>\n",
       "      <td>I find it hard to believe people fall for this...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>I find it hard to believe people fall for this...</td>\n",
       "      <td>I find it hard to believe people fall for this...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>I find it hard to believe people fall for this...</td>\n",
       "      <td>I find it hard to believe people fall for this...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>I find it hard to believe people fall for this...</td>\n",
       "      <td>I find it hard to believe people fall for this...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.962969</td>\n",
       "      <td>0.784737</td>\n",
       "      <td>0.853096</td>\n",
       "      <td>0.905229</td>\n",
       "      <td>0.705122</td>\n",
       "      <td>0.851851</td>\n",
       "      <td>0.847751</td>\n",
       "      <td>0.617321</td>\n",
       "      <td>0.777114</td>\n",
       "      <td>0.829936</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>999</th>\n",
       "      <td>\"On average, we breathe through one nostril fo...</td>\n",
       "      <td>Lorem ipsum dolor sit amet\"On average, we brea...</td>\n",
       "      <td>\"On average, we breathe through one nostril fo...</td>\n",
       "      <td>\"On average, we breathe through one nostril fo...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>\"On average, we breathe through one nostril fo...</td>\n",
       "      <td>\"On average, we breathe through one nostril fo...</td>\n",
       "      <td>Lorem ipsum dolor sit amet, consectetur adipis...</td>\n",
       "      <td>\"On average, we breathe through one nostril fo...</td>\n",
       "      <td>\"On average, we breathe through one nostril fo...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.977667</td>\n",
       "      <td>0.891969</td>\n",
       "      <td>0.950666</td>\n",
       "      <td>0.954407</td>\n",
       "      <td>0.793791</td>\n",
       "      <td>0.915179</td>\n",
       "      <td>0.924690</td>\n",
       "      <td>0.759320</td>\n",
       "      <td>0.856816</td>\n",
       "      <td>0.904644</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1000 rows × 52 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              original  \\\n",
       "0    This sound track was beautiful! It paints the ...   \n",
       "1    I'm reading a lot of reviews saying that this ...   \n",
       "2    This soundtrack is my favorite music of all ti...   \n",
       "3    I truly like this soundtrack and I enjoy video...   \n",
       "4    If you've played the game, you know how divine...   \n",
       "..                                                 ...   \n",
       "995  I have only read the first 44 pages of this bo...   \n",
       "996  I am currently reading the Kindle version of t...   \n",
       "997  This is a text book I ordered for my classes f...   \n",
       "998  I find it hard to believe people fall for this...   \n",
       "999  \"On average, we breathe through one nostril fo...   \n",
       "\n",
       "                  text_needle_insert_lorem_0pos_0.05sz  \\\n",
       "0    Lorem ipsum dolor sitThis sound track was beau...   \n",
       "1    Lorem ipsum dolor sit ametI'm reading a lot of...   \n",
       "2    Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "3    Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "4    Lorem ipsum dolor sit ametIf you've played the...   \n",
       "..                                                 ...   \n",
       "995  Lorem ipsum dolor sit amet,I have only read th...   \n",
       "996  Lorem ipsum dolor sit amet, consecteturI am cu...   \n",
       "997  Lorem ipsum dolor sit ametThis is a text book ...   \n",
       "998  Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "999  Lorem ipsum dolor sit amet\"On average, we brea...   \n",
       "\n",
       "                text_needle_insert_lorem_0.5pos_0.05sz  \\\n",
       "0    This sound track was beautiful! It paints the ...   \n",
       "1    I'm reading a lot of reviews saying that this ...   \n",
       "2    This soundtrack is my favorite music of all ti...   \n",
       "3    I truly like this soundtrack and I enjoy video...   \n",
       "4    If you've played the game, you know how divine...   \n",
       "..                                                 ...   \n",
       "995  I have only read the first 44 pages of this bo...   \n",
       "996  I am currently reading the Kindle version of t...   \n",
       "997  This is a text book I ordered for my classes f...   \n",
       "998  I find it hard to believe people fall for this...   \n",
       "999  \"On average, we breathe through one nostril fo...   \n",
       "\n",
       "                  text_needle_insert_lorem_1pos_0.05sz  \\\n",
       "0    This sound track was beautiful! It paints the ...   \n",
       "1    I'm reading a lot of reviews saying that this ...   \n",
       "2    This soundtrack is my favorite music of all ti...   \n",
       "3    I truly like this soundtrack and I enjoy video...   \n",
       "4    If you've played the game, you know how divine...   \n",
       "..                                                 ...   \n",
       "995  I have only read the first 44 pages of this bo...   \n",
       "996  I am currently reading the Kindle version of t...   \n",
       "997  This is a text book I ordered for my classes f...   \n",
       "998  I find it hard to believe people fall for this...   \n",
       "999  \"On average, we breathe through one nostril fo...   \n",
       "\n",
       "                   text_needle_insert_lorem_0pos_0.1sz  \\\n",
       "0    Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "1    Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "2    Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "3    Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "4    Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "..                                                 ...   \n",
       "995  Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "996  Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "997  Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "998  Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "999  Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "\n",
       "                 text_needle_insert_lorem_0.5pos_0.1sz  \\\n",
       "0    This sound track was beautiful! It paints the ...   \n",
       "1    I'm reading a lot of reviews saying that this ...   \n",
       "2    This soundtrack is my favorite music of all ti...   \n",
       "3    I truly like this soundtrack and I enjoy video...   \n",
       "4    If you've played the game, you know how divine...   \n",
       "..                                                 ...   \n",
       "995  I have only read the first 44 pages of this bo...   \n",
       "996  I am currently reading the Kindle version of t...   \n",
       "997  This is a text book I ordered for my classes f...   \n",
       "998  I find it hard to believe people fall for this...   \n",
       "999  \"On average, we breathe through one nostril fo...   \n",
       "\n",
       "                   text_needle_insert_lorem_1pos_0.1sz  \\\n",
       "0    This sound track was beautiful! It paints the ...   \n",
       "1    I'm reading a lot of reviews saying that this ...   \n",
       "2    This soundtrack is my favorite music of all ti...   \n",
       "3    I truly like this soundtrack and I enjoy video...   \n",
       "4    If you've played the game, you know how divine...   \n",
       "..                                                 ...   \n",
       "995  I have only read the first 44 pages of this bo...   \n",
       "996  I am currently reading the Kindle version of t...   \n",
       "997  This is a text book I ordered for my classes f...   \n",
       "998  I find it hard to believe people fall for this...   \n",
       "999  \"On average, we breathe through one nostril fo...   \n",
       "\n",
       "                   text_needle_insert_lorem_0pos_0.2sz  \\\n",
       "0    Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "1    Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "2    Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "3    Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "4    Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "..                                                 ...   \n",
       "995  Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "996  Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "997  Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "998  Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "999  Lorem ipsum dolor sit amet, consectetur adipis...   \n",
       "\n",
       "                 text_needle_insert_lorem_0.5pos_0.2sz  \\\n",
       "0    This sound track was beautiful! It paints the ...   \n",
       "1    I'm reading a lot of reviews saying that this ...   \n",
       "2    This soundtrack is my favorite music of all ti...   \n",
       "3    I truly like this soundtrack and I enjoy video...   \n",
       "4    If you've played the game, you know how divine...   \n",
       "..                                                 ...   \n",
       "995  I have only read the first 44 pages of this bo...   \n",
       "996  I am currently reading the Kindle version of t...   \n",
       "997  This is a text book I ordered for my classes f...   \n",
       "998  I find it hard to believe people fall for this...   \n",
       "999  \"On average, we breathe through one nostril fo...   \n",
       "\n",
       "                   text_needle_insert_lorem_1pos_0.2sz  ...  \\\n",
       "0    This sound track was beautiful! It paints the ...  ...   \n",
       "1    I'm reading a lot of reviews saying that this ...  ...   \n",
       "2    This soundtrack is my favorite music of all ti...  ...   \n",
       "3    I truly like this soundtrack and I enjoy video...  ...   \n",
       "4    If you've played the game, you know how divine...  ...   \n",
       "..                                                 ...  ...   \n",
       "995  I have only read the first 44 pages of this bo...  ...   \n",
       "996  I am currently reading the Kindle version of t...  ...   \n",
       "997  This is a text book I ordered for my classes f...  ...   \n",
       "998  I find it hard to believe people fall for this...  ...   \n",
       "999  \"On average, we breathe through one nostril fo...  ...   \n",
       "\n",
       "    cosine_similarity_needle_insert_lorem_1pos_0.1sz  \\\n",
       "0                                           0.950111   \n",
       "1                                           0.973223   \n",
       "2                                           0.986278   \n",
       "3                                           0.990826   \n",
       "4                                           0.960305   \n",
       "..                                               ...   \n",
       "995                                         0.962324   \n",
       "996                                         0.943312   \n",
       "997                                         0.959729   \n",
       "998                                         0.962969   \n",
       "999                                         0.977667   \n",
       "\n",
       "    cosine_similarity_needle_insert_lorem_0pos_0.2sz  \\\n",
       "0                                           0.860392   \n",
       "1                                           0.856864   \n",
       "2                                           0.872955   \n",
       "3                                           0.847621   \n",
       "4                                           0.900816   \n",
       "..                                               ...   \n",
       "995                                         0.890096   \n",
       "996                                         0.797737   \n",
       "997                                         0.800550   \n",
       "998                                         0.784737   \n",
       "999                                         0.891969   \n",
       "\n",
       "    cosine_similarity_needle_insert_lorem_0.5pos_0.2sz  \\\n",
       "0                                             0.891269   \n",
       "1                                             0.962597   \n",
       "2                                             0.938253   \n",
       "3                                             0.945417   \n",
       "4                                             0.963921   \n",
       "..                                                 ...   \n",
       "995                                           0.931806   \n",
       "996                                           0.847413   \n",
       "997                                           0.903573   \n",
       "998                                           0.853096   \n",
       "999                                           0.950666   \n",
       "\n",
       "    cosine_similarity_needle_insert_lorem_1pos_0.2sz  \\\n",
       "0                                           0.906543   \n",
       "1                                           0.947977   \n",
       "2                                           0.943580   \n",
       "3                                           0.948462   \n",
       "4                                           0.949138   \n",
       "..                                               ...   \n",
       "995                                         0.950038   \n",
       "996                                         0.858597   \n",
       "997                                         0.939190   \n",
       "998                                         0.905229   \n",
       "999                                         0.954407   \n",
       "\n",
       "    cosine_similarity_needle_insert_lorem_0pos_0.5sz  \\\n",
       "0                                           0.737501   \n",
       "1                                           0.776059   \n",
       "2                                           0.822633   \n",
       "3                                           0.755354   \n",
       "4                                           0.787718   \n",
       "..                                               ...   \n",
       "995                                         0.798512   \n",
       "996                                         0.718440   \n",
       "997                                         0.690732   \n",
       "998                                         0.705122   \n",
       "999                                         0.793791   \n",
       "\n",
       "    cosine_similarity_needle_insert_lorem_0.5pos_0.5sz  \\\n",
       "0                                             0.817572   \n",
       "1                                             0.869828   \n",
       "2                                             0.924585   \n",
       "3                                             0.923924   \n",
       "4                                             0.870125   \n",
       "..                                                 ...   \n",
       "995                                           0.869520   \n",
       "996                                           0.828096   \n",
       "997                                           0.845811   \n",
       "998                                           0.851851   \n",
       "999                                           0.915179   \n",
       "\n",
       "    cosine_similarity_needle_insert_lorem_1pos_0.5sz  \\\n",
       "0                                           0.814055   \n",
       "1                                           0.869973   \n",
       "2                                           0.940297   \n",
       "3                                           0.932174   \n",
       "4                                           0.893615   \n",
       "..                                               ...   \n",
       "995                                         0.900035   \n",
       "996                                         0.855546   \n",
       "997                                         0.795806   \n",
       "998                                         0.847751   \n",
       "999                                         0.924690   \n",
       "\n",
       "    cosine_similarity_needle_insert_lorem_0pos_1sz  \\\n",
       "0                                         0.677161   \n",
       "1                                         0.694085   \n",
       "2                                         0.768543   \n",
       "3                                         0.708627   \n",
       "4                                         0.699624   \n",
       "..                                             ...   \n",
       "995                                       0.752651   \n",
       "996                                       0.639152   \n",
       "997                                       0.639489   \n",
       "998                                       0.617321   \n",
       "999                                       0.759320   \n",
       "\n",
       "    cosine_similarity_needle_insert_lorem_0.5pos_1sz  \\\n",
       "0                                           0.785201   \n",
       "1                                           0.819481   \n",
       "2                                           0.910249   \n",
       "3                                           0.898113   \n",
       "4                                           0.788773   \n",
       "..                                               ...   \n",
       "995                                         0.829703   \n",
       "996                                         0.772391   \n",
       "997                                         0.774844   \n",
       "998                                         0.777114   \n",
       "999                                         0.856816   \n",
       "\n",
       "    cosine_similarity_needle_insert_lorem_1pos_1sz  \n",
       "0                                         0.786855  \n",
       "1                                         0.828308  \n",
       "2                                         0.929751  \n",
       "3                                         0.928615  \n",
       "4                                         0.863809  \n",
       "..                                             ...  \n",
       "995                                       0.874337  \n",
       "996                                       0.837774  \n",
       "997                                       0.780497  \n",
       "998                                       0.829936  \n",
       "999                                       0.904644  \n",
       "\n",
       "[1000 rows x 52 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the frame loaded in the previous cell (rich display, truncated by pandas).\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
