{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "import os\n",
    "from pathlib import Path\n",
    "from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer\n",
    "import torch\n",
    "from sklearn.model_selection import train_test_split\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named '_datasets'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m_datasets\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DatasetConfig\n\u001b[1;32m      2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtransform_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mneedle_haystack_similarity\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ExperimentConfig\n",
      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named '_datasets'"
     ]
    }
   ],
   "source": [
    "from _datasets.data import DatasetConfig\n",
    "from utils.transform_utils import *\n",
    "from needle_haystack_similarity import ExperimentConfig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 276,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset scientific_papers loaded.\n",
      "Filtered dataset to 500 examples.\n",
      "Loaded 3 needle configs.\n"
     ]
    }
   ],
   "source": [
    "# Experiment settings, passed directly to ExperimentConfig as keyword arguments.\n",
    "config = ExperimentConfig(\n",
    "    mode=\"insert\",\n",
    "    dataset_name=\"scientific_papers\",\n",
    "    num_examples=500,\n",
    "    needle_keywords=[None],\n",
    "    needle_sizes=[0.5],\n",
    "    needle_posns=[0, 0.5, 1],\n",
    "    model_name=\"BAAI/bge-m3\",\n",
    "    max_length=8192,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 277,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull the working DataFrame from the dataset config attached to the experiment config.\n",
    "# Standalone alternative that bypasses ExperimentConfig (kept for reference):\n",
    "# dataset_name = \"scientific_papers\"\n",
    "# num_examples = 10\n",
    "# df = DatasetConfig(dataset_name, num_examples).get_dataset()\n",
    "df = config.dataset_config.get_dataset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 278,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>original</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>304</th>\n",
       "      <td>objectivesmetabolic syndrome is a precursor o...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>340</th>\n",
       "      <td>purposethe purpose of this study was to deriv...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>green fluorescent protein ( gfp ) and gfp - l...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>purposeto report a case of inadvertent anteri...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>479</th>\n",
       "      <td>techniques for isolation and culture of fetal...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>255</th>\n",
       "      <td>the contamination of seafood by bacteria of f...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td>introduction : the objective of this study wa...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>396</th>\n",
       "      <td>hyperostosis frontalis interna is an unexplai...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>235</th>\n",
       "      <td>seasonal \\n and pandemic influenza outbreaks ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>\\n objective . to examine risk factors for fa...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>500 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              original\n",
       "304   objectivesmetabolic syndrome is a precursor o...\n",
       "340   purposethe purpose of this study was to deriv...\n",
       "47    green fluorescent protein ( gfp ) and gfp - l...\n",
       "67    purposeto report a case of inadvertent anteri...\n",
       "479   techniques for isolation and culture of fetal...\n",
       "..                                                 ...\n",
       "255   the contamination of seafood by bacteria of f...\n",
       "72    introduction : the objective of this study wa...\n",
       "396   hyperostosis frontalis interna is an unexplai...\n",
       "235   seasonal \\n and pandemic influenza outbreaks ...\n",
       "37    \\n objective . to examine risk factors for fa...\n",
       "\n",
       "[500 rows x 1 columns]"
      ]
     },
     "execution_count": 278,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Single source of truth for the shuffle seed. The value 1 is the random_state this\n",
    "# cell has always used, so the row order (and everything derived from it) is unchanged.\n",
    "seed = 1\n",
    "# Shuffle all rows deterministically; the original index labels are kept, so the\n",
    "# index is no longer in ascending order after this cell.\n",
    "df = df.sample(frac=1, random_state=seed)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 279,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['purposeto evaluate retinopathy associated with interferon therapy in patients with chronic hepatitis c.methodsone hundred patients with chronic hepatitis c undergoing interferon therapy were examined for the presence of cotton wool spots , retinal hemorrhages , cystoid macular edema , capillary non - perfusion , and arteriolar occlusion.',\n",
       " 'complete ophthalmological examination including indirect ophthalmoscopic fundus examination was carried out for all patients and colored fundus photography and fluorescein angiography were carried out for the patients with positive fundus findings.',\n",
       " 'the follow - up period was 9 months.resultssixteen percent of patients developed retinopathy in the form of cotton wool spots , retinal hemorrhages , cystoid macular edema , and capillary non-perfusion.conclusioninterferon therapy can lead to retinopathy which is mostly reversible and dose related.',\n",
       " 'periodic fundoscopic examinations help in early detection and prevent progression to permanent visual loss.',\n",
       " 'according to a patient - centered viewpoint , the meaning of harmonization in the context of laboratory medicine is that the information should be comparable irrespective of the measurement procedure used and where and/or when a measurement is made.',\n",
       " 'harmonization represents a fundamental aspect of quality in laboratory medicine as its ultimate goal is to improve patient outcomes through the provision of an accurate and actionable laboratory information.',\n",
       " 'although the initial focus has to a large extent been to harmonize and standardize analytical processes and methods , the scope of harmonization goes beyond to include all other aspects of the total testing process ( ttp ) , such as terminology and units , report formats , reference intervals and decision limits , as well as tests and test profiles request and criteria for interpretation.',\n",
       " 'two major progresses have been made in the area of harmonization in laboratory medicine : first , the awareness that harmonization should take into consideration not only the analytical phase but all steps of the ttp , thus dealing with the request , the sample , the measurement , and the report.',\n",
       " 'second , as the processes required to achieve harmonization are complicated , a systematic approach is needed.',\n",
       " 'the international federation of clinical chemistry and laboratory medicine ( ifcc ) has played a fundamental and successful role in the development of standardized and harmonized assays , and now it should continue to work in the field through the collaboration and cooperation with many other stakeholders.']"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "' tardive dystonia ( td ) is a serious side effect of antipsychotic medications , more with typical antipsychotics , that is potentially irreversible in affected patients . \\n studies show that newer atypical antipsychotics have a lower risk of td . as a result , many clinicians may have developed a false sense of security when prescribing these medications . \\n we report a case of 20-year - old male with hyperthymic temperament and borderline intellectual functioning , who developed severe td after low dose short duration exposure to atypical antipsychotic risperidone and then olanzapine . \\n the goal of this paper is to alert the reader to be judicious and cautious before using casual low dose second generation antipsychotics in patient with no core psychotic features , hyperthymic temperament , or borderline intellectual functioning suggestive of organic brain damage , who are more prone to develop adverse effects such as td and monitor the onset of td in patients taking atypical antipsychotics . '"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "def circular_rotate(series, n):\n",
    "    \"\"\"Rotate ``series`` left by ``n`` positions and renumber its index from 0.\n",
    "\n",
    "    The element at position ``n`` becomes the first element and the first ``n``\n",
    "    elements wrap around to the end.\n",
    "\n",
    "    Args:\n",
    "        series: pandas Series to rotate; it is not modified.\n",
    "        n: Number of positions to rotate by. Values larger than the length\n",
    "            (or negative values) are reduced modulo the length.\n",
    "\n",
    "    Returns:\n",
    "        A new Series holding the rotated values with a fresh 0..len-1 index.\n",
    "    \"\"\"\n",
    "    size = len(series)\n",
    "    if size == 0:\n",
    "        # Nothing to rotate; this also avoids the modulo-by-zero below.\n",
    "        return series.reset_index(drop=True)\n",
    "    n = n % size  # Handle cases where n is larger than the length of the series\n",
    "    # .iloc makes the slicing explicitly positional, whatever the index labels are.\n",
    "    return pd.concat([series.iloc[n:], series.iloc[:n]]).reset_index(drop=True)\n",
    "\n",
    "def get_negatives(texts: pd.Series, num_neg=2, by_sentence=False):\n",
    "    \"\"\"Build the negatives for every text from the texts that follow it.\n",
    "\n",
    "    For the text at position ``i`` the negatives are the texts at positions\n",
    "    ``i+1`` .. ``i+num_neg``, wrapping around at the end of ``texts``.\n",
    "\n",
    "    Args:\n",
    "        texts: Series of texts; only the positional order is used.\n",
    "        num_neg: How many following texts to use as negatives for each row.\n",
    "        by_sentence: If True, every negative text is passed through\n",
    "            ``get_sentences`` and the results are concatenated into one flat\n",
    "            list per row.\n",
    "\n",
    "    Returns:\n",
    "        A list with one entry per text: a tuple of ``num_neg`` texts, or a\n",
    "        flat list of the ``get_sentences`` results when ``by_sentence`` is True.\n",
    "        An empty ``texts`` yields an empty list.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``num_neg >= len(texts)``; the rotation would wrap all\n",
    "            the way around and use a text as its own negative.\n",
    "    \"\"\"\n",
    "    if len(texts) == 0:\n",
    "        return []\n",
    "    if num_neg >= len(texts):\n",
    "        raise ValueError(\n",
    "            f\"num_neg ({num_neg}) must be smaller than the number of texts \"\n",
    "            f\"({len(texts)}), otherwise a text becomes its own negative\"\n",
    "        )\n",
    "\n",
    "    # One rotated copy of the texts per requested negative.\n",
    "    rotations = [circular_rotate(texts, offset) for offset in range(1, num_neg + 1)]\n",
    "    # Transpose so that each entry groups the negatives belonging to one row.\n",
    "    per_row = list(zip(*rotations))\n",
    "\n",
    "    if not by_sentence:\n",
    "        return per_row\n",
    "\n",
    "    # NOTE(review): get_sentences is not defined in this notebook (presumably it comes\n",
    "    # from the utils.transform_utils star import); it is assumed to return an\n",
    "    # iterable of sentences for a single text - confirm.\n",
    "    return [\n",
    "        [sentence for text in row for sentence in get_sentences(text)]\n",
    "        for row in per_row\n",
    "    ]\n",
    "# Attach sentence-level negatives built from the two texts that follow each row.\n",
    "df['negatives'] = get_negatives(df['original'], num_neg=2, by_sentence=True)\n",
    "# Spot-check the row whose index label is 2: its negatives, then its own text.\n",
    "display(df['negatives'].loc[2])\n",
    "display(df['original'].loc[2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 280,
   "metadata": {},
   "outputs": [],
   "source": [
    "# display(df)\n",
    "# display(df['negatives'][0])\n",
    "# display(df['original'][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 281,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       purposethe purpose of this study was to deriv...\n",
       "1       green fluorescent protein ( gfp ) and gfp - l...\n",
       "2       purposeto report a case of inadvertent anteri...\n",
       "3       techniques for isolation and culture of fetal...\n",
       "4       one of the room temperature ionic liquids ( r...\n",
       "                             ...                        \n",
       "495     introduction : the objective of this study wa...\n",
       "496     hyperostosis frontalis interna is an unexplai...\n",
       "497     seasonal \\n and pandemic influenza outbreaks ...\n",
       "498     \\n objective . to examine risk factors for fa...\n",
       "499     objectivesmetabolic syndrome is a precursor o...\n",
       "Name: original, Length: 500, dtype: object"
      ]
     },
     "execution_count": 281,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview a one-step rotation of the texts (the result is displayed, not stored).\n",
    "circular_rotate(df['original'], 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 282,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>original</th>\n",
       "      <th>negatives</th>\n",
       "      <th>sentences</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>304</th>\n",
       "      <td>objectivesmetabolic syndrome is a precursor o...</td>\n",
       "      <td>[purposethe purpose of this study was to deriv...</td>\n",
       "      <td>[objectivesmetabolic syndrome is a precursor o...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>340</th>\n",
       "      <td>purposethe purpose of this study was to deriv...</td>\n",
       "      <td>[green fluorescent protein ( gfp ) and gfp - l...</td>\n",
       "      <td>[purposethe purpose of this study was to deriv...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>green fluorescent protein ( gfp ) and gfp - l...</td>\n",
       "      <td>[purposeto report a case of inadvertent anteri...</td>\n",
       "      <td>[green fluorescent protein ( gfp ) and gfp - l...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>purposeto report a case of inadvertent anteri...</td>\n",
       "      <td>[techniques for isolation and culture of fetal...</td>\n",
       "      <td>[purposeto report a case of inadvertent anteri...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>479</th>\n",
       "      <td>techniques for isolation and culture of fetal...</td>\n",
       "      <td>[one of the room temperature ionic liquids ( r...</td>\n",
       "      <td>[techniques for isolation and culture of fetal...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              original  \\\n",
       "304   objectivesmetabolic syndrome is a precursor o...   \n",
       "340   purposethe purpose of this study was to deriv...   \n",
       "47    green fluorescent protein ( gfp ) and gfp - l...   \n",
       "67    purposeto report a case of inadvertent anteri...   \n",
       "479   techniques for isolation and culture of fetal...   \n",
       "\n",
       "                                             negatives  \\\n",
       "304  [purposethe purpose of this study was to deriv...   \n",
       "340  [green fluorescent protein ( gfp ) and gfp - l...   \n",
       "47   [purposeto report a case of inadvertent anteri...   \n",
       "67   [techniques for isolation and culture of fetal...   \n",
       "479  [one of the room temperature ionic liquids ( r...   \n",
       "\n",
       "                                             sentences  \n",
       "304  [objectivesmetabolic syndrome is a precursor o...  \n",
       "340  [purposethe purpose of this study was to deriv...  \n",
       "47   [green fluorescent protein ( gfp ) and gfp - l...  \n",
       "67   [purposeto report a case of inadvertent anteri...  \n",
       "479  [techniques for isolation and culture of fetal...  "
      ]
     },
     "execution_count": 282,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# get_sentences receives the whole list of texts here; its result is stored one entry per row.\n",
    "df[\"sentences\"] = get_sentences(df[\"original\"].tolist())\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 283,
   "metadata": {},
   "outputs": [],
   "source": [
    "# exploded = df.explode('sentences').reset_index()\n",
    "# exploded = exploded.rename(columns={'index':'example'})\n",
    "# exploded.head(20)\n",
    "# exploded['sentences'].iloc[2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 284,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['original', 'negatives', 'sentences'], dtype='object')\n",
      "16\n"
     ]
    }
   ],
   "source": [
    "# Columns present so far, then the length of the first row's negatives list.\n",
    "print(df.columns)\n",
    "print(len(df['negatives'].iat[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 285,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>query</th>\n",
       "      <th>neg</th>\n",
       "      <th>pos</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>304</th>\n",
       "      <td>objectivesmetabolic syndrome is a precursor o...</td>\n",
       "      <td>[purposethe purpose of this study was to deriv...</td>\n",
       "      <td>[objectivesmetabolic syndrome is a precursor o...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>340</th>\n",
       "      <td>purposethe purpose of this study was to deriv...</td>\n",
       "      <td>[green fluorescent protein ( gfp ) and gfp - l...</td>\n",
       "      <td>[purposethe purpose of this study was to deriv...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>green fluorescent protein ( gfp ) and gfp - l...</td>\n",
       "      <td>[purposeto report a case of inadvertent anteri...</td>\n",
       "      <td>[green fluorescent protein ( gfp ) and gfp - l...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>purposeto report a case of inadvertent anteri...</td>\n",
       "      <td>[techniques for isolation and culture of fetal...</td>\n",
       "      <td>[purposeto report a case of inadvertent anteri...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>479</th>\n",
       "      <td>techniques for isolation and culture of fetal...</td>\n",
       "      <td>[one of the room temperature ionic liquids ( r...</td>\n",
       "      <td>[techniques for isolation and culture of fetal...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>255</th>\n",
       "      <td>the contamination of seafood by bacteria of f...</td>\n",
       "      <td>[introduction : the objective of this study wa...</td>\n",
       "      <td>[the contamination of seafood by bacteria of f...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td>introduction : the objective of this study wa...</td>\n",
       "      <td>[hyperostosis frontalis interna is an unexplai...</td>\n",
       "      <td>[introduction : the objective of this study wa...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>396</th>\n",
       "      <td>hyperostosis frontalis interna is an unexplai...</td>\n",
       "      <td>[seasonal., and pandemic influenza outbreaks r...</td>\n",
       "      <td>[hyperostosis frontalis interna is an unexplai...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>235</th>\n",
       "      <td>seasonal \\n and pandemic influenza outbreaks ...</td>\n",
       "      <td>[objective., to examine risk factors for false...</td>\n",
       "      <td>[seasonal., and pandemic influenza outbreaks r...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>\\n objective . to examine risk factors for fa...</td>\n",
       "      <td>[objectivesmetabolic syndrome is a precursor o...</td>\n",
       "      <td>[objective., to examine risk factors for false...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>500 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 query  \\\n",
       "304   objectivesmetabolic syndrome is a precursor o...   \n",
       "340   purposethe purpose of this study was to deriv...   \n",
       "47    green fluorescent protein ( gfp ) and gfp - l...   \n",
       "67    purposeto report a case of inadvertent anteri...   \n",
       "479   techniques for isolation and culture of fetal...   \n",
       "..                                                 ...   \n",
       "255   the contamination of seafood by bacteria of f...   \n",
       "72    introduction : the objective of this study wa...   \n",
       "396   hyperostosis frontalis interna is an unexplai...   \n",
       "235   seasonal \\n and pandemic influenza outbreaks ...   \n",
       "37    \\n objective . to examine risk factors for fa...   \n",
       "\n",
       "                                                   neg  \\\n",
       "304  [purposethe purpose of this study was to deriv...   \n",
       "340  [green fluorescent protein ( gfp ) and gfp - l...   \n",
       "47   [purposeto report a case of inadvertent anteri...   \n",
       "67   [techniques for isolation and culture of fetal...   \n",
       "479  [one of the room temperature ionic liquids ( r...   \n",
       "..                                                 ...   \n",
       "255  [introduction : the objective of this study wa...   \n",
       "72   [hyperostosis frontalis interna is an unexplai...   \n",
       "396  [seasonal., and pandemic influenza outbreaks r...   \n",
       "235  [objective., to examine risk factors for false...   \n",
       "37   [objectivesmetabolic syndrome is a precursor o...   \n",
       "\n",
       "                                                   pos  \n",
       "304  [objectivesmetabolic syndrome is a precursor o...  \n",
       "340  [purposethe purpose of this study was to deriv...  \n",
       "47   [green fluorescent protein ( gfp ) and gfp - l...  \n",
       "67   [purposeto report a case of inadvertent anteri...  \n",
       "479  [techniques for isolation and culture of fetal...  \n",
       "..                                                 ...  \n",
       "255  [the contamination of seafood by bacteria of f...  \n",
       "72   [introduction : the objective of this study wa...  \n",
       "396  [hyperostosis frontalis interna is an unexplai...  \n",
       "235  [seasonal., and pandemic influenza outbreaks r...  \n",
       "37   [objective., to examine risk factors for false...  \n",
       "\n",
       "[500 rows x 3 columns]"
      ]
     },
     "execution_count": 285,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Switch to the query / neg / pos column names before the frame is exported.\n",
    "df = df.rename(columns=dict(original='query', negatives='neg', sentences='pos'))\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 286,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = \"./data/custom.jsonl\"\n",
    "# to_json does not create missing directories, so make sure ./data exists first.\n",
    "Path(path).parent.mkdir(parents=True, exist_ok=True)\n",
    "# JSON Lines output: one JSON object per DataFrame row.\n",
    "df.to_json(path_or_buf=path, orient='records', lines=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 291,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['purposethe purpose of this study was to derive parameters that predict which high - energy blunt trauma patients should undergo computed tomography ( ct ) for detection of chest injury.methodsthis observational study prospectively included consecutive patients ( 16  years old ) who underwent multidetector ct of the chest after a high - energy mechanism of blunt trauma in one trauma centre.resultswe included 1,047 patients ( median age , 37 ; 70% male ) , of whom 508 had chest injuries identified by ct.',\n",
       " 'using logistic regression , we identified nine predictors of chest injury presence on ct ( age 55  years , abnormal chest physical examination , altered sensorium , abnormal thoracic spine physical examination , abnormal chest conventional radiography ( cr ) , abnormal thoracic spine cr , abnormal pelvic cr or abdominal ultrasound , base excess <.',\n",
       " '3  mmol / l and haemoglobin.',\n",
       " '< 6  mmol / l ).',\n",
       " 'of 855 patients with 1.',\n",
       " 'positive predictors , 484 had injury on ct ( 95% of all 508 patients with injury ).',\n",
       " 'of all 192 patients with no positive predictor , 24 ( 13% ).',\n",
       " 'had chest injury , of whom 4 ( 2% ) had injuries that were considered clinically relevant.conclusionomission of ct in patients without any positive predictor could reduce imaging frequency by 18% , while most clinically relevant chest injuries remain adequately detected.',\n",
       " 'green fluorescent protein ( gfp ) and gfp - like fluorescent proteins owe their photophysical properties to an autocatalytically formed intrinsic chromophore.',\n",
       " 'according to quantum mechanical calculations ,.',\n",
       " 'the excited state of chromophore model systems has significant dihedral freedom , which may lead to fluorescence quenching intersystem crossing.',\n",
       " 'molecular dynamics simulations with freely rotating chromophoric dihedrals were performed on green , yellow , and blue fluorescent proteins in order to model the dihedral freedom available to the chromophore in the excited state.',\n",
       " 'most current theories suggest that a restriction in the rotational freedom of the fluorescent protein chromophore will lead to an increase in fluorescence brightness and/or quantum yield.',\n",
       " 'according to our calculations , the dihedral freedom of the systems studied ( bfp > a5 > yfp > gfp ) increases in the inverse order to the quantum yield.',\n",
       " 'in all simulations ,.',\n",
       " 'the chromophore undergoes a negatively correlated hula twist ( also known as a bottom hula twist mechanism ).']"
      ]
     },
     "execution_count": 291,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# df['neg'].iloc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "research",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
