{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d0e5dcc-31ba-465c-8648-bcf64a99ee26",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import json\n",
    "import os\n",
    "import openai\n",
    "import jsonlines\n",
    "from tqdm import tqdm\n",
    "import time\n",
    "import argparse\n",
    "\n",
    "model_name = \"gpt-4o-2024-08-06\" # \n",
    "openai.api_key = ''\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f86df3b-223b-4b91-9bf4-5cc3cc1c3d01",
   "metadata": {},
   "outputs": [],
   "source": [
    "path = \"../../nlp_data/kisti/(분류) 국내 논문 전문 텍스트 데이터셋\"\n",
    "domains = os.listdir(path)\n",
    "domains[:5], len(domains)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f19278b7-8d25-4dd9-ad22-f40fa252e33c",
   "metadata": {},
   "outputs": [],
   "source": [
    "domain2papers = {}\n",
    "for domain in domains:\n",
    "    years = os.listdir(path + \"/\" + domain)\n",
    "    paper_paths = []\n",
    "    for year in years:\n",
    "        papers = os.listdir(path + \"/\" + domain + \"/\" + year)\n",
    "        paper_paths += papers\n",
    "        \n",
    "    domain2papers[domain] = paper_paths"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "69154c83-c524-4d48-951d-38bef2f66449",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "domain2cnt = {domain:len(domain2papers[domain]) for domain in domains}\n",
    "domain2cnt = dict(sorted(domain2cnt.items(), key=lambda x:-x[1]))\n",
    "list(domain2cnt.items())[:100]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "92f4406e-bfdd-4f9d-b28d-f31f240d3fd9",
   "metadata": {},
   "outputs": [],
   "source": [
    "sum(list(domain2cnt.values()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0edf3fff-157a-476e-8495-c6072fd8945f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def count_korean_and_english(text):\n",
    "    korean_count = 0\n",
    "    english_count = 0\n",
    "    \n",
    "    for char in text:\n",
    "        if '\\uAC00' <= char <= '\\uD7AF':\n",
    "            korean_count += 1\n",
    "        elif ('\\u0041' <= char <= '\\u005A') or ('\\u0061' <= char <= '\\u007A'):\n",
    "            english_count += 1\n",
    "    \n",
    "    return korean_count, english_count\n",
    "\n",
    "\n",
    "def count_korean_and_english_path(path):\n",
    "    f = open(path)\n",
    "    body = json.load(f,)['body_text']\n",
    "    total_korean_count = 0\n",
    "    total_english_count = 0\n",
    "    for section in body:\n",
    "        # if 'section' in section:\n",
    "        #     korean_count += len(section['section'])\n",
    "        if 'text' in section:\n",
    "            for text in section['text']:\n",
    "                korean_count, english_count = count_korean_and_english(text)\n",
    "                total_korean_count += korean_count\n",
    "                total_english_count += english_count\n",
    "    return total_korean_count, total_english_count\n",
    "\n",
    "                # korean_count += len(text)\n",
    "total_korean_count, total_english_count = count_korean_and_english_path(\"../../nlp_data/kisti/(분류) 국내 논문 전문 텍스트 데이터셋/재무관리연구/2009/JAKO200933359739803.json\")\n",
    "total_korean_count, total_english_count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a6177fa-2abb-4304-b1a6-a26634aed26a",
   "metadata": {},
   "outputs": [],
   "source": [
    "paper2lang_cnt = {}\n",
    "for domain in domains:\n",
    "    years = os.listdir(path + \"/\" + domain)\n",
    "    en_paper_paths = []\n",
    "    for year in years:\n",
    "        papers = os.listdir(path + \"/\" + domain + \"/\" + year)\n",
    "        for p_path in papers:\n",
    "            f_path = path + \"/\" + domain + \"/\" + year + \"/\" + p_path\n",
    "            total_korean_count, total_english_count = count_korean_and_english_path(f_path)\n",
    "            paper2lang_cnt[f_path] = (total_korean_count, total_english_count)\n",
    "      "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d1cea509-b5bb-47c1-ae5b-f9dc312ee51e",
   "metadata": {},
   "outputs": [],
   "source": [
    "en_papers = {}\n",
    "for f_path, cnts in paper2lang_cnt.items():\n",
    "    total_korean_count, total_english_count = cnts\n",
    "    if total_korean_count+total_english_count>0 and total_english_count/(total_korean_count+total_english_count)> 0.99:\n",
    "        en_papers[f_path] = cnts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "23d8ea5f-2e01-4dda-9f79-3d1a563cbacc",
   "metadata": {},
   "outputs": [],
   "source": [
    "len(en_papers)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "809fec6d-33a5-4c24-9e54-dc9386b47c07",
   "metadata": {},
   "outputs": [],
   "source": [
    "list(paper2lang_cnt.keys())[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ecec6845-dd5e-46bf-a1a5-83cd1cd9f127",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "torch.save(en_papers, \"../../nlp_data/kisti/db_files/en_paperpath2lang_cnt\")\n",
    "torch.save(paper2lang_cnt, \"../../nlp_data/kisti/db_files/paperpath2lang_cnt\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a82dcaf1-0508-4455-80e4-762f46e04cb5",
   "metadata": {},
   "outputs": [],
   "source": [
    "paper2lang_cnt['../../nlp_data/kisti/(분류) 국내 논문 전문 텍스트 데이터셋/천문학논총/2014/JAKO201428854482770.json']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2752b0b3-8fed-47d1-b945-e6727ad8f849",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from collections import Counter\n",
    "domain_en_papers = [p_path.split('/')[6] for p_path in list(en_papers.keys())]\n",
    "domain_en_papers = Counter(domain_en_papers)\n",
    "domain_en_papers.most_common(40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d095ac98-10a2-4e11-b9f7-3ce2345c8fe4",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "certain_domain_en_ps = [en_p_path for en_p_path in list(en_papers.keys()) if \"아세아태평양축산학회지\" in en_p_path] # 2576\n",
    "# certain_domain_en_ps = [en_p_path for en_p_path in list(en_papers.keys()) if \"한국발생생물학회지-발생과생식\" in en_p_path] # 405\n",
    "# certain_domain_en_ps = [en_p_path for en_p_path in list(en_papers.keys()) if \"조선자연과학논문집\" in en_p_path] # 347\n",
    "# certain_domain_en_ps = [en_p_path for en_p_path in list(en_papers.keys()) if \"천문학논총\" in en_p_path] # 347\n",
    "# certain_domain_en_ps = [en_p_path for en_p_path in list(en_papers.keys()) if \"목재공학\" in en_p_path] # 396\n",
    "# certain_domain_en_ps = [en_p_path for en_p_path in list(en_papers.keys()) if \"패션비즈니스\" in en_p_path] # \n",
    "# certain_domain_en_ps = [en_p_path for en_p_path in list(en_papers.keys()) if \"대한위암학회지\" in en_p_path] # 409\n",
    "# certain_domain_en_ps = [en_p_path for en_p_path in list(en_papers.keys()) if \"한국전문물리치료학회지\" in en_p_path] # 280\n",
    "\n",
    "len(certain_domain_en_ps), certain_domain_en_ps[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e746fdbf-f637-4af5-99d7-d7172fde049e",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def extract_content_path(path):\n",
    "    f = open(path) # certain_domain_en_ps[5]\n",
    "    # print(path)\n",
    "    body = json.load(f,)['body_text']\n",
    "    all_content = \"\"\n",
    "    # print(body)\n",
    "    prev_section_name = \"\"\n",
    "    for section in body:\n",
    "\n",
    "        if 'text' in section:\n",
    "            section_name = section['section'] if 'section' in section else \"\"\n",
    "            for text in section['text']:\n",
    "                if prev_section_name:\n",
    "                    # print(prev_section_name+\": \"+section_name+\": \"+ text)\n",
    "                    all_content += prev_section_name+\": \"+section_name+\": \"+ text + \"\\n\"\n",
    "                else:\n",
    "                    # print(section_name+\": \"+ text)\n",
    "                    all_content += section_name+\": \"+ text + \" \"\n",
    "            prev_section_name = \"\"\n",
    "        else:\n",
    "            prev_section_name = section['section']\n",
    "    # print()\n",
    "    # len(all_content)\n",
    "    return all_content\n",
    "\n",
    "long_enough_ps = [f_path for f_path in certain_domain_en_ps if len(extract_content_path(f_path))>15000]\n",
    "len(long_enough_ps)\n",
    "\n",
    "import random\n",
    "random.seed(0)\n",
    "sampled_papers = random.sample(long_enough_ps, 900)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c03bb153-0141-4c05-a910-ff67a71da4a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "template = \\\n",
    "\"\"\"Create an information-seeking conversation between two annotators: a questioner and an answerer. We give you multiple sections of an academic paper as seed topics for the information-seeking questions. \n",
    "Assume that the questioner has access only to the main topic of the given content, while the answerer can access the full text. \n",
    "Allow topic switching by enabling the answerer to refer to sections from different sections of papers.\n",
    "\n",
    "The annotators are provided with the following guidelines.\n",
    "Guidelines for the questioner: \n",
    "    • The first question should be unambiguous and clear about the main topic of academic paper. The questioner, only knowing the main topic, cannot ask directly about the study's focus. Thus, do not ask like \"What is the primary focus of the study?\" as 1st question.\n",
    "    • The follow-up questions are contextualized and always dependent on the conversation history (especially, last answerer's repond) so that question itself is hard to understand. \n",
    "    • Avoid using same words as in section titles of the document. E.g. if the section title is “Awards”, a plausible question can be “What accolades did she receive for her work?”. • The conversation should involve multiple documents (topics).\n",
    "    \n",
    "Guidelines for the answerer: \n",
    "    • Based on the question, identify the relevant document and section. \n",
    "    • The answer should be based on the contents of the identified document. \n",
    "    • The rationale should be a sub-string of content such that it justifies the answer and should be recorded below the answers.\n",
    "    • The answer should be a sub-string in rationale whenever possible. However, answers should be edited to fit the conversational context (adding yes, no), perform reasoning (e.g. counting) etc. \n",
    "    • Personal opinions should never be included.\n",
    "\n",
    "- Example of an information-seeking conversation given the content:\n",
    "-- content: \n",
    "The Byzantine Empire, also referred to as the Eastern Roman Empire, was the continuation of the Roman Empire centered in Constantinople during Late Antiquity and the Middle Ages. The eastern half of the Empire survived the conditions that caused the fall of the West in the 5th century AD, and continued to exist until the fall of Constantinople to the Ottoman Empire in 1453. During most of its existence, the empire remained the most powerful economic, cultural, and military force in the Mediterranean world. The term \"Byzantine Empire\" was only coined following the empire's demise; its citizens referred to the polity as the \"Roman Empire\" and to themselves as \"Romans\".[a] Due to the imperial seat's move from Rome to Byzantium, the adoption of state Christianity, and the predominance of Greek instead of Latin, modern historians continue to make a distinction between the earlier Roman Empire and the later Byzantine Empire.\n",
    "The empire was largely dismantled in 1204, following the Sack of Constantinople by Latin armies at the end of the Fourth Crusade; its former territories were then divided into competing Greek rump states and Latin realms. Despite the eventual recovery of Constantinople in 1261, the reconstituted empire would wield only regional power during its final two centuries of existence. Its remaining territories were progressively annexed by the Ottomans in perennial wars fought throughout the 14th and 15th centuries. The fall of Constantinople to the Ottomans in 1453 ultimately brought the empire to an end. Many refugees who had fled the city after its capture settled in Italy and throughout Europe, helping to ignite the Renaissance. The fall of Constantinople is sometimes used to mark the dividing line between the Middle Ages and the early modern period.\n",
    "The situation became worse for Byzantium during the civil wars after Andronikos III died. A six-year-long civil war devastated the empire, allowing the Serbian ruler Stefan Dušan to overrun most of the empire's remaining territory and establish a Serbian Empire. In 1354, an earthquake at Gallipoli devastated the fort, allowing the Ottomans (who were hired as mercenaries during the civil war by John VI Kantakouzenos) to establish themselves in Europe.[115][116] By the time the Byzantine civil wars had ended, the Ottomans had defeated the Serbians and subjugated them as vassals. Following the Battle of Kosovo, much of the Balkans became dominated by the Ottomans.[117]\n",
    "Constantinople by this stage was underpopulated and dilapidated. The population of the city had collapsed so severely that it was now little more than a cluster of villages separated by fields. On 2 April 1453, Sultan Mehmed's army of 80,000 men and large numbers of irregulars laid siege to the city.[118] Despite a desperate last-ditch defence of the city by the massively outnumbered Christian forces (c. 7,000 men, 2,000 of whom were foreign),[119] Constantinople finally fell to the Ottomans after a two-month siege on 29 May 1453. The final Byzantine emperor, Constantine XI Palaiologos, was last seen casting off his imperial regalia and throwing himself into hand-to-hand combat after the walls of the city were taken.[120]\n",
    "Mehmed continued his conquests in Anatolia with its reunification and in Southeast Europe as far west as Bosnia. At home, he made many political and social reforms. He encouraged the arts and sciences, and by the end of his reign, his rebuilding program had changed Constantinople into a thriving imperial capital. He is considered a hero in modern-day Turkey and parts of the wider Muslim world. Among other things, Istanbul's Fatih district, Fatih Sultan Mehmet Bridge and Fatih Mosque are named after him.\n",
    "Anatolia (Turkish: Anadolu), also known as Asia Minor,[a] is a large peninsula or a region in Turkey, constituting most of its contemporary territory. Geographically, the Anatolian region is bounded by the Mediterranean Sea to the south, the Aegean Sea to the west, the Turkish Straits to the north-west, and the Black Sea to the north. The eastern and southeastern boundary is either the southeastern and eastern borders of Turkey,[1] or an imprecise line from the Black Sea to Gulf of Iskenderun.[2] Topographically, the Sea of Marmara connects the Black Sea with the Aegean Sea through the Bosporus strait and the Dardanelles strait, and separates Anatolia from Thrace in the Balkan peninsula of Southeastern Europe.\n",
    "The Akkadian Empire (/əˈkeɪdiən/)[2] was the first known ancient empire of Mesopotamia, succeeding the long-lived civilization of Sumer. Centered on the city of Akkad (/ˈækæd/)[3] and its surrounding region, the empire united Akkadian and Sumerian speakers under one rule and exercised significant influence across Mesopotamia, the Levant, and Anatolia, sending military expeditions as far south as Dilmun and Magan (modern United Arab Emirates, Saudi Arabia, Bahrain, Qatar and Oman) in the Arabian Peninsula.[4][page needed]\n",
    "\n",
    "-- Information-seeking conversation\n",
    "Q1: when was the byzantine empire born what was it originally called?\n",
    "A1: 5th century AD and was called Eastern Roman Empire, or Byzantium\n",
    "rationale: The Byzantine Empire, also referred to as the Eastern Roman Empire, was the continuation of the Roman Empire centered in Constantinople during Late Antiquity and the Middle Ages. The eastern half of the Empire survived the conditions that caused the fall of the West in the 5th century AD, and continued to exist until the fall of Constantinople to the Ottoman Empire in 1453.\n",
    "                                                                                                   \n",
    "Q2: and when did it fall?\n",
    "A2: 1453\n",
    "rationale: The fall of Constantinople to the Ottomans in 1453 ultimately brought the empire to an end. \n",
    "                                                                                                   \n",
    "Q3: which battle or event marked the fall of this empire?\n",
    "A3: A six-year-long civil war followed by attack from Sultan Mehmed’s army\n",
    "rationale: A six-year-long civil war devastated the empire;  On 2 April 1453, Sultan Mehmed's army of 80,000 men and large numbers of irregulars laid siege to the city.\n",
    "                                                                                                   \n",
    "Q4: did he conquer other territories as well?\n",
    "A4: Yes. Anatolia and in Southeast Europe as far west as Bosnia\n",
    "rationale: Mehmed continued his conquests in Anatolia with its reunification and in Southeast Europe as far west as Bosnia\n",
    "                                                                                                   \n",
    "Q5: where is the first area located in present day terms?\n",
    "A5: Turkey\n",
    "rationale: Anatolia (Turkish: Anadolu), also known as Asia Minor,[a] is a large peninsula or a region in Turkey\n",
    "                                                                                                   \n",
    "Q6: who were the oldest known inhabitants of this region?\n",
    "A6: Mesopotamian-based Akkadian Empire\n",
    "rationale: The Akkadian Empire (/əˈkeɪdiən/)[2] was the first known ancient empire of Mesopotamia\n",
    "\n",
    "- sections of academic papers:\n",
    "content: \"\"\"\n",
    "                \n",
    "last_inst = \"\"\"Write an information-seeking conversation with roughly ten to fifteen QA turns based on the given sections of academic paper above, following the format of the above example provided. \n",
    "Except for the first question, remember that questions should always be contextualized and based on the conversation history. For example, to ensure the question is challenging to understand without the conversation history, it should use coreferences rather than \"proper nouns,\" such as their specific identifiers such as names or titles or country names.\n",
    "Do not use content from an \"example of an information-seeking conversation\" when making questions.\n",
    "\"\"\"\n",
    "                                                                                                   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0727c36-6aa4-4d4f-9730-0558f3c0a603",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def generate_conv_turns(path):\n",
    "    # sampled_papers = random.sample(long_enough_ps, 100)\n",
    "    all_content = extract_content_path(path) # sampled_papers[1]\n",
    "\n",
    "    prompt = f\"{template}\\n{all_content[:10000]}\\n\\n{last_inst}\"\n",
    "\n",
    "    # print(prompt)\n",
    "    # prompt = set_prompt(line, args)\n",
    "\n",
    "    messages = [\n",
    "        {\"role\": \"user\", \"content\": prompt}\n",
    "    ]\n",
    "\n",
    "    retries = 5\n",
    "    delay = 1\n",
    "    while retries > 0:\n",
    "        try:\n",
    "            response = openai.ChatCompletion.create(\n",
    "                model=model_name,\n",
    "                messages=messages,\n",
    "                # temperature=0.8, # deterministic decoding: 0.\n",
    "                # max_tokens=2560,\n",
    "                # top_p=0.8, # deterministic decoding: 1.\n",
    "                n=1, # number of output\n",
    "            )\n",
    "            break\n",
    "            # return response\n",
    "        except:\n",
    "            pass\n",
    "        retries -= 1\n",
    "        time.sleep(delay)\n",
    "        delay *= 2\n",
    "\n",
    "    # print(response[\"choices\"][0][\"message\"][\"content\"])\n",
    "    return response\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "78d1ca61-82e9-4473-a6b8-e4467d126ec1",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "paperpath2conv = {}\n",
    "for sampled_path in sampled_papers:# [:1]: # [:2]:\n",
    "    paperpath2conv[sampled_path] = generate_conv_turns(sampled_path)\n",
    "    \n",
    "# paperpath2conv\n",
    "key = list(paperpath2conv.keys())[0]\n",
    "print(paperpath2conv[key][\"choices\"][0][\"message\"][\"content\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6bf25f30-0d9d-4ece-8be3-c8da6b408414",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "torch.save(paperpath2conv, \"../../nlp_data/kisti/db_files/paperpath2gen_conv\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c41afe3e-638c-40c1-b963-dba058c73478",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2b631d88-e17d-43c2-9245-5786551b8f28",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "82ecf48a-25fd-4ead-811d-f5f6086d251c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "904ca631-ce2c-4c4c-bcad-ee47334ae690",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "78bd00ba-9df4-48b8-a24d-3a2a61eddb9e",
   "metadata": {},
   "source": [
    "### post-processing conv: paraphrasing responses"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8bf87045-efbe-4482-9d90-e6cc268ec926",
   "metadata": {},
   "outputs": [],
   "source": [
    "gen_paperpath2As = torch.load(\"../../nlp_data/kisti/db_files/gen_paperpath2As\")\n",
    "gen_paperpath2As[list(gen_paperpath2As.keys())[0]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3a574d42-4bcb-4771-b54b-c6068e7f34cf",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "template = \"The following text is a response from an AI chatbot. Please retain the exact meaning of the subsequent sentence but rephrase it as much as possible, ensuring that the key terms remain unchanged.\"\n",
    "def paraphrase_response(response):\n",
    "    # sampled_papers = random.sample(long_enough_ps, 100)\n",
    "    # all_content = extract_content_path(path) # sampled_papers[1]\n",
    "\n",
    "    prompt = f\"{template}\\n{response}\"\n",
    "\n",
    "    # print(prompt)\n",
    "    # prompt = set_prompt(line, args)\n",
    "\n",
    "    messages = [\n",
    "        {\"role\": \"user\", \"content\": prompt}\n",
    "    ]\n",
    "\n",
    "    retries = 5\n",
    "    delay = 1\n",
    "    while retries > 0:\n",
    "        try:\n",
    "            response = openai.ChatCompletion.create(\n",
    "                model=model_name,\n",
    "                messages=messages,\n",
    "                # temperature=0.8, # deterministic decoding: 0.\n",
    "                # max_tokens=2560,\n",
    "                # top_p=0.8, # deterministic decoding: 1.\n",
    "                n=1, # number of output\n",
    "            )\n",
    "            break\n",
    "            # return response\n",
    "        except:\n",
    "            pass\n",
    "        retries -= 1\n",
    "        time.sleep(delay)\n",
    "        delay *= 2\n",
    "\n",
    "    # print(response[\"choices\"][0][\"message\"][\"content\"])\n",
    "    return response\n",
    "\n",
    "# response = 'Yes, the infrared thermographic (IRT) technique can be used effectively for non-invasive monitoring of body surface temperature in the field.'\n",
    "# paraphrase_response(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "608b8ee1-5062-4499-b6c0-f4e6731269de",
   "metadata": {},
   "outputs": [],
   "source": [
    "gen_paperpath2newAs = {}\n",
    "   \n",
    "paths = list(gen_paperpath2As.keys())\n",
    "for path in paths:\n",
    "    responses = gen_paperpath2As[path]\n",
    "    new_responses = []\n",
    "    for resp in responses:\n",
    "        new_resp = paraphrase_response(resp)\n",
    "        new_resp = new_resp[\"choices\"][0][\"message\"][\"content\"]\n",
    "        new_responses += [new_resp]\n",
    "    gen_paperpath2newAs[path] = new_responses"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2817e688-1dc9-438e-8e00-a34d2dced13c",
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.save(gen_paperpath2newAs, \"../../nlp_data/kisti/db_files/gen_paperpath2newAs\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llmcqr",
   "language": "python",
   "name": "llmcqr"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
