{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import random\n",
    "import json\n",
    "random.seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dialogue records; each line of the input file is parsed as its own JSON document.\n",
    "data = []\n",
    "with open('data/dialogue_data.json', 'r', encoding=\"utf-8\") as f:\n",
    "    for line in f:\n",
    "        # json.loads raises on blank/whitespace-only lines, so skip them.\n",
    "        if line.strip():\n",
    "            data.append(json.loads(line))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10000,\n",
       " {'knowledge': 'The Secret Life of Bees has genre Teen drama. Teen drama has examples: A Walk to RememberA Walk to Remember is produced by Hunt LowryHunt Lowry produced A Walk to Remember. A Walk to Remember is a/an Film',\n",
       "  'dialogue_history': '[Human]: Could you recommend any books like The Secret Life of Bees? [Assistant]: Yes, its a great teen book, have you heard about A walk to remember ? It is also a great teen drama [Human]: I think I have.. is that one with Hunt Lowry? ',\n",
       "  'right_response': 'yes and he also produced White Oleander and it was released in 1999',\n",
       "  'hallucinated_response': \"No, I don't think Hunt Lowry was involved in A Walk to Remember. However, he produced the movie adaptation of the popular novel, White Oleander, which was released in 1999.\"})"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the number of loaded records and the second record as a sample.\n",
    "len(data), data[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "20000"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pool every response in the dataset (right first, then hallucinated, per record);\n",
    "# distractor options are sampled from this pool when the items are built.\n",
    "all_choices = [\n",
    "    response\n",
    "    for d in data\n",
    "    for response in (d['right_response'], d['hallucinated_response'])\n",
    "]\n",
    "len(all_choices)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build one six-option multiple-choice item per dialogue record.\n",
    "# Re-seed here (same seed as the setup cell) so that re-running this cell on its\n",
    "# own regenerates exactly the same distractors and option order.\n",
    "random.seed(42)\n",
    "\n",
    "letters = [\"A\", \"B\", \"C\", \"D\", \"E\", \"F\"]\n",
    "# Distinct candidate responses; assumes responses are strings (hashable).\n",
    "distinct_pool = set(all_choices)\n",
    "\n",
    "new_data = []\n",
    "for i, row in enumerate(data):\n",
    "    right = row['right_response']\n",
    "    hallucinated = row['hallucinated_response']\n",
    "    assert right != hallucinated, f\"record {i}: right and hallucinated responses are identical\"\n",
    "\n",
    "    # The rejection sampling below never terminates unless the pool holds at least\n",
    "    # two responses other than this record's own pair, so fail fast instead.\n",
    "    n_other = len(distinct_pool) - (right in distinct_pool) - (hallucinated in distinct_pool)\n",
    "    assert n_other >= 2, f\"record {i}: not enough distinct distractors in all_choices\"\n",
    "\n",
    "    # Options A-D: the right response, its hallucinated counterpart, and two random\n",
    "    # distractors drawn from the pooled responses, with no duplicate options.\n",
    "    chs = [right, hallucinated]\n",
    "    while len(chs) < 4:\n",
    "        a = random.sample(all_choices, 1)[0]\n",
    "        if a not in chs:\n",
    "            chs.append(a)\n",
    "    random.shuffle(chs)\n",
    "\n",
    "    # Locate the right response before the two fixed fallback options are appended,\n",
    "    # so the answer letter is always one of A-D.\n",
    "    idx = chs.index(right)\n",
    "    chs.append(\"I don't know\")\n",
    "    chs.append(\"None of the above\")\n",
    "\n",
    "    new_data.append({\n",
    "        'source': 'Halu-OpenDialKG',\n",
    "        'task': 'dialogue response selection',\n",
    "        'knowledge': row['knowledge'],\n",
    "        'context': row['dialogue_history'],\n",
    "        'question': \"Which of the following responses is the most suitable one for the given dialogue?\",\n",
    "        'choices': dict(zip(letters, chs)),\n",
    "        'answer': letters[idx],\n",
    "    })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10000"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of generated multiple-choice items (one is appended per input record).\n",
    "len(new_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tag each generated item with its position in new_data as a numeric id.\n",
    "for item_id, item in enumerate(new_data):\n",
    "    item['id'] = item_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the generated items as a single indented JSON array.\n",
    "# The encoding is pinned to UTF-8, matching how the input file is read, instead of\n",
    "# relying on the platform-dependent default.\n",
    "with open(\"data/halu_dialogue.json\", 'w', encoding=\"utf-8\") as f:\n",
    "    json.dump(new_data, f, indent=2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
