{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9d386407",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
      "[nltk_data]   Package stopwords is already up-to-date!\n"
     ]
    }
   ],
   "source": [
    "import os, sys\n",
    "import fitz\n",
    "import re\n",
    "import json\n",
    "from datetime import datetime\n",
    "from typing import Optional, List, Callable, Any, Tuple, Dict, Self, Union, TypedDict\n",
    "from abc import abstractmethod, ABC\n",
    "import random\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import copy\n",
    "import nltk\n",
    "from nltk.corpus import stopwords\n",
    "import pickle\n",
    "from tqdm.autonotebook import tqdm\n",
    "import itertools\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "sys.path.append(\"../\")\n",
    "\n",
    "load_dotenv(dotenv_path=\"../.env\")\n",
    "nltk.download('stopwords')\n",
    "\n",
    "random.seed(42)\n",
    "np.random.seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1a17defe",
   "metadata": {},
   "outputs": [],
   "source": [
    "from dataset.dataset_utils.reader import ADIQDataset\n",
    "from dataset.dataset_utils.question import Question\n",
    "\n",
    "\n",
    "ds = ADIQDataset(\"datasets/simpleV3.1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "94d5898d",
   "metadata": {},
   "outputs": [],
   "source": [
    "_template = \"\"\"\n",
    "## Asset Description:\n",
    "{asset_type}: {asset_desc}\n",
    "\n",
    "## Conditions:\n",
    "{condition_str}\n",
    "\n",
    "## How long the conditions were met:\n",
    "{temporal_condition}\n",
    "\n",
    "{question_prompt}\n",
    "\"\"\"\n",
    "\n",
    "def question_templating(q:Question, desc_dict:Dict[str,str]) -> str:\n",
    "    asset_type = q.asset_type\n",
    "    asset_desc = desc_dict.get(q.asset_type, \"NONE\")\n",
    "    condition_str = \"\\n\".join(list(map(lambda x:\"- \"+x, q.condition_description)))\n",
    "    temporal_condition = q.temporal_condition[0] if len(q.temporal_condition)>0 else \"None\"\n",
    "    question_prompt = q.question_prompt\n",
    "    return _template.format(\n",
    "        asset_type = asset_type,\n",
    "        asset_desc = asset_desc,\n",
    "        condition_str = condition_str,\n",
    "        temporal_condition = temporal_condition,\n",
    "        question_prompt = question_prompt\n",
    "        )\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "def question_formatting(q: Question) -> dict[str,Any]:\n",
    "    #setting up prompts configs\n",
    "    q.question_first = True\n",
    "    q.text_type = \"choice\"\n",
    "    q.question = question_templating(q, ds.asset_descriptions)\n",
    "\n",
    "\n",
    "    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "be3c6546",
   "metadata": {},
   "outputs": [],
   "source": [
    "for q in ds.questions:\n",
    "    question_formatting(q)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "3a9ed1c2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overridding Dataset\n"
     ]
    }
   ],
   "source": [
    "ds.save(\n",
    "    \"simpleV3.1\",\n",
    "    'datasets'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d595ed9a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
