{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "6d86b760",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package stopwords to\n",
      "[nltk_data]     /Users/devinyasithdesilva/nltk_data...\n",
      "[nltk_data]   Package stopwords is already up-to-date!\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import os\n",
    "import fitz\n",
    "import sys\n",
    "import re\n",
    "import json\n",
    "from datetime import datetime\n",
    "from typing import Optional, List, Callable, Any, Tuple, Dict\n",
    "from abc import abstractmethod, ABC\n",
    "import random\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import copy\n",
    "import nltk\n",
    "from nltk.corpus import stopwords\n",
    "import pickle\n",
    "import itertools\n",
    "from dataclasses import dataclass, asdict\n",
    "from enum import Enum\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "# Add the parent directory to the import search path (presumably so that\n",
    "# project-local packages such as models_utils can be imported; TODO confirm).\n",
    "sys.path.append(\"../\")\n",
    "\n",
    "# Load environment variables from the .env file one directory up.\n",
    "load_dotenv(dotenv_path=\"../.env\")\n",
    "# Download the NLTK stopwords corpus; the call's return value is shown as the cell output.\n",
    "nltk.download('stopwords')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c76a2f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_json(loc:str) -> dict:\n",
    "    \"\"\"Parse the JSON file at ``loc`` and return the resulting object.\n",
    "\n",
    "    The file is opened explicitly as UTF-8, the standard encoding for JSON\n",
    "    interchange, so the result does not depend on the platform's default\n",
    "    locale encoding.\n",
    "\n",
    "    Args:\n",
    "        loc: Path of the JSON file to read.\n",
    "\n",
    "    Returns:\n",
    "        Whatever ``json.load`` produces for the file contents.\n",
    "\n",
    "    Raises:\n",
    "        OSError: If the file cannot be opened.\n",
    "        json.JSONDecodeError: If the contents are not valid JSON.\n",
    "    \"\"\"\n",
    "    with open(loc, \"r\", encoding=\"utf-8\") as f0:\n",
    "        return json.load(f0)\n",
    "\n",
    "class QuestionDifficulty(Enum):\n",
    "    \"\"\"Difficulty levels for a question; each member's value is its label string.\"\"\"\n",
    "    entry = \"Entry-Level\"\n",
    "    intermediary = \"Intermediate\"\n",
    "    advance = \"Advanced\"\n",
    "\n",
    "@dataclass\n",
    "class Question:\n",
    "    \"\"\"One multiple-choice question record plus helpers to serialise and render it.\n",
    "\n",
    "    Field values are stored exactly as given to the constructor; nothing is\n",
    "    validated or converted. ``difficulty`` may therefore be either a\n",
    "    ``QuestionDifficulty`` member or, when the instance was built through\n",
    "    ``from_dict`` from JSON-loaded data, a plain label string.\n",
    "    \"\"\"\n",
    "    question_id:str\n",
    "    rule_id:int\n",
    "    date_detected:str\n",
    "    asset_type:str\n",
    "    condition_description:List[str]\n",
    "    question_prompt:str\n",
    "    options:List[str]\n",
    "    answer:str\n",
    "    answer_number:str\n",
    "    rationale:str\n",
    "    tags:List[str]\n",
    "    difficulty:QuestionDifficulty\n",
    "\n",
    "    def to_dict(self) -> dict[str, Any]:\n",
    "        \"\"\"Return the fields as a new dict with ``difficulty`` as its label string.\n",
    "\n",
    "        ``vars(self)`` is the instance's own attribute dict, so it is copied\n",
    "        before being edited; writing to it directly would replace\n",
    "        ``self.difficulty`` with a string and make the next call fail.\n",
    "        A ``difficulty`` that is not a ``QuestionDifficulty`` member (for\n",
    "        example a label string loaded from JSON) is passed through unchanged.\n",
    "        The copy is shallow: list-valued fields are shared with the instance.\n",
    "        \"\"\"\n",
    "        attrs = dict(vars(self))\n",
    "        difficulty = attrs[\"difficulty\"]\n",
    "        if isinstance(difficulty, QuestionDifficulty):\n",
    "            attrs[\"difficulty\"] = difficulty.value\n",
    "\n",
    "        return attrs\n",
    "    \n",
    "    @staticmethod\n",
    "    def from_dict(data:dict[str,Any]) -> \"Question\":\n",
    "        \"\"\"Build a ``Question`` by passing every key of ``data`` as a keyword argument.\n",
    "\n",
    "        Values are not converted, so ``difficulty`` keeps whatever type ``data`` holds.\n",
    "\n",
    "        Raises:\n",
    "            TypeError: If ``data`` has missing or unexpected keys.\n",
    "        \"\"\"\n",
    "        return Question(**data)\n",
    "    \n",
    "\n",
    "    def to_prompt(self,template:str, asset_descriptions:Dict[str,str]) -> str:\n",
    "        \"\"\"Fill ``template`` with this question's details.\n",
    "\n",
    "        Args:\n",
    "            template: ``str.format`` template; it receives the keyword fields\n",
    "                ``asset_type``, ``asset_description``, ``conditions``,\n",
    "                ``question_prompt`` and ``options``.\n",
    "            asset_descriptions: Maps an asset type to its description; the\n",
    "                literal ``\"NONE\"`` is used when this question's asset type is absent.\n",
    "\n",
    "        Returns:\n",
    "            The formatted prompt, with each condition on its own line prefixed\n",
    "            by ``- `` and the options joined one per line.\n",
    "        \"\"\"\n",
    "        conditions = \"\\n\".join(\"- \" + condition for condition in self.condition_description)\n",
    "        return template.format(\n",
    "            asset_type = self.asset_type,\n",
    "            asset_description = asset_descriptions.get(self.asset_type, \"NONE\"),\n",
    "            conditions = conditions,\n",
    "            question_prompt = self.question_prompt,\n",
    "            options = \"\\n\".join(self.options)\n",
    "        )\n",
    "    \n",
    "def clean_response(response:Optional[Dict[str,Any]|List[Dict[str,Any]]]):\n",
    "    \"\"\"Extract the first generated text from a completion response.\n",
    "\n",
    "    Args:\n",
    "        response: A response dict, or a list of them (only the first is used).\n",
    "\n",
    "    Returns:\n",
    "        The ``generated_text`` of the first entry under ``results``.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: If ``response`` is ``None`` or otherwise falsy.\n",
    "    \"\"\"\n",
    "    if not response:\n",
    "        raise RuntimeError(\"Response is null!\")\n",
    "\n",
    "    # A list response is reduced to its first element before unpacking.\n",
    "    payload = response[0] if isinstance(response, list) else response\n",
    "    first_result = payload[\"results\"][0]\n",
    "    return first_result[\"generated_text\"]\n",
    "\n",
    "class ADIQDataset:\n",
    "    \"\"\"Question dataset loaded from a JSON file.\n",
    "\n",
    "    Every top-level key of the JSON object becomes an attribute of the\n",
    "    instance (so a key named like an existing attribute or method replaces\n",
    "    it), and the ``questions`` entry is converted into ``Question`` objects.\n",
    "\n",
    "    Args:\n",
    "        loc: Path of the dataset JSON file.\n",
    "        shuffle: Whether to shuffle the questions after loading. Defaults to\n",
    "            ``True``, which matches the behaviour before this option existed.\n",
    "        seed: Optional seed for a repeatable shuffle. ``None`` (the default)\n",
    "            shuffles with the module-level ``random`` generator.\n",
    "    \"\"\"\n",
    "    def __init__(self, loc:str, shuffle:bool=True, seed:Optional[int]=None) -> None:\n",
    "        self.loc = loc\n",
    "        # Underscore-prefixed so they are less likely to collide with dataset\n",
    "        # keys, which are copied onto the instance as attributes.\n",
    "        self._shuffle = shuffle\n",
    "        self._seed = seed\n",
    "        self.__build_dataset__()\n",
    "\n",
    "    def __build_dataset__(self) -> None:\n",
    "        \"\"\"Load the file, mirror its keys as attributes and build ``self.questions``.\"\"\"\n",
    "        self.data = load_json(self.loc)\n",
    "\n",
    "        for k,v in self.data.items():\n",
    "            setattr(self,k,v)\n",
    "\n",
    "        # Relies on the JSON providing a \"questions\" key (set by the loop above).\n",
    "        self.questions = [Question.from_dict(x) for x in self.questions]\n",
    "\n",
    "        # The getattr defaults keep the original always-shuffle behaviour if\n",
    "        # this method is called on an instance not initialised via __init__.\n",
    "        if getattr(self, \"_shuffle\", True):\n",
    "            seed = getattr(self, \"_seed\", None)\n",
    "            if seed is None:\n",
    "                random.shuffle(self.questions)\n",
    "            else:\n",
    "                # A private generator gives a repeatable order without\n",
    "                # touching the global random state.\n",
    "                random.Random(seed).shuffle(self.questions)\n",
    "\n",
    "    def get_questions(self) -> List[Question]:\n",
    "        \"\"\"Return the loaded ``Question`` list (the same list object, not a copy).\"\"\"\n",
    "        return self.questions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "d19124f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prompt layout for a single question. Question.to_prompt fills the five\n",
    "# placeholders: asset_type, asset_description, conditions, question_prompt, options.\n",
    "question_template = \"\"\"\n",
    "## Asset Description:\n",
    "{asset_type}: {asset_description}\n",
    "\n",
    "## Conditions:\n",
    "{conditions}\n",
    "\n",
    "{question_prompt}\n",
    "{options}\n",
    "    \n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "aeb6bc8d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the test dataset. By default ADIQDataset shuffles the questions on load,\n",
    "# so their order (and ds.questions[0]) can differ between runs.\n",
    "ds = ADIQDataset(\"datasets/test_dataset_pipeline.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "fe3fc82f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render the first question into prompt text, giving to_prompt a mapping of\n",
    "# asset type -> its \"desc\" entry from the dataset's asset_descriptions.\n",
    "example1 = ds.questions[0].to_prompt(\n",
    "    question_template,\n",
    "    {asset: details[\"desc\"] for asset, details in ds.asset_descriptions.items()},\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "9bd34b51",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prompt asking the model whether it has enough information to answer a question\n",
    "# and, if not, which terms it does not understand. {question} is the only placeholder.\n",
    "ask_understanding_template = \"\"\"\n",
    "Given below is a question please fully read through the question and tell me whether you have all the information required to answer the question, if the answer is NO , \n",
    "give the list of terms you don't understand \n",
    "\n",
    "#Question.\n",
    "{question}\n",
    "\n",
    "Answer:\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b2f05182",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "No, I do not have all the information required to answer the question. Here are the terms and concepts I don't understand:\n",
      "\n",
      "1. \"Supply Air Temperature\" - It's not clear what this refers to in the context of the CRAC system.\n",
      "2. \"Temperature Setpoint\" - While I understand this is the desired temperature, it's not clear how it's used in the given conditions.\n",
      "3. \"Met for 3 Hours\" - The meaning of \"Met\" in this context is unclear.\n",
      "4. \"Clean the tubes\" - It's not specified what tubes this is referring to.\n",
      "5. \"Chilled water valve not actuating well\" - The role of the chilled water valve in the CRAC system is not explained.\n",
      "6. \"Too much equipment in the local area\" - The relevance of this to the CRAC system or the given conditions is not clear.\n",
      "\n",
      "To provide an accurate answer, I would need more context and explanation of these terms and how they relate to the CRAC system and the given conditions.\n"
     ]
    }
   ],
   "source": [
    "# NOTE(review): this import lives outside the imports cell; it presumably relies\n",
    "# on the sys.path.append(\"../\") call made there, so that cell must run first.\n",
    "import models_utils.llm.watsonx\n",
    "\n",
    "# Send the understanding prompt for example1 to the model, reduce the raw\n",
    "# completion payload to its generated text, and print it.\n",
    "response = clean_response(\n",
    "    models_utils.llm.watsonx.get_completion_response(\n",
    "        ask_understanding_template.format(question=example1)\n",
    "    )\n",
    ")\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0d311bf8",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
