{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "9d386407",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_1470301/2542374916.py:15: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from tqdm.autonotebook import tqdm\n",
      "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
      "[nltk_data]   Package stopwords is already up-to-date!\n"
     ]
    }
   ],
   "source": [
    "import os, sys\n",
    "import fitz\n",
    "import re\n",
    "import json\n",
    "from datetime import datetime\n",
    "from typing import Optional, List, Callable, Any, Tuple, Dict, Self, Union, TypedDict\n",
    "from abc import abstractmethod, ABC\n",
    "import random\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import copy\n",
    "import nltk\n",
    "from nltk.corpus import stopwords\n",
    "import pickle\n",
    "from tqdm.autonotebook import tqdm\n",
    "import itertools\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "# Seed Python's and NumPy's global RNGs.\n",
    "random.seed(42)\n",
    "np.random.seed(42)\n",
    "\n",
    "# Add the parent directory to the import search path.\n",
    "sys.path.append(\"../\")\n",
    "\n",
    "# Load environment variables from the parent directory's .env file.\n",
    "load_dotenv(dotenv_path=\"../.env\")\n",
    "\n",
    "# Fetch the NLTK stopword corpus.\n",
    "nltk.download(\"stopwords\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a4ec5e0b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Benchmark tag plus an ISO timestamp taken when this cell runs.\n",
    "BENCH_TAG = \"basic_bench\"\n",
    "BENCH_ID = datetime.now().isoformat()\n",
    "\n",
    "# Output directory for this run: results/<tag>/<timestamp>.\n",
    "SAVE_LOC = os.path.join(\"results\", BENCH_TAG, BENCH_ID)\n",
    "# os.makedirs(SAVE_LOC, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1a17defe",
   "metadata": {},
   "outputs": [],
   "source": [
    "from dataset.dataset_utils.reader import ADIQDataset\n",
    "from dataset.dataset_utils.question import Question\n",
    "\n",
    "\n",
    "ds = ADIQDataset(\"../dataset/datasets/simplePertV3.1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "94d5898d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from bench_utils.question import MultipleChoiceQuestion\n",
    "from bench_utils.inference_calls import LLMConfiguration, ModelConfig, MODEL_MAP\n",
    "\n",
    "_template = \"\"\"\n",
    "## Asset Description:\n",
    "{asset_type}: {asset_desc}\n",
    "\n",
    "## Conditions:\n",
    "{condition_str}\n",
    "\n",
    "## How long the conditions were met:\n",
    "{temporal_condition}\n",
    "\n",
    "{question_prompt}\n",
    "\"\"\"\n",
    "\n",
    "def question_templating(q:Question, desc_dict:Dict[str,str]) -> str:\n",
    "    \"\"\"Render the prompt text for one question by filling in `_template`.\n",
    "\n",
    "    Args:\n",
    "        q: Question supplying asset_type, condition_description,\n",
    "            temporal_condition and question_prompt.\n",
    "        desc_dict: Maps an asset type to its description; \"NONE\" is used\n",
    "            when the asset type has no entry.\n",
    "\n",
    "    Returns:\n",
    "        The formatted prompt string.\n",
    "    \"\"\"\n",
    "    # One \"- \" bullet per condition description.\n",
    "    bullet_lines = [\"- \" + cond for cond in q.condition_description]\n",
    "\n",
    "    # Only the first temporal condition is used; \"None\" when there are none.\n",
    "    if len(q.temporal_condition) > 0:\n",
    "        duration = q.temporal_condition[0]\n",
    "    else:\n",
    "        duration = \"None\"\n",
    "\n",
    "    return _template.format(\n",
    "        asset_type=q.asset_type,\n",
    "        asset_desc=desc_dict.get(q.asset_type, \"NONE\"),\n",
    "        condition_str=\"\\n\".join(bullet_lines),\n",
    "        temporal_condition=duration,\n",
    "        question_prompt=q.question_prompt,\n",
    "    )\n",
    "\n",
    "\n",
    "# Models to benchmark; commented-out entries are currently disabled alternatives.\n",
    "models_to_test:List[ModelConfig] = [\n",
    "    # ModelConfig(name='mistral-large', identifier='mistralai/mistral-large'),\n",
    "    # ModelConfig(name='llama-3-3-70b-instruct', identifier='llama-3-3-70b-instruct'),\n",
    "    # ModelConfig(name='qwen2-5-72b-instruct', identifier='Qwen/Qwen2.5-72B-Instruct'),\n",
    "    # ModelConfig(name='llama-3-1-405b-instruct-fp8', identifier='llama-3-1-405b-instruct-fp8'),\n",
    "    # ModelConfig(name='llama-3-1-8b-instruct', identifier='llama-3-1-8b-instruct'),\n",
    "    # ModelConfig(name='microsoft-phi-4', identifier='microsoft-phi-4'),\n",
    "    # ModelConfig(name='mixtral-8x22b-instruct-v01', identifier='mixtral-8x22b-instruct-v01'),\n",
    "    # ModelConfig(name='o1', identifier='openai/o1'),\n",
    "    ModelConfig(name='deepseek-r1', identifier='deepseek-ai/deepseek-r1'),\n",
    "]\n",
    "\n",
    "# Run configuration; \"model_to_test\" is the default model list of run_llm_suit.\n",
    "config = {\"model_to_test\": models_to_test}\n",
    "\n",
    "\n",
    "\n",
    "def ask_question_from_llm(q: Question, model:ModelConfig) -> dict[str,Any]:\n",
    "    \"\"\"Ask one dataset question to one model and return a result record.\n",
    "\n",
    "    Note: `q` is modified in place (question_first, text_type and question\n",
    "    are overwritten) before it is converted to a MultipleChoiceQuestion.\n",
    "\n",
    "    Args:\n",
    "        q: The dataset question to ask.\n",
    "        model: Configuration of the model to query.\n",
    "\n",
    "    Returns:\n",
    "        A dict with the question id, the prompt sent, the question/options\n",
    "        text, the correct answer, the model's attributes and the response.\n",
    "        \"model_output\" and \"model_original_output\" hold the same response.\n",
    "    \"\"\"\n",
    "    # Prompt settings are stored on the question object itself.\n",
    "    q.question_first = True\n",
    "    q.text_type = \"choice\"\n",
    "    q.question = question_templating(q, ds.asset_descriptions)\n",
    "\n",
    "    # Build the multiple-choice prompt from the question's dict form.\n",
    "    choice_question = MultipleChoiceQuestion()\n",
    "    choice_question.load_dict(q.to_dict())\n",
    "    prompt = choice_question.get_prompt()\n",
    "\n",
    "    response = LLMConfiguration(model).get_response(prompt)\n",
    "\n",
    "    return {\n",
    "        \"id\": q.id,\n",
    "        \"prompt\": prompt,\n",
    "        \"question_text\": choice_question.question,\n",
    "        \"options_text\": choice_question.options,\n",
    "        \"true_answer\": choice_question.correct,\n",
    "        \"model\": model.__dict__,\n",
    "        \"model_output\": response,\n",
    "        \"model_original_output\": response,\n",
    "    }"
   ]
  },
  {
   "cell_type": "raw",
   "id": "d036d21f",
   "metadata": {
    "vscode": {
     "languageId": "raw"
    }
   },
   "source": [
    "import openai\n",
    "import os\n",
    "import requests\n",
    "\n",
    "\n",
    "\n",
    "client = openai.Client(\n",
    "                api_key=os.environ.get(\"RITS_RESTRICTED_API_KEY\"),\n",
    "                base_url=\"https://restricted-3scale-apicast-production.apps.rits.fmaas.res.ibm.com/deepseek-r1/v1\",\n",
    "                default_headers={\"RITS_API_KEY\": os.environ[\"RITS_RESTRICTED_API_KEY\"]},\n",
    "                timeout=120.0,\n",
    "                max_retries=1,\n",
    "            )"
   ]
  },
  {
   "cell_type": "raw",
   "id": "56511b67",
   "metadata": {
    "vscode": {
     "languageId": "raw"
    }
   },
   "source": [
    "response = client.chat.completions.create(\n",
    "                model=\"deepseek-ai/deepseek-r1\",\n",
    "                messages=[\n",
    "                    {\"role\":\"user\", \"content\":\"test model online. Jarvis Are you online?\"}\n",
    "                ],\n",
    "            )"
   ]
  },
  {
   "cell_type": "raw",
   "id": "dc04ec66",
   "metadata": {
    "vscode": {
     "languageId": "raw"
    }
   },
   "source": [
    "res = requests.get(\n",
    "    \"https://restricted-3scale-apicast-production.apps.rits.fmaas.res.ibm.com/deepseek-r1/v1/models\",\n",
    "    headers={\"RITS_API_KEY\": os.environ[\"RITS_RESTRICTED_API_KEY\"]},\n",
    "    )\n",
    "print(res.content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "b3e48db3",
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'models_to_test' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[10]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m l \u001b[38;5;129;01min\u001b[39;00m \u001b[43mmodels_to_test\u001b[49m:\n\u001b[32m      2\u001b[39m     \u001b[38;5;28mprint\u001b[39m(client.get_response(\u001b[33m\"\u001b[39m\u001b[33mtest model online. Jarvis Are you online?\u001b[39m\u001b[33m\"\u001b[39m))\n",
      "\u001b[31mNameError\u001b[39m: name 'models_to_test' is not defined"
     ]
    }
   ],
   "source": [
    "# Smoke test: send one short prompt to every configured model.\n",
    "# A client is built per model, the same way ask_question_from_llm does it;\n",
    "# no `client` variable is defined by any executed cell.\n",
    "for model_cfg in models_to_test:\n",
    "    print(LLMConfiguration(model_cfg).get_response(\"test model online. Jarvis Are you online?\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a11f8584",
   "metadata": {},
   "outputs": [],
   "source": [
    "from models_utils.utils.concurrency import concurrent_dict_execution\n",
    "from dataset.utils import file_handle\n",
    "\n",
    "def run_bench_for_llm(llm:ModelConfig, dataset:ADIQDataset, already:List[str], instant_save=False) -> dict[int, dict[str,Any]]:\n",
    "    \"\"\"Ask one model every dataset question that has not been answered yet.\n",
    "\n",
    "    Args:\n",
    "        llm: Configuration of the model to benchmark.\n",
    "        dataset: Dataset whose `questions` are asked.\n",
    "        already: Question ids (as strings) to skip.\n",
    "        instant_save: Currently unused; kept for interface compatibility.\n",
    "\n",
    "    Returns:\n",
    "        The key/value pairs produced by concurrent_dict_execution, or an\n",
    "        empty dict when there is nothing left to ask.\n",
    "    \"\"\"\n",
    "    print(\"Run Experiment for {llm}\".format(llm=llm))\n",
    "\n",
    "    # A set makes the per-question membership test constant-time.\n",
    "    done_ids = set(already)\n",
    "    param_dict = {v.id:[v, llm] for v in dataset.questions if str(v.id) not in done_ids}\n",
    "    if not param_dict:\n",
    "        return {}\n",
    "\n",
    "    # MODEL_MAP may have no entry for this model; fall back to the default\n",
    "    # worker count instead of failing on None.\n",
    "    model_data = MODEL_MAP.get(llm.name) or {}\n",
    "    results = {k:v for k,v in concurrent_dict_execution(\n",
    "            ask_question_from_llm,\n",
    "            param_dict,\n",
    "            num_max_workers=model_data.get(\"num_workers\",4)\n",
    "        )}\n",
    "    return results\n",
    "\n",
    "def run_llm_suit(ds:ADIQDataset, save_loc_data:str, llm_list=config['model_to_test'], instant_save=False):\n",
    "    \"\"\"Run the benchmark for each model and store one JSON file per model.\n",
    "\n",
    "    Results are read from and written to `<save_loc_data>/<model name>.json`.\n",
    "    When that file already exists, only questions whose ids are missing from\n",
    "    its \"results\" are asked and the file is updated; otherwise a new record\n",
    "    is created from a copy of the model's attributes.\n",
    "\n",
    "    Args:\n",
    "        ds: Dataset to benchmark on.\n",
    "        save_loc_data: Directory for the result files (created if missing).\n",
    "        llm_list: Models to run; defaults to config['model_to_test'] as it\n",
    "            was when this function was defined.\n",
    "        instant_save: Passed through to run_bench_for_llm.\n",
    "    \"\"\"\n",
    "    os.makedirs(save_loc_data, exist_ok=True)\n",
    "    for llm in llm_list:\n",
    "        # Use one path for the existence check, the load and the save, so a\n",
    "        # rerun with the same save_loc_data resumes from the saved file.\n",
    "        result_path = os.path.join(save_loc_data, f'{llm.name}.json')\n",
    "\n",
    "        if os.path.exists(result_path):\n",
    "            print(\"Experiment exist for {llm}\".format(llm=llm.name))\n",
    "            record = file_handle.load_json(result_path)\n",
    "            done_ids = list(record[\"results\"].keys())\n",
    "        else:\n",
    "            # Copy so the stored record never aliases the live model config.\n",
    "            record = copy.deepcopy(llm.__dict__)\n",
    "            record[\"results\"] = {}\n",
    "            done_ids = []\n",
    "\n",
    "        record[\"results\"].update(\n",
    "            run_bench_for_llm(llm, ds, done_ids, instant_save=instant_save)\n",
    "        )\n",
    "\n",
    "        file_handle.save_json(record, result_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c93b61a3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run Experiment for ModelConfig(name='deepseek-r1', identifier='deepseek-ai/deepseek-r1')\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Completed::   0%|          | 32/6690 [07:47<39:47:36, 21.52s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Error in 2872: Request timed out.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Completed::   1%|          | 36/6690 [08:45<26:33:40, 14.37s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Error in 2360: Request timed out.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Completed::   1%|          | 37/6690 [09:11<32:42:30, 17.70s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Error in 4853: Request timed out.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Completed::   1%|          | 37/6690 [09:20<27:59:45, 15.15s/it]\n"
     ]
    }
   ],
   "source": [
    "run_llm_suit(ds,\"test\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "884cf098",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
