{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b5465947",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from lm_polygraph.utils.model import WhiteboxModel\n",
    "from lm_polygraph.estimators import MaximumClaimProbability, ClaimConditionedProbabilityClaim\n",
    "from lm_polygraph.stat_calculators import *\n",
    "from lm_polygraph.utils.openai_chat import OpenAIChat\n",
    "from lm_polygraph.utils.deberta import Deberta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3b7ae864",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the `bigscience/bloomz-560m` checkpoint wrapped as a `WhiteboxModel`.\n",
    "model = WhiteboxModel.from_pretrained(\n",
    "    \"bigscience/bloomz-560m\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "802f4378",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n"
     ]
    }
   ],
   "source": [
    "# Single-prompt batch; `stat` accumulates the output of every calculator below.\n",
    "texts = [\"Tell me a bio of Albert Einstein.\"]\n",
    "stat = {}\n",
    "\n",
    "# Placeholder credential: export OPENAI_KEY in your environment (preferred) or\n",
    "# edit the fallback value below. `setdefault` keeps a key that is already set\n",
    "# instead of overwriting it with the placeholder.\n",
    "# NOTE(review): presumably OpenAIChat reads OPENAI_KEY when it is constructed,\n",
    "# so this must run before the calculator list is built -- confirm.\n",
    "os.environ.setdefault(\"OPENAI_KEY\", \"MY_OPENAI_TOKEN\")\n",
    "\n",
    "# Each calculator is called with the statistics gathered so far and its\n",
    "# result is merged into `stat`.\n",
    "for calculator in [\n",
    "    GreedyProbsCalculator(),\n",
    "    ClaimsExtractor(OpenAIChat(\"gpt-4\")),\n",
    "]:\n",
    "    stat.update(calculator(stat, texts, model))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a99e12fa",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Output:  Albert Einstein (born August 1, 1905 in Berlin, Germany) was a German physicist and mathematician. He was the first person to observe the existence of gravity.\n",
      "\n",
      "claim: Albert Einstein was born on August 1, 1905.\n",
      "aligned tokens: [0, 1, 3, 4, 5, 7]\n",
      "\n",
      "claim: Albert Einstein was born in Berlin, Germany.\n",
      "aligned tokens: [0, 1, 3, 9, 11]\n",
      "\n",
      "claim: Albert Einstein was a German physicist.\n",
      "aligned tokens: [0, 1, 13, 14, 15, 16, 17]\n",
      "\n",
      "claim: Albert Einstein was a mathematician.\n",
      "aligned tokens: [0, 1, 13, 14, 19, 20]\n",
      "\n",
      "claim: He was the first person to observe something.\n",
      "aligned tokens: [22, 23, 24, 25, 26, 27, 28]\n",
      "\n",
      "claim: What he observed was the existence of gravity.\n",
      "aligned tokens: [22, 28, 29, 30, 31, 32]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Greedy generation for the first (and only) prompt.\n",
    "generated_text = stat[\"greedy_texts\"][0]\n",
    "print(\"Output:\", generated_text)\n",
    "print()\n",
    "\n",
    "# Each extracted claim with its aligned tokens (presumably positions in the\n",
    "# generated token sequence -- TODO confirm), followed by a blank line.\n",
    "for extracted_claim in stat[\"claims\"][0]:\n",
    "    print(\"claim:\", extracted_claim.claim_text)\n",
    "    print(\"aligned tokens:\", extracted_claim.aligned_tokens)\n",
    "    print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "6750525d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([9.063859 , 3.237115 , 2.468878 , 3.382793 , 7.7747087, 7.9294844],\n",
       "      dtype=float32)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One uncertainty score per extracted claim; higher means less certain.\n",
    "max_prob = MaximumClaimProbability()\n",
    "max_prob(stat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "42d9982a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']\n",
      "- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([-0.00411375, -0.40427966, -0.20528004, -0.12898072, -0.56441526,\n",
       "       -0.41163102])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Merge the statistics produced by the DeBERTa-backed NLI calculator into `stat`\n",
    "# (presumably what ClaimConditionedProbabilityClaim consumes -- TODO confirm).\n",
    "calculator = GreedyAlternativesNLICalculator(Deberta())\n",
    "stat.update(calculator(stat, texts, model))\n",
    "\n",
    "# One uncertainty score per extracted claim; higher means less certain.\n",
    "ccp = ClaimConditionedProbabilityClaim()\n",
    "ccp(stat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11d1a0d7",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:.mlspace-calibration]",
   "language": "python",
   "name": "conda-env-.mlspace-calibration-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
