{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6958a441",
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import os\n",
    "\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "from lm_polygraph.utils.model import WhiteboxModel, BlackboxModel\n",
    "from lm_polygraph.utils.manager import estimate_uncertainty\n",
    "from lm_polygraph.estimators import MaximumTokenProbability, LexicalSimilarity, SemanticEntropy, PointwiseMutualInformation, EigValLaplacian"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7d3dc0e6-804f-490e-9b77-4f5b3cb0ad64",
   "metadata": {},
   "source": [
    "### Initialize model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4e7a7afe",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the bigscience/bloomz-560m causal LM (device_map='cpu') together with its tokenizer.\n",
    "base_model = AutoModelForCausalLM.from_pretrained('bigscience/bloomz-560m', device_map='cpu')\n",
    "tokenizer = AutoTokenizer.from_pretrained('bigscience/bloomz-560m')\n",
    "\n",
    "# Wrap the model/tokenizer pair in a WhiteboxModel; the cells below pass it to estimate_uncertainty.\n",
    "model = WhiteboxModel(base_model, tokenizer)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ad18648a-b1c7-4089-832e-84e17be8b203",
   "metadata": {},
   "source": [
    "### Token-level uncertainty estimation (UE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "247f5d7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the MaximumTokenProbability estimator on a single prompt with the white-box model.\n",
    "estimator = MaximumTokenProbability()\n",
    "estimate_uncertainty(\n",
    "    model,\n",
    "    estimator,\n",
    "    input_text='Who is George Bush?',\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4043671a-939f-421b-b06b-24abf557fdc9",
   "metadata": {},
   "source": [
    "### Sequence-level UE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8292b97e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# LexicalSimilarity built with 'rougeL' (presumably selects the ROUGE-L metric; confirm in the estimator docs).\n",
    "estimator = LexicalSimilarity('rougeL')\n",
    "estimate_uncertainty(\n",
    "    model,\n",
    "    estimator,\n",
    "    input_text='Who is George Bush?',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a906db0a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same prompt as the previous cells, scored with a default-constructed SemanticEntropy estimator.\n",
    "estimator = SemanticEntropy()\n",
    "estimate_uncertainty(\n",
    "    model,\n",
    "    estimator,\n",
    "    input_text='Who is George Bush?',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "90f3c0ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "# PointwiseMutualInformation estimator applied to an open-ended story prompt.\n",
    "estimator = PointwiseMutualInformation()\n",
    "estimate_uncertainty(\n",
    "    model,\n",
    "    estimator,\n",
    "    input_text='Once upon a time there was a little girl who liked to',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cc03fd15",
   "metadata": {},
   "outputs": [],
   "source": [
    "# LexicalSimilarity with its default arguments; prompt and estimator are kept in named variables.\n",
    "input_text = \"Who is George Bush?\"\n",
    "ue_method = LexicalSimilarity()\n",
    "estimate_uncertainty(\n",
    "    model,\n",
    "    ue_method,\n",
    "    input_text=input_text,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "20c7940d-9f83-4872-a0e3-d2e9a83d8a9e",
   "metadata": {},
   "source": [
    "### Black-box UE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5fb84386",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Take the OpenAI key from the OPENAI_API_KEY environment variable so a real secret is\n",
    "# never saved into the notebook; the placeholder is used only when the variable is unset.\n",
    "# Note: this rebinds `model`, replacing the WhiteboxModel created earlier in the notebook.\n",
    "model = BlackboxModel(\n",
    "    os.environ.get('OPENAI_API_KEY', 'YOUR_OPENAI_TOKEN'),\n",
    "    'gpt-3.5-turbo'\n",
    ")\n",
    "# EigValLaplacian is constructed with verbose=True, as in the original example.\n",
    "estimator = EigValLaplacian(verbose=True)\n",
    "estimate_uncertainty(model, estimator, input_text='When did Albert Einstein die?')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d5b63635",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hugging Face API token: read from the HF_TOKEN environment variable when it is set,\n",
    "# otherwise fall back to the placeholder, which must then be replaced by hand.\n",
    "API_TOKEN = os.environ.get('HF_TOKEN', 'YOUR_API_TOKEN')\n",
    "# for example let's take the google/t5-large-ssm-nqo model\n",
    "MODEL_ID = 'google/t5-large-ssm-nqo'\n",
    "\n",
    "# The openai_* keyword arguments are passed explicitly as None, exactly as in the original call.\n",
    "model = BlackboxModel.from_huggingface(\n",
    "    hf_api_token=API_TOKEN,\n",
    "    hf_model_id=MODEL_ID,\n",
    "    openai_api_key=None,\n",
    "    openai_model_path=None,\n",
    ")\n",
    "ue_method = LexicalSimilarity()\n",
    "input_text = \"Who is George Bush?\"\n",
    "estimate_uncertainty(model, ue_method, input_text=input_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "10ec1991",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Repeat the Hugging Face black-box run with the bigscience/bloomz-560m checkpoint.\n",
    "# API_TOKEN is not defined here; it must already exist from an earlier cell.\n",
    "MODEL_ID = 'bigscience/bloomz-560m'\n",
    "\n",
    "input_text = \"Who is George Bush?\"\n",
    "model = BlackboxModel.from_huggingface(\n",
    "    hf_api_token=API_TOKEN,\n",
    "    hf_model_id=MODEL_ID,\n",
    "    openai_api_key=None,\n",
    "    openai_model_path=None,\n",
    ")\n",
    "ue_method = LexicalSimilarity()\n",
    "estimate_uncertainty(model, ue_method, input_text=input_text)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
