{
 "cells": [
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "!pip install -q git+https://github.com/huggingface/transformers.git\n",
    "!pip install -q git+https://github.com/huggingface/peft.git\n",
    "!pip install -q git+https://github.com/huggingface/accelerate.git@main\n",
    "!pip install huggingface_hub\n",
    "!pip install bitsandbytes\n",
    "!pip install SentencePiece"
   ],
   "id": "aa871df20492d4"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
   ],
   "id": "e5082a87b30ec827"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "from huggingface_hub import notebook_login\n",
    "import torch\n",
    "\n",
    "notebook_login()"
   ],
   "id": "2567767f3e17b348"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "from peft import PeftModel\n",
    "from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig, BitsAndBytesConfig\n",
    "\n",
    "model_name = \"decapoda-research/llama-7b-hf\"\n",
    "tokenizer = LlamaTokenizer.from_pretrained(model_name)\n",
    "model = LlamaForCausalLM.from_pretrained(model_name, quantization_config=BitsAndBytesConfig(load_in_8bit=True),\n",
    "                                         device_map=\"auto\", use_auth_token=True)"
   ],
   "id": "b2afd79ccc02490c"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "%%time\n",
    "model = PeftModel.from_pretrained(model, \"tloen/alpaca-lora-7b\", adapter_name=\"eng_alpaca\")"
   ],
   "id": "9d13d7980c25938c"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "%%time\n",
    "model.load_adapter(\"22h/cabrita-lora-v0-1\", adapter_name=\"portuguese_alpaca\")"
   ],
   "id": "667bba26365e9e63"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "model",
   "id": "d3e26549b4b1d0db"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "model.to(\"cuda\")",
   "id": "b2acb6469c1547fb"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "import torch\n",
    "\n",
    "device = \"cuda\"\n",
    "\n",
    "\n",
    "def generate_prompt(instruction, input=None):\n",
    "    if input:\n",
    "        return f\"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
    "### Instruction:\n",
    "{instruction}\n",
    "### Input:\n",
    "{input}\n",
    "### Response:\"\"\"\n",
    "    else:\n",
    "        return f\"\"\"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
    "### Instruction:\n",
    "{instruction}\n",
    "### Response:\"\"\"\n",
    "\n",
    "\n",
    "def evaluate(\n",
    "        instruction,\n",
    "        input=None,\n",
    "        temperature=0.1,\n",
    "        top_p=0.75,\n",
    "        top_k=40,\n",
    "        num_beams=4,\n",
    "        max_new_tokens=256,\n",
    "        **kwargs,\n",
    "):\n",
    "    prompt = generate_prompt(instruction, input)\n",
    "    inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
    "    input_ids = inputs[\"input_ids\"].to(device)\n",
    "    generation_config = GenerationConfig(\n",
    "        temperature=temperature,\n",
    "        top_p=top_p,\n",
    "        top_k=top_k,\n",
    "        num_beams=num_beams,\n",
    "        no_repeat_ngram_size=3,\n",
    "        **kwargs,\n",
    "    )\n",
    "\n",
    "    with torch.no_grad():\n",
    "        generation_output = model.generate(\n",
    "            input_ids=input_ids,\n",
    "            generation_config=generation_config,\n",
    "            return_dict_in_generate=True,\n",
    "            output_scores=True,\n",
    "            max_new_tokens=max_new_tokens,\n",
    "        )\n",
    "    s = generation_output.sequences[0]\n",
    "    output = tokenizer.decode(s)\n",
    "    return output.split(\"### Response:\")[1].strip()"
   ],
   "id": "b68121fc1a85419d"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "%%time\n",
    "model.set_adapter(\"eng_alpaca\")"
   ],
   "id": "6dafc29e384d8d8c"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "instruction = \"Tell me about alpacas.\"\n",
    "\n",
    "print(evaluate(instruction))"
   ],
   "id": "aee3514552be300b"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "%%time\n",
    "model.set_adapter(\"portuguese_alpaca\")"
   ],
   "id": "8e39b8f793dd87c0"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "instruction = \"Invente uma desculpa criativa pra dizer que não preciso ir à festa.\"\n",
    "\n",
    "print(evaluate(instruction))"
   ],
   "id": "5f03f7628bf4485"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "with model.disable_adapter():\n",
    "    instruction = \"Invente uma desculpa criativa pra dizer que não preciso ir à festa.\"\n",
    "\n",
    "    print(evaluate(instruction))"
   ],
   "id": "8d7efb47c388c635"
  }
 ],
 "metadata": {},
 "nbformat": 5,
 "nbformat_minor": 9
}
