{
 "cells": [
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# Fine-tuning [Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on [timdettmers/openassistant-guanaco](https://huggingface.co/datasets/timdettmers/openassistant-guanaco) Dataset using QDora (quantized Lora w/ use_dora=True) on T4 Free Colab GPU.",
   "id": "39fb1814655e748a"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Install the libraries\n",
    "!pip install -q -U bitsandbytes\n",
    "!pip install -q -U git+https://github.com/huggingface/transformers.git\n",
    "!pip install -q -U git+https://github.com/huggingface/peft.git\n",
    "!pip install -q -U git+https://github.com/huggingface/accelerate.git\n",
    "!pip install -q datasets"
   ],
   "id": "5303f18603150dcc"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Required when training models/data that are gated on HuggingFace, and required for pushing models to HuggingFace\n",
    "from huggingface_hub import notebook_login\n",
    "\n",
    "notebook_login()"
   ],
   "id": "6863827ec4ca837c"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### Loading the model and it's tokenizer in quantized setup!\n",
   "id": "9681521a255cec6e"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# setting up the config for 4-bit quantization of Qlora\n",
    "import torch\n",
    "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n",
    "\n",
    "model_id = \"meta-llama/Meta-Llama-3-8B\"\n",
    "bnb_config = BitsAndBytesConfig(\n",
    "    load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type=\"nf4\", bnb_4bit_compute_dtype=torch.bfloat16\n",
    ")\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
    "model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={\"\": 0})"
   ],
   "id": "bd7ac2978f242bbd"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# print(model)",
   "id": "7ffbef277f2b0ee8"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "#### Prepare model for PEFT fine-tuning",
   "id": "23deaad9596c7a16"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "from peft import prepare_model_for_kbit_training\n",
    "\n",
    "model.gradient_checkpointing_enable()\n",
    "model = prepare_model_for_kbit_training(model)"
   ],
   "id": "aa14420ffca5294b"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "def print_trainable_parameters(model):\n",
    "    \"\"\"\n",
    "    Prints the number of trainable parameters in the model.\n",
    "    \"\"\"\n",
    "    trainable_params = 0\n",
    "    all_param = 0\n",
    "    for _, param in model.named_parameters():\n",
    "        all_param += param.numel()\n",
    "        if param.requires_grad:\n",
    "            trainable_params += param.numel()\n",
    "    print(\n",
    "        f\"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}\"\n",
    "    )"
   ],
   "id": "b76e6e35166624a8"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": [
    "### Setup `LoraConfig`\n",
    "### To use Dora we set the `use_dora=True`"
   ],
   "id": "a4497b06c05c21ca"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "from peft import LoraConfig, get_peft_model\n",
    "\n",
    "config = LoraConfig(\n",
    "    use_dora=True,\n",
    "    r=8,\n",
    "    lora_alpha=32,\n",
    "    target_modules=[\n",
    "        \"q_proj\",\n",
    "        \"k_proj\",\n",
    "        \"v_proj\",\n",
    "        \"o_proj\",\n",
    "        \"gate_proj\",\n",
    "        \"up_proj\",\n",
    "        \"down_proj\",\n",
    "    ],  # parameters specific to llama\n",
    "    lora_dropout=0.05,\n",
    "    bias=\"none\",\n",
    "    task_type=\"CAUSAL_LM\",\n",
    ")\n",
    "\n",
    "model = get_peft_model(model, config)\n",
    "print_trainable_parameters(model)"
   ],
   "id": "a0cdf65552cbbc7d"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "## Step 2) Fine-tuning process 💥\n",
   "id": "defbac7e597c70c9"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Load the dataset from HF\n",
    "from datasets import load_dataset\n",
    "\n",
    "data = load_dataset(\"timdettmers/openassistant-guanaco\")\n",
    "data = data.map(lambda samples: tokenizer(samples[\"text\"]), batched=True)"
   ],
   "id": "e971b32ce96af6a5"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": [
    "## Training\n",
    "\n",
    "For the sake of the demo, we just ran it for 10 steps just to showcase how to use this integration with existing tools on the HF ecosystem."
   ],
   "id": "d1f566ded5fcfd"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "import transformers\n",
    "\n",
    "tokenizer.pad_token = tokenizer.eos_token\n",
    "\n",
    "trainer = transformers.Trainer(\n",
    "    model=model,\n",
    "    train_dataset=data[\"train\"],\n",
    "    args=transformers.TrainingArguments(\n",
    "        per_device_train_batch_size=1,\n",
    "        gradient_accumulation_steps=4,\n",
    "        warmup_steps=2,\n",
    "        max_steps=10,\n",
    "        learning_rate=2e-4,\n",
    "        fp16=True,\n",
    "        logging_steps=1,\n",
    "        output_dir=\"path/to/your/HF/repo\",  # change it to your desired repo!\n",
    "        optim=\"paged_adamw_8bit\",\n",
    "    ),\n",
    "    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),\n",
    ")\n",
    "model.config.use_cache = False  # silence the warnings. Please re-enable for inference!\n",
    "trainer.train()"
   ],
   "id": "71a81a20da27c866"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "## Usage Example",
   "id": "24fd46bd7eb0f95"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "model.config.use_cache = True\n",
    "model.eval();"
   ],
   "id": "4a21ae5f638d074a"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "from transformers import GenerationConfig\n",
    "\n",
    "max_new_tokens = 120\n",
    "top_p = 0.9\n",
    "temperature = 0.7\n",
    "user_question = \"What is the purpose of quantization in LLMs?\"\n",
    "\n",
    "prompt = (\n",
    "    \"A chat between a curious human and an artificial intelligence assistant. \"\n",
    "    \"The assistant gives helpful, detailed, and polite answers to the user's questions. \"\n",
    "    \"### Human: {user_question}\"\n",
    "    \"### Assistant: \"\n",
    ")\n",
    "\n",
    "\n",
    "def generate(model, user_question, max_new_tokens=max_new_tokens, top_p=top_p, temperature=temperature):\n",
    "    inputs = tokenizer(prompt.format(user_question=user_question), return_tensors=\"pt\").to(\"cuda\")\n",
    "\n",
    "    outputs = model.generate(\n",
    "        **inputs,\n",
    "        generation_config=GenerationConfig(\n",
    "            do_sample=True,\n",
    "            max_new_tokens=max_new_tokens,\n",
    "            top_p=top_p,\n",
    "            temperature=temperature,\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
    "    # print(text)\n",
    "    return text\n",
    "\n",
    "\n",
    "generate(model, user_question)"
   ],
   "id": "9a39d603dd6075d5"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "# trainer.push_to_hub()",
   "id": "dadfea7edd0edce1"
  }
 ],
 "metadata": {},
 "nbformat": 5,
 "nbformat_minor": 9
}
