{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05c9e512-e4f8-46fe-8e18-d906f6fd4d95",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"1\"\n",
    "\n",
    "os.environ[\"TRANSFORMERS_CACHE\"] = \"/data/../llm_cache\"\n",
    "from huggingface_hub import login\n",
    "\n",
    "login(\"..\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c20c2a9-e309-40c2-ae05-6ace4eda3789",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import math\n",
    "import torch\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import argparse\n",
    "import textwrap\n",
    "import transformers\n",
    "from peft import PeftModel\n",
    "from transformers import GenerationConfig, TextStreamer, BitsAndBytesConfig\n",
    "from llama_attn_replace import replace_llama_attn\n",
    "\n",
    "\n",
    "from dataclasses import dataclass, field\n",
    "from typing import Dict, Optional\n",
    "\n",
    "from accelerate import Accelerator\n",
    "import datasets\n",
    "from datasets import Dataset, load_dataset\n",
    "from peft import LoraConfig\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser, TrainingArguments, set_seed\n",
    "\n",
    "from trl import DPOTrainer\n",
    "\n",
    "\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import transformers\n",
    "from torch.utils.data import Dataset\n",
    "from transformers import Trainer, DataCollatorForLanguageModeling, BitsAndBytesConfig\n",
    "from llama_attn_replace_sft import replace_llama_attn\n",
    "from gptneox_attn_replace import replace_gpt_neox_attn\n",
    "from peft import LoraConfig, get_peft_model\n",
    "from torch.distributed import barrier\n",
    "from transformers import TrainerCallback, TrainingArguments, TrainerState, TrainerControl"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6dd9e587-22d6-4e58-ad41-ac788ba821db",
   "metadata": {},
   "source": [
    "### Merge and unload with llama3.1"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d09e8aa2-db0b-44bb-a9e0-276dc547ad2a",
   "metadata": {},
   "source": [
    "#### **set the below params**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3a755dc1-c2a0-4978-9daf-2f95dcd3e268",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "step = 330 \n",
    "adapter_model_name = \"../your/model/path/\"\n",
    "adapter_model_name += \"checkpoint-{}\".format(step)\n",
    "base_model = \"../your/model/path/\"\n",
    "save_path = \"../your/model/path/\"\n"
   ]
  },
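  {
   "cell_type": "markdown",
   "id": "f0a1b2c3-d4e5-4f67-89ab-cdef01234567",
   "metadata": {},
   "source": [
    "Optionally, list which `checkpoint-*` folders actually exist under `adapter_dir` before choosing `steps` in the next cell. A minimal sketch, assuming the standard Hugging Face Trainer layout of `checkpoint-<step>` subdirectories; adjust the pattern if your run saved them differently."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a1b2c3d-4e5f-4a6b-8c7d-ef0123456789",
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "\n",
    "# Discover saved checkpoint step numbers (sketch; assumes adapter_dir holds\n",
    "# checkpoint-<step> subfolders written by the Trainer)\n",
    "available_steps = sorted(\n",
    "    int(os.path.basename(p).rsplit(\"-\", 1)[-1])\n",
    "    for p in glob.glob(os.path.join(adapter_dir, \"checkpoint-*\"))\n",
    ")\n",
    "print(\"available checkpoints:\", available_steps)"
   ]
  },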
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "19226267-7667-4c35-96af-8d1bb9b68991",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import argparse\n",
    "\n",
    "# step = 80 # 160 240 320 \n",
    "steps = [step,]\n",
    "for step in steps:\n",
    "    # Example dictionary of arguments\n",
    "    args_dict = {\n",
    "        'base_model':base_model, # \"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
    "        'cache_dir': None,\n",
    "        'peft_model': adapter_model_name,\n",
    "        'save_path': save_path,\n",
    "\n",
    "    }\n",
    "\n",
    "    # Creating an args object\n",
    "    args = argparse.Namespace(**args_dict)\n",
    "    \n",
    "    quantization_config = BitsAndBytesConfig(\n",
    "            load_in_4bit=True,\n",
    "            bnb_4bit_use_double_quant=True,\n",
    "            bnb_4bit_quant_type=\"nf4\",\n",
    "            bnb_4bit_compute_dtype=torch.bfloat16,\n",
    ")\n",
    "\n",
    "    # Accessing arguments\n",
    "    print(args)\n",
    "\n",
    "    device = \"cuda:0\"\n",
    "    torch.cuda.set_device(device)\n",
    "\n",
    "    print(\"base model\", args.base_model)\n",
    "    print(\"cache dir\", args.cache_dir)\n",
    "    print(\"peft model\", args.peft_model)\n",
    "    \n",
    "    # Load the base model\n",
    "    model = AutoModelForCausalLM.from_pretrained(args.base_model,\n",
    "                                                 torch_dtype= torch.float16, # \"auto\",\n",
    "                                                 device_map=\"auto\",\n",
    "                                                # quantization_config = quantization_config\n",
    "                                                )\n",
    "    \n",
    "\n",
    "    # Load the tokenizer\n",
    "    tokenizer = AutoTokenizer.from_pretrained(args.base_model)\n",
    "    # set pad_token_id equal to the eos_token_id if not set\n",
    "    if tokenizer.pad_token_id is None:\n",
    "      tokenizer.pad_token_id = tokenizer.eos_token_id\n",
    "\n",
    "    # Set reasonable default for models without max length\n",
    "    if tokenizer.model_max_length > 100_000:\n",
    "      tokenizer.model_max_length = 4096 # 16384 # 2048\n",
    "\n",
    "    response_template = \"Rewrite:<|eot_id|><|start_header_id|>assistant<|end_header_id|>\"\n",
    "\n",
    "    # Define special tokens\n",
    "    special_tokens_dict = {\n",
    "        \"additional_special_tokens\": [response_template],\n",
    "    }\n",
    "    # Add the special tokens to the tokenizer\n",
    "    tokenizer.add_special_tokens(special_tokens_dict)\n",
    "\n",
    "    # Resize the model embeddings to accommodate the new special tokens\n",
    "    model.resize_token_embeddings(len(tokenizer))\n",
    "\n",
    "    # Load the adapter model\n",
    "    model = PeftModel.from_pretrained(model, \n",
    "                                      args.peft_model, \n",
    "                                      device_map=\"auto\",\n",
    "                                      torch_dtype=torch.float16,# \"auto\",\n",
    "                                     )\n",
    "    \n",
    "#########################\n",
    "##### Example usage\n",
    "#########################\n",
    "# input_text = \"Rewrite:<|eot_id|><|start_header_id|>assistant<|end_header_id|> What are aspects of the story of Ed the Happy Clown that involve Ronald Reagan?\"\n",
    "input_text = \"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\\n\\nCutting Knowledge Date: December 2023\\nToday Date: 26 Jul 2024\\n\\n<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\nGiven a question, its previous questions (Q) and answers (A), decontextualize the question by addressing coreference and omission issues. The resulting question should retain its original meaning and be as informative as possible, and should not duplicate any previously asked questions in the context.\\n\\nContext: [Q: When was Born to Fly released? A: Sara Evans's third studio album, Born to Fly, was released on October 10, 2000.]\\nQuestion: Was Born to Fly well received by critics?\\nRewrite: Was Born to Fly well received by critics?\\n\\nContext: [Q: When was Keith Carradine born? A: Keith Ian Carradine was born August 8, 1949. Q: Is he married? A: Keith Carradine married Sandra Will on February 6, 1982.]\\nQuestion: Do they have any children?\\nRewrite: Do Keith Carradine and Sandra Will have any children?\\n\\nContext: [Q: Who proposed that atoms are the basic units of matter? A: John Dalton proposed that each chemical element is composed of atoms of a single, unique type, and they can combine to form more complex structures called chemical compounds.]\\nQuestion: How did the proposal come about?\\nRewrite: How did John Dalton's proposal that each chemical element is composed of atoms of a single unique type, and they can combine to form more complex structures called chemical compounds come about?\\n\\nContext: [Q: What is it called when two liquids separate? A: Decantation is a process for the separation of mixtures of immiscible liquids or of a liquid and a solid mixture such as a suspension. Q: How does the separation occur? A: The layer closer to the top of the container-the less dense of the two liquids, or the liquid from which the precipitate or sediment has settled out-is poured off.]\\nQuestion: Then what happens?\\nRewrite: Then what happens after the layer closer to the top of the container is poured off with decantation?\\n\\nContext: [Q: What was Faith Hill's first country album? A: Take Me as I Am is the debut studio album by country singer Faith Hill. Q: What was a single from the album? A: The first single from Faith Hill's Take Me as I am is Wild One. Q: Was the song a success? A: Hill's rendition was also her first Number One, spending the first four chart weeks of 1994 at the top of the Billboard Hot Country Singles & Tracks chart. Q: Did the album perform well? A: Take Me as I am has been certified 3× platinum in the United States for shipments of three million copies. Q: Did she write her own songs? A: Faith Hill performs songs other people wrote. Q: Did she tour? A: Faith Hill's Soul2Soul II Tour 2006 with McGraw became the highest-grossing country tour of all time. Q: Who did she tour with? A: Faith Hill toured with Tim Mcgraw in 2006.]\\nQuestion: Did they sing together?\\nRewrite:<|eot_id|><|start_header_id|>assistant<|end_header_id|>\"\n",
    "inputs = tokenizer(input_text, return_tensors=\"pt\").to(model.device)\n",
    "streamer = TextStreamer(tokenizer)\n",
    "\n",
    "# Generate text\n",
    "model.eval()\n",
    "outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.6,\n",
    "            top_p=0.9,\n",
    "            use_cache=True,\n",
    "                         streamer=streamer)\n",
    "generated_text = tokenizer.decode(outputs[0],) # skip_special_tokens=True)\n",
    "\n",
    "# print(\"Generated Text:\", generated_text)\n",
    "\n",
    "\n",
    "# print(\"Generated Text:\", generated_text)\n",
    "\n",
    "merged_model = model.merge_and_unload() \n",
    "# merge the adapters into the base model so you can use the model like a normal transformers model,\n",
    "# however the checkpoint will be significantly bigger\n",
    "merged_model.save_pretrained(args.save_path)\n",
    "tokenizer.save_pretrained(args.save_path)\n",
    "\n",
    "# Example usage\n",
    "# input_text = \"Rewrite:<|eot_id|><|start_header_id|>assistant<|end_header_id|> What are aspects of the story of Ed the Happy Clown that involve Ronald Reagan?\"\n",
    "input_text = \"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\\n\\nCutting Knowledge Date: December 2023\\nToday Date: 26 Jul 2024\\n\\n<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\nGiven a question, its previous questions (Q) and answers (A), decontextualize the question by addressing coreference and omission issues. The resulting question should retain its original meaning and be as informative as possible, and should not duplicate any previously asked questions in the context.\\n\\nContext: [Q: When was Born to Fly released? A: Sara Evans's third studio album, Born to Fly, was released on October 10, 2000.]\\nQuestion: Was Born to Fly well received by critics?\\nRewrite: Was Born to Fly well received by critics?\\n\\nContext: [Q: When was Keith Carradine born? A: Keith Ian Carradine was born August 8, 1949. Q: Is he married? A: Keith Carradine married Sandra Will on February 6, 1982.]\\nQuestion: Do they have any children?\\nRewrite: Do Keith Carradine and Sandra Will have any children?\\n\\nContext: [Q: Who proposed that atoms are the basic units of matter? A: John Dalton proposed that each chemical element is composed of atoms of a single, unique type, and they can combine to form more complex structures called chemical compounds.]\\nQuestion: How did the proposal come about?\\nRewrite: How did John Dalton's proposal that each chemical element is composed of atoms of a single unique type, and they can combine to form more complex structures called chemical compounds come about?\\n\\nContext: [Q: What is it called when two liquids separate? A: Decantation is a process for the separation of mixtures of immiscible liquids or of a liquid and a solid mixture such as a suspension. Q: How does the separation occur? A: The layer closer to the top of the container-the less dense of the two liquids, or the liquid from which the precipitate or sediment has settled out-is poured off.]\\nQuestion: Then what happens?\\nRewrite: Then what happens after the layer closer to the top of the container is poured off with decantation?\\n\\nContext: [Q: What was Faith Hill's first country album? A: Take Me as I Am is the debut studio album by country singer Faith Hill. Q: What was a single from the album? A: The first single from Faith Hill's Take Me as I am is Wild One. Q: Was the song a success? A: Hill's rendition was also her first Number One, spending the first four chart weeks of 1994 at the top of the Billboard Hot Country Singles & Tracks chart. Q: Did the album perform well? A: Take Me as I am has been certified 3× platinum in the United States for shipments of three million copies. Q: Did she write her own songs? A: Faith Hill performs songs other people wrote. Q: Did she tour? A: Faith Hill's Soul2Soul II Tour 2006 with McGraw became the highest-grossing country tour of all time. Q: Who did she tour with? A: Faith Hill toured with Tim Mcgraw in 2006.]\\nQuestion: Did they sing together?\\nRewrite:<|eot_id|><|start_header_id|>assistant<|end_header_id|>\"\n",
    "inputs = tokenizer(input_text, return_tensors=\"pt\").to(model.device)\n",
    "streamer = TextStreamer(tokenizer)\n",
    "\n",
    "# Generate text\n",
    "model.eval()\n",
    "outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.6,\n",
    "            top_p=0.9,\n",
    "            use_cache=True,\n",
    "                         streamer=streamer)\n",
    "generated_text = tokenizer.decode(outputs[0],) # skip_special_tokens=True)\n"
   ]
  }
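  ,
  {
   "cell_type": "markdown",
   "id": "1b2c3d4e-5f6a-4b7c-9d8e-f01234567890",
   "metadata": {},
   "source": [
    "After merging, `save_path` holds a standalone checkpoint. The cell below is a minimal sketch of reloading it for inference, assuming the merge cell above has been run. It builds the prompt with `apply_chat_template` rather than a hand-written string (assuming the saved tokenizer carries the Llama 3.1 chat template) and shows where the `quantization_config` defined earlier could plug in for 4-bit loading; the question in `messages` is a made-up placeholder."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c3d4e5f-6a7b-4c8d-ae9f-012345678901",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the merged checkpoint as a plain transformers model (sketch;\n",
    "# assumes save_path was written by the merge cell above)\n",
    "reloaded_tok = AutoTokenizer.from_pretrained(save_path)\n",
    "reloaded_model = AutoModelForCausalLM.from_pretrained(\n",
    "    save_path,\n",
    "    torch_dtype=torch.float16,\n",
    "    device_map=\"auto\",\n",
    "    # quantization_config=quantization_config,  # optional 4-bit NF4 loading\n",
    ")\n",
    "\n",
    "# Build the prompt via the chat template instead of a hand-written string\n",
    "# (assumes the saved tokenizer carries the Llama 3.1 chat template)\n",
    "messages = [{\"role\": \"user\", \"content\": \"Context: [Q: Who wrote Hamlet? A: William Shakespeare wrote Hamlet.]\\nQuestion: When did he write it?\\nRewrite:\"}]\n",
    "prompt = reloaded_tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n",
    "\n",
    "inputs = reloaded_tok(prompt, return_tensors=\"pt\").to(reloaded_model.device)\n",
    "outputs = reloaded_model.generate(\n",
    "    **inputs,\n",
    "    max_new_tokens=128,\n",
    "    do_sample=True,\n",
    "    temperature=0.6,\n",
    "    top_p=0.9,\n",
    "    streamer=TextStreamer(reloaded_tok),\n",
    ")"
   ]
  }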
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llama3",
   "language": "python",
   "name": "llama3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
