{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load trained model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/sid/miniconda3/envs/mlp2/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "import sys \n",
    "sys.path.append('../')\n",
    "from modeling import modeling_llama\n",
    "import os\n",
    "from pathlib import Path\n",
    "import json\n",
    "import torch "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoConfig, AutoModel\n",
    "\n",
    "model_name = \"TinyLlama/TinyLlama-1.1B-Chat-v1.0\"\n",
    "config = AutoConfig.from_pretrained(model_name)\n",
    "# model = AutoModel.from_pretrained(model_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.\n"
     ]
    },
    {
     "ename": "RuntimeError",
     "evalue": "No GPU found. A GPU is needed for quantization.",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[3], line 12\u001b[0m\n\u001b[1;32m      9\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mlowrank_config:\n\u001b[1;32m     10\u001b[0m     config\u001b[38;5;241m.\u001b[39mlowrank_config[key][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpeft_config\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mr2\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;241m8\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlora_alpha\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;241m4.\u001b[39m}\n\u001b[0;32m---> 12\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mmodeling_llama\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mLlamaModel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mload_in_8bit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/miniconda3/envs/mlp/lib/python3.9/site-packages/transformers/modeling_utils.py:3398\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m   3395\u001b[0m     hf_quantizer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m   3397\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m hf_quantizer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 3398\u001b[0m     \u001b[43mhf_quantizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate_environment\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   3399\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtorch_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch_dtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrom_tf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfrom_tf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrom_flax\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfrom_flax\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice_map\u001b[49m\n\u001b[1;32m   3400\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3401\u001b[0m     torch_dtype \u001b[38;5;241m=\u001b[39m hf_quantizer\u001b[38;5;241m.\u001b[39mupdate_torch_dtype(torch_dtype)\n\u001b[1;32m   3402\u001b[0m     device_map \u001b[38;5;241m=\u001b[39m hf_quantizer\u001b[38;5;241m.\u001b[39mupdate_device_map(device_map)\n",
      "File \u001b[0;32m~/miniconda3/envs/mlp/lib/python3.9/site-packages/transformers/quantizers/quantizer_bnb_8bit.py:62\u001b[0m, in \u001b[0;36mBnb8BitHfQuantizer.validate_environment\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m     60\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mvalidate_environment\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m     61\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39mis_available():\n\u001b[0;32m---> 62\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo GPU found. A GPU is needed for quantization.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     64\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_accelerate_available():\n\u001b[1;32m     65\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUsing `bitsandbytes` 8-bit quantization requires Accelerate: `pip install accelerate`\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
      "\u001b[0;31mRuntimeError\u001b[0m: No GPU found. A GPU is needed for quantization."
     ]
    }
   ],
   "source": [
    "model_path = Path(\"../train_cache/qoydka27_saved_model\")\n",
    "assert os.path.exists(model_path)\n",
    "\n",
    "with open(model_path/'lowrank_config.json') as f:\n",
    "    lowrank_config = json.load(f) \n",
    "\n",
    "config.lowrank_config = lowrank_config\n",
    "\n",
    "for key in config.lowrank_config:\n",
    "    config.lowrank_config[key]['peft_config'] = {'r2': 8, 'lora_alpha': 4.}\n",
    "\n",
    "model = modeling_llama.LlamaModel.from_pretrained(model_path, config=config, load_in_8bit=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Try PEFT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "ename": "TypeError",
     "evalue": "'float' object is not callable",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[5], line 14\u001b[0m\n\u001b[1;32m     10\u001b[0m input_tokens \u001b[38;5;241m=\u001b[39m tokenizer(input_text, return_tensors\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m'\u001b[39m)  \u001b[38;5;66;03m# returns PyTorch tensors\u001b[39;00m\n\u001b[1;32m     12\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad():\n\u001b[1;32m     13\u001b[0m     \u001b[38;5;66;03m# Forward pass through the model\u001b[39;00m\n\u001b[0;32m---> 14\u001b[0m     outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minput_tokens\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     16\u001b[0m \u001b[38;5;66;03m# Print the output to see the result of the forward pass\u001b[39;00m\n\u001b[1;32m     17\u001b[0m \u001b[38;5;28mprint\u001b[39m(outputs)\n",
      "File \u001b[0;32m~/miniconda3/envs/mlp/lib/python3.9/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/miniconda3/envs/mlp/lib/python3.9/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
      "File \u001b[0;32m~/Documents/code/uni/thesis/learn-to-compress-svd/notebooks/../modeling/modeling_llama.py:1086\u001b[0m, in \u001b[0;36mLlamaModel.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, cache_position)\u001b[0m\n\u001b[1;32m   1075\u001b[0m     layer_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_gradient_checkpointing_func(\n\u001b[1;32m   1076\u001b[0m         decoder_layer\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__call__\u001b[39m,\n\u001b[1;32m   1077\u001b[0m         hidden_states,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1083\u001b[0m         cache_position,\n\u001b[1;32m   1084\u001b[0m     )\n\u001b[1;32m   1085\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1086\u001b[0m     layer_outputs \u001b[38;5;241m=\u001b[39m \u001b[43mdecoder_layer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1087\u001b[0m \u001b[43m        \u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1088\u001b[0m \u001b[43m        \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcausal_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1089\u001b[0m \u001b[43m        \u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mposition_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1090\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpast_key_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpast_key_values\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1091\u001b[0m \u001b[43m        \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1092\u001b[0m \u001b[43m        \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1093\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcache_position\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_position\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1094\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1096\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m layer_outputs[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m   1098\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_cache:\n",
      "File \u001b[0;32m~/miniconda3/envs/mlp/lib/python3.9/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/miniconda3/envs/mlp/lib/python3.9/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
      "File \u001b[0;32m~/Documents/code/uni/thesis/learn-to-compress-svd/notebooks/../modeling/modeling_llama.py:825\u001b[0m, in \u001b[0;36mLlamaDecoderLayer.forward\u001b[0;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position, **kwargs)\u001b[0m\n\u001b[1;32m    822\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_layernorm(hidden_states)\n\u001b[1;32m    824\u001b[0m \u001b[38;5;66;03m# Self Attention\u001b[39;00m\n\u001b[0;32m--> 825\u001b[0m hidden_states, self_attn_weights, present_key_value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mself_attn\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    826\u001b[0m \u001b[43m    \u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    827\u001b[0m \u001b[43m    \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    828\u001b[0m \u001b[43m    \u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mposition_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    829\u001b[0m \u001b[43m    \u001b[49m\u001b[43mpast_key_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpast_key_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    830\u001b[0m \u001b[43m    \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    831\u001b[0m \u001b[43m    \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    832\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcache_position\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_position\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    833\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    834\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    835\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m residual \u001b[38;5;241m+\u001b[39m hidden_states\n\u001b[1;32m    837\u001b[0m \u001b[38;5;66;03m# Fully Connected\u001b[39;00m\n",
      "File \u001b[0;32m~/miniconda3/envs/mlp/lib/python3.9/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/miniconda3/envs/mlp/lib/python3.9/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
      "File \u001b[0;32m~/Documents/code/uni/thesis/learn-to-compress-svd/notebooks/../modeling/modeling_llama.py:722\u001b[0m, in \u001b[0;36mLlamaSdpaAttention.forward\u001b[0;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position, **kwargs)\u001b[0m\n\u001b[1;32m    719\u001b[0m bsz, q_len, _ \u001b[38;5;241m=\u001b[39m hidden_states\u001b[38;5;241m.\u001b[39msize()\n\u001b[1;32m    721\u001b[0m query_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mq_proj(hidden_states)\n\u001b[0;32m--> 722\u001b[0m key_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mk_proj\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    723\u001b[0m value_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mv_proj(hidden_states)\n\u001b[1;32m    725\u001b[0m query_states \u001b[38;5;241m=\u001b[39m query_states\u001b[38;5;241m.\u001b[39mview(bsz, q_len, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnum_heads, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhead_dim)\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m)\n",
      "File \u001b[0;32m~/miniconda3/envs/mlp/lib/python3.9/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/miniconda3/envs/mlp/lib/python3.9/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
      "File \u001b[0;32m~/Documents/code/uni/thesis/learn-to-compress-svd/notebooks/../modeling/lowrank_layers.py:88\u001b[0m, in \u001b[0;36mLinearLowRank.forward\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m     85\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpeft_config:\n\u001b[1;32m     86\u001b[0m     \u001b[38;5;66;03m# Compute the LoRA adaptation for V_t\u001b[39;00m\n\u001b[1;32m     87\u001b[0m     peft_V_t_weight \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpeft_V_t_B(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpeft_V_t_A(inputs))\n\u001b[0;32m---> 88\u001b[0m     x \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlora_alpha \u001b[38;5;241m*\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlora_dropout_p\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpeft_V_t_weight\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     90\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mUE(x)\n\u001b[1;32m     92\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpeft_config:\n\u001b[1;32m     93\u001b[0m     \u001b[38;5;66;03m# print('Input: ', x.shape) \u001b[39;00m\n\u001b[1;32m     94\u001b[0m     \u001b[38;5;66;03m# print('self.peft_UE_A', self.peft_UE_A)\u001b[39;00m\n\u001b[1;32m     95\u001b[0m     \u001b[38;5;66;03m# print('self.peft_UE_B', self.peft_UE_B)\u001b[39;00m\n",
      "\u001b[0;31mTypeError\u001b[0m: 'float' object is not callable"
     ]
    }
   ],
   "source": [
    "from transformers import LlamaTokenizer, LlamaModel\n",
    "\n",
    "# Load the tokenizer and the model\n",
    "tokenizer = LlamaTokenizer.from_pretrained(\"TinyLlama/TinyLlama-1.1B-Chat-v1.0\")\n",
    "\n",
    "# Example text input for testing\n",
    "input_text = \"Hello, how are you?\"\n",
    "\n",
    "# Tokenize the input text\n",
    "input_tokens = tokenizer(input_text, return_tensors='pt')  # returns PyTorch tensors\n",
    "\n",
    "with torch.no_grad():\n",
    "    # Forward pass through the model\n",
    "    outputs = model(**input_tokens)\n",
    "\n",
    "# Print the output to see the result of the forward pass\n",
    "print(outputs)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Linear(in_features=8, out_features=1253, bias=False)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.layers[0].mlp.down_proj.peft_V_t_B"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1253"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.layers[0].mlp.down_proj.rank"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Gradients set for PEFT. Trainable: 328240, Non-trainable: 1028139008 (0.03%)\n"
     ]
    }
   ],
   "source": [
    "def configure_required_grad(model):\n",
    "    \"\"\"\n",
    "    Enable gradients for 'peft_' prefixed parameters; disable others.\n",
    "    \"\"\"\n",
    "    trainable = non_trainable = 0\n",
    "    for name, param in model.named_parameters():\n",
    "        if 'peft_' in name:\n",
    "            param.requires_grad = True\n",
    "            trainable += param.numel()\n",
    "        else:\n",
    "            param.requires_grad = False\n",
    "            non_trainable += param.numel()\n",
    "    print(f'Gradients set for PEFT. Trainable: {trainable}, Non-trainable: {non_trainable} ({100 * trainable / (trainable + non_trainable):.2f}%)')\n",
    "\n",
    "configure_required_grad(model)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([[ 0.0694,  0.3468,  0.2989,  ..., -0.1931, -0.0148,  0.1255],\n",
       "         [-0.1012,  0.1529,  0.4478,  ..., -0.3015,  0.4883, -0.6439],\n",
       "         [ 0.5161,  0.1197,  1.2927,  ..., -0.7510,  0.3678, -0.6474],\n",
       "         ...,\n",
       "         [-0.2003,  0.0887,  0.4199,  ...,  0.1506, -0.1233, -0.2036],\n",
       "         [-0.2706, -0.2595, -0.2900,  ..., -0.8168, -0.2757, -0.7736],\n",
       "         [ 0.1028,  0.3683,  0.4279,  ...,  0.1038,  0.1316,  0.3506]],\n",
       "        grad_fn=<AddBackward0>),\n",
       " tensor([[ 0.0694,  0.3468,  0.2989,  ..., -0.1931, -0.0148,  0.1255],\n",
       "         [-0.1012,  0.1529,  0.4478,  ..., -0.3015,  0.4883, -0.6439],\n",
       "         [ 0.5161,  0.1197,  1.2927,  ..., -0.7510,  0.3678, -0.6474],\n",
       "         ...,\n",
       "         [-0.2003,  0.0887,  0.4199,  ...,  0.1506, -0.1233, -0.2036],\n",
       "         [-0.2706, -0.2595, -0.2900,  ..., -0.8168, -0.2757, -0.7736],\n",
       "         [ 0.1028,  0.3683,  0.4279,  ...,  0.1038,  0.1316,  0.3506]],\n",
       "        grad_fn=<MmBackward0>))"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "\n",
    "class LinearLowRank(nn.Module):\n",
    "    def __init__(self, init_config):\n",
    "        super(LinearLowRank, self).__init__()  # Fixed class name here\n",
    "\n",
    "        self.in_features = init_config['in_features']\n",
    "        self.out_features = init_config['out_features']\n",
    "        self.rank = init_config['rank']\n",
    "\n",
    "        # Base low-rank decomposition layers\n",
    "        self.V_t = nn.Linear(self.in_features, self.rank, bias=False)\n",
    "        self.UE = nn.Linear(self.rank, self.out_features, bias=False)\n",
    "\n",
    "        self.peft_config = init_config.get('peft_config', None)\n",
    "        self.lora_alpha = 1.\n",
    "\n",
    "        if self.peft_config:\n",
    "            r2 = self.peft_config.get('lora_rank', self.rank)  # Reduced rank for LoRA layers, can be configured\n",
    "            self.lora_alpha = self.peft_config.get('lora_alpha', 2.)\n",
    "            self.lora_dropout_p  = nn.Dropout(self.peft_config.get('lora_dropout_p', 0.0))\n",
    "\n",
    "            # LoRA layers for V_t\n",
    "            self.peft_V_t_A = nn.Linear(self.in_features, r2, bias=False)\n",
    "            self.peft_V_t_B = nn.Linear(r2, self.rank, bias=False)\n",
    "\n",
    "            # LoRA layers for UE\n",
    "            self.peft_UE_A = nn.Linear(self.rank, r2, bias=False)\n",
    "            self.peft_UE_B = nn.Linear(r2, self.out_features, bias=False)\n",
    "\n",
    "            nn.init.zeros_(self.peft_V_t_A.weight)\n",
    "            nn.init.zeros_(self.peft_V_t_B.weight)\n",
    "            nn.init.zeros_(self.peft_UE_A.weight)\n",
    "            nn.init.zeros_(self.peft_UE_B.weight)\n",
    "\n",
    "    def forward(self, inputs):\n",
    "        # Forward pass for base layers\n",
    "        x = self.V_t(inputs)\n",
    "        \n",
    "        # If LoRA (peft) is available, add LoRA adaptation to both layers\n",
    "        if self.peft_config:\n",
    "            # Compute the LoRA adaptation for V_t\n",
    "            peft_V_t_weight = self.peft_V_t_B(self.peft_V_t_A(inputs))\n",
    "            x += self.lora_alpha * self.lora_dropout_p(peft_V_t_weight)\n",
    "\n",
    "        output = self.UE(x)\n",
    "\n",
    "        if self.peft_config:\n",
    "            # print('Input: ', x.shape) \n",
    "            # print('self.peft_UE_A', self.peft_UE_A)\n",
    "            # print('self.peft_UE_B', self.peft_UE_B)\n",
    "            peft_UE_weight = self.peft_UE_B(self.peft_UE_A(x))\n",
    "            output += self.lora_alpha * self.lora_dropout_p(peft_UE_weight)\n",
    "        return output\n",
    "\n",
    "    def __str__(self):\n",
    "        return f\"LinearLowRankPEFT(in_features={self.in_features}, out_features={self.out_features}, rank={self.rank})\"\n",
    "\n",
    "    def __repr__(self):\n",
    "        return self.__str__()\n",
    "\n",
    "# Initialize layer with peft_config\n",
    "init_config_with_peft = {\n",
    "    'in_features': 128,\n",
    "    'out_features': 128,\n",
    "    'rank': 20,\n",
    "    'peft_config': {\n",
    "        'lora_rank': 2,\n",
    "        'lora_alpha': 1.5,\n",
    "        'lora_dropout': 0.0\n",
    "    }\n",
    "}\n",
    "\n",
    "# Initialize with peft configuration\n",
    "layer_with_peft = LinearLowRank(init_config_with_peft)\n",
    "\n",
    "# Create random input tensor\n",
    "input_tensor = torch.randn(init_config_with_peft['in_features'], init_config_with_peft['in_features'])\n",
    "\n",
    "# Forward pass with peft configuration\n",
    "output_with_peft = layer_with_peft(input_tensor)\n",
    "\n",
    "# # Disable PEFT\n",
    "layer_with_peft.peft_config = None\n",
    "\n",
    "# Forward pass without peft configuration\n",
    "output_without_peft = layer_with_peft(input_tensor)\n",
    "\n",
    "# Compare outputs\n",
    "output_with_peft, output_without_peft"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "torch.allclose(output_with_peft, output_without_peft)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mlp",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
