{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Expose only GPU index 7 to this kernel. This is set before `import torch` below so the\n",
    "# restriction is already in the environment when CUDA is first initialised.\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"7\"\n",
    "\n",
    "import gc\n",
    "\n",
    "import pandas as pd\n",
    "import torch\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "\n",
    "from fusion_bench.method.pruning.wanda_utils.eval import eval_ppl\n",
    "from fusion_bench.models.modeling_losparse_llama import LoSparseLlamaForCausalLM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from fusion_bench.models.modeling_losparse_llama.modeling_losparse_llama import (\n",
    "    LoSparseLinear,\n",
    "    LoSparseLlamaForCausalLM,\n",
    ")\n",
    "\n",
    "\n",
    "def model_eval_ppl(model_path):\n",
    "    \"\"\"Load a causal LM checkpoint, report its perplexity, and release it again.\n",
    "\n",
    "    The checkpoint is loaded in float16 with `device_map=\"auto\"` and evaluated\n",
    "    with `eval_ppl` under `torch.no_grad()`. The model is dropped and the CUDA\n",
    "    cache emptied before returning, whether or not the evaluation succeeded.\n",
    "\n",
    "    Args:\n",
    "        model_path: Path or model identifier accepted by `from_pretrained`; it is\n",
    "            used for both the model and its (slow) tokenizer.\n",
    "\n",
    "    Returns:\n",
    "        The value returned by `eval_ppl(model, tokenizer)`.\n",
    "    \"\"\"\n",
    "    # Reclaim whatever an earlier evaluation may have left behind before\n",
    "    # loading another set of weights.\n",
    "    gc.collect()\n",
    "    torch.cuda.empty_cache()\n",
    "    model = AutoModelForCausalLM.from_pretrained(\n",
    "        model_path,\n",
    "        torch_dtype=torch.float16,\n",
    "        low_cpu_mem_usage=True,\n",
    "        device_map=\"auto\",\n",
    "    )\n",
    "    try:\n",
    "        # NOTE(review): presumably `eval_ppl` reads `model.seqlen` as the\n",
    "        # evaluation window length - confirm against its implementation.\n",
    "        model.seqlen = model.config.max_position_embeddings\n",
    "        tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)\n",
    "\n",
    "        with torch.no_grad():\n",
    "            result = eval_ppl(model, tokenizer)\n",
    "    finally:\n",
    "        # Drop the local reference even when evaluation raises: the notebook\n",
    "        # keeps the failing frame alive through the stored traceback, which\n",
    "        # would otherwise pin the weights in GPU memory.\n",
    "        del model\n",
    "        gc.collect()\n",
    "        torch.cuda.empty_cache()\n",
    "\n",
    "    print(f\"PPL for {model_path}: {result}\")\n",
    "    return result"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dense"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2daae8f3df3247758f5ab0607fa1aae2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n",
      "You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1a1a1288fbd4427c96b70d91fe500714",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/data/huggingface_models/decapoda-research/llama-7b-hf: 5.677204132080078\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "5.677204132080078"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the dense checkpoint, the reference point for the pruned models below.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/data/huggingface_models/decapoda-research/llama-7b-hf\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Magnitude"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6ece1f00b366456eb3fb612ced62ee57",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b2166943d8de417e9719b17e71cb54a8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.1: 5.803542613983154\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d2976ff80ece4d53b0587caed601275a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a198a183513547b2bed5af411c82ee78",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.2: 6.017833709716797\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8286cfdcf8eb473db88b57f4068727c9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b5d019f1c53e4ca58cebe6863bf04f0d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.3: 6.621606349945068\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c0ecdd56b1a34c7d9865f28c602909a4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "61d79c5619454bac9f25eb5c9c9441e6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.4: 8.041034698486328\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8715a03331f149f49d5d33a7da4b7432",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0d945d89ba1e453b929dd66e2e7edac8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.5: 17.285242080688477\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "29819863986b47fd9efb3ab8c1e20103",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "750af8161fa0491fae79b5db29820a76",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.6: 152.3600616455078\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "191e454b65fd44d7a24ce4b8041c8bb7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "eacbf06b006241728457d744258197f2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.7: 48427.84765625\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fb8bb2da90714498885c37b0846d5f7e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "22878db0468c4a179057347680dd96a7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.8: 132127.296875\n",
      "{'sparsity_level': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], 'ppl': [5.803542613983154, 6.017833709716797, 6.621606349945068, 8.041034698486328, 17.285242080688477, 152.3600616455078, 48427.84765625, 132127.296875]}\n"
     ]
    }
   ],
   "source": [
    "# Sweep the magnitude-pruned (unstructured) checkpoints and collect one\n",
    "# perplexity per sparsity level; `ppl_data` feeds the DataFrame cell below.\n",
    "ppl_data = {\"sparsity_level\": [], \"ppl\": []}\n",
    "for sparsity_level in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]:\n",
    "    # Evaluate first and record both columns afterwards, so an interrupted or\n",
    "    # failed run never leaves the two lists with different lengths.\n",
    "    level_ppl = model_eval_ppl(\n",
    "        f\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/{sparsity_level}\"\n",
    "    )\n",
    "    ppl_data[\"sparsity_level\"].append(sparsity_level)\n",
    "    ppl_data[\"ppl\"].append(level_ppl)\n",
    "print(ppl_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sparsity_level</th>\n",
       "      <th>ppl</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.1</td>\n",
       "      <td>5.803543</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.2</td>\n",
       "      <td>6.017834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.3</td>\n",
       "      <td>6.621606</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.4</td>\n",
       "      <td>8.041035</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.5</td>\n",
       "      <td>17.285242</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.6</td>\n",
       "      <td>152.360062</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.7</td>\n",
       "      <td>48427.847656</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0.8</td>\n",
       "      <td>132127.296875</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sparsity_level            ppl\n",
       "0             0.1       5.803543\n",
       "1             0.2       6.017834\n",
       "2             0.3       6.621606\n",
       "3             0.4       8.041035\n",
       "4             0.5      17.285242\n",
       "5             0.6     152.360062\n",
       "6             0.7   48427.847656\n",
       "7             0.8  132127.296875"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the sweep results as a table, one row per sparsity level.\n",
    "pd.DataFrame(data=ppl_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "005087ded73647509802a9f7b9cfb85c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3bc4b251498348fdb7f00f5eaeda69bf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.45: 9.627829551696777\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "9.627829551696777"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Extra data point between the 0.4 and 0.5 entries of the sweep above.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.45\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d76f2a71ecdb4ea3a10c546f3af7d412",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ccde1e2c4b92475fa0bf3d92565a427c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.5: 17.285242080688477\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "17.285242080688477"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stand-alone re-run of the 0.5 sparsity checkpoint, which the sweep above also covers.\n",
    "# `model_eval_ppl` already runs `gc.collect()` and `torch.cuda.empty_cache()` before\n",
    "# loading, so no manual cleanup is needed in this cell.\n",
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.5\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "188b9e7e6f154108811fa6afd5e203f8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "47e71187ffaa40229f3e890bd847e4ec",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.55: 24.706262588500977\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "24.706262588500977"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Extra data point between the 0.5 and 0.6 entries of the sweep above.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.55\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8fda2a289a2342439c01cf76d11ae368",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "16b4242135594482b6d3b96364ec47a8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.65: 17000.6015625\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "17000.6015625"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Extra data point between the 0.6 and 0.7 entries of the sweep above.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.65\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1fb1baecde4946928276bfc055c50990",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f6ef2fbcd3844c0d8ade2da377bbdce9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.7: 48427.84765625\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "48427.84765625"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stand-alone re-run of the 0.7 sparsity checkpoint, which the sweep above also covers.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.7\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f62b57a16c174afd96391d71c0bf71e5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "040906d85aa6447485faec7092f014d3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.8: 132127.296875\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "132127.296875"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/unstructured/0.8\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2b2f89867fb14ea2ba2e444762a8154f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "83ac5c66cc0f4dc090f091fa33820231",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/semistructured/2_4: 42.534244537353516\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "42.534244537353516"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/semistructured/2_4\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0ce3799db474434ba3b92e33b2ae6ca0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n",
      "Using the latest cached version of the dataset since wikitext couldn't be found on the Hugging Face Hub\n",
      "Found the latest cached dataset configuration 'wikitext-2-raw-v1' at /data0/users/tanganke/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3 (last modified on Wed Aug 28 13:14:33 2024).\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b135215da7bf4264a0d38ed2bc8f53f5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/semistructured/4_8: 16.829370498657227\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "16.829370498657227"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/magnitude/semistructured/4_8\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Wanda"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8a5e2dfe8e024eafb0ef7d3a1915d754",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7f6e4dfb9ee8428e9661f300e7085e76",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.1: 5.696322441101074\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "5.696322441101074"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.1\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4a945a61345a498eb4f549596820c43e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ea2ef25397d7405b98127aae58b4bfba",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.2: 5.817018032073975\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "5.817018032073975"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.2\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6d36065153e04ebdb92c6f8149fa5b1f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e46883d63bea4efe94970199e216168d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.3: 5.998795032501221\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "5.998795032501221"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.3\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "62d82ab546004f37816d6687f6c5c1e4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "493b673069664387b449cec53be6a4b8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.4: 6.386667251586914\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "6.386667251586914"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.4\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2c03a0dabf924dab89ef4ee26392fbbb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3c6c83f9ec72424db138b6c6a88e1722",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.5: 7.257486343383789\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "7.257486343383789"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gc.collect()\n",
    "torch.cuda.empty_cache()\n",
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.5\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e9eac1378dfe4856b6c3ea5c9ee61f52",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "24ea450773614e1cb7ba06042b6c5323",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.6: 10.691720962524414\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "10.691720962524414"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.6\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0a50fdf6539b40c281257041f430adaf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e514b93cc84b487db200848d96695353",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.7: 84.69001770019531\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "84.69001770019531"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.7\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "04a021bf2416475c940375ec123cdecb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e6b07e358f2a4a649a60cf896162e1fd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.8: 6239.4091796875\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "6239.4091796875"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/unstructured/0.8\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ad7115ba9fa64b9cae2b3f14d341762a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bfa8f3f6528f476492793d5a47930e8b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/semistructured/2_4: 11.527883529663086\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "11.527883529663086"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gc.collect()\n",
    "torch.cuda.empty_cache()\n",
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/semistructured/2_4\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "108349e188a3493c82ac3bb04999c705",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "387c7d646a004c0d8a88d54c9cba2c4c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/semistructured/4_8: 8.567511558532715\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "8.567511558532715"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gc.collect()\n",
    "torch.cuda.empty_cache()\n",
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/wanda_pruning/semistructured/4_8\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Wanda w/ SVD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "78a4340646074aea891229940f37af00",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a36c7b66c15a47ce95dcf456e1233aae",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.5: 7.086696624755859\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "7.086696624755859"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.5\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1745f31c6bda470a830d68592aac76fe",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a35b8d6d3e22488ab17e4fa92df84a0e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.6: 9.595812797546387\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "9.595812797546387"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.6\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0f8b5aacf94840398cf4eec57d11a709",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5466157b030a42e48ed4ec32bc73883e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.7: 35.647666931152344\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "35.647666931152344"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.7\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9d0edcbc0d944e2b8078494317b6fe71",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fb383243a17047ebae00d60692d401f0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.8: 978.4933471679688\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "978.4933471679688"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/unstructured/0.8\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2f01e75c8e0e401ea54bfc499839fcdf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5a098084dd184c13a5c9c2aac53e35da",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/semistructured/2_4: 10.475244522094727\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "10.475244522094727"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/semistructured/2_4\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c5c6adea5214400d8dbb81e1bb070258",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f1a4d6614c0a4364aba23591be5e28bb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/semistructured/4_8: 8.141329765319824\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "8.141329765319824"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/wanda/semistructured/4_8\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Magnitude w/ SVD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cfc1846d14db4e65827c69d7a4d2c853",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "91d113c33832489ea151b9bc438ee5a6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.1: 5.772339344024658\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "831a23cf1723460891178317491f736d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c4f4c63c6c9c46c298a6e11aa489038b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.2: 5.920957565307617\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "373ccc4852404fbaa675238cf89a725e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "41ce93ec1a714eb785c753e6be452c23",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.3: 6.2547383308410645\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f3394c63f7b3480285feb13b2bde668a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4c73bae1ecd74ad58645e4568dd20c9c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.4: 6.850766658782959\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f634d10d868c4d3eb17d3c3a0deacbab",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c0cc9a78e74346ab9f72c406e891326f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.5: 8.063112258911133\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a95c98e767c242289b3d380657654aa0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "06a53d795f834da1910617882d44e7c7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.6: 13.5851411819458\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "441e954007bc4f7abfadaf2a519c838f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bd007a4db74c4a6fa505451089c55c1e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.7: 286.7393493652344\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a26c744dc7884e0c90d1c4b706c37452",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9bef6da8e9204e99b3783bdc6dddc146",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.8: 12499.7509765625\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sparsity_level</th>\n",
       "      <th>ppl</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.1</td>\n",
       "      <td>5.772339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.2</td>\n",
       "      <td>5.920958</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.3</td>\n",
       "      <td>6.254738</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.4</td>\n",
       "      <td>6.850767</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.5</td>\n",
       "      <td>8.063112</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.6</td>\n",
       "      <td>13.585141</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.7</td>\n",
       "      <td>286.739349</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0.8</td>\n",
       "      <td>12499.750977</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sparsity_level           ppl\n",
       "0             0.1      5.772339\n",
       "1             0.2      5.920958\n",
       "2             0.3      6.254738\n",
       "3             0.4      6.850767\n",
       "4             0.5      8.063112\n",
       "5             0.6     13.585141\n",
       "6             0.7    286.739349\n",
       "7             0.8  12499.750977"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ppl_data = {\"sparsity_level\": [], \"ppl\": []}\n",
    "for sparsity_level in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]:\n",
    "    ppl_data[\"sparsity_level\"].append(sparsity_level)\n",
    "    ppl_data[\"ppl\"].append(\n",
    "        model_eval_ppl(\n",
    "            f\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/{sparsity_level}\"\n",
    "        )\n",
    "    )\n",
    "pd.DataFrame(ppl_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "de76b552d84f445bb07d8a623b347aaf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "52959ff9a375455c9d98c146cfed6ab7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.55: 9.581928253173828\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "9.581928253173828"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.55\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e1cb1911ff564105b69bd2a2cce121d3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "098d7ba10f734119953298f05d15ee3b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.65: 33.92226791381836\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "33.92226791381836"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.65\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "46e3efdc028b4f5e96345b74774a10ae",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8f13649cf77546ed85e9902859648e90",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.5: 8.063112258911133\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "8.063112258911133"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/unstructured/0.5\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4d7e7499484b4bc497d1550b3ed0455e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b30381a4f21d46a2bd3e64f27d65d60d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/semistructured/2_4: 12.724442481994629\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "12.724442481994629"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the checkpoint saved under sparselo/magnitude/semistructured/2_4.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/semistructured/2_4\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fe636dd0241e42a493eb6cbce69d23e2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "086d4b5451464a5c8c6d81933b6c023f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/semistructured/4_8: 9.289239883422852\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "9.289239883422852"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the checkpoint saved under sparselo/magnitude/semistructured/4_8.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/sparselo/magnitude/semistructured/4_8\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Iterative Sparselo"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Magnitude"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7b960d9182ae4499b2030efbc0846cb3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a7da6c5c6d9c48188778db32358cb7dd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.1: 5.72618293762207\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "5.72618293762207"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the checkpoint saved under iterative_sparselo/magnitude/unstructured/0.1.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.1\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "441fa4b8a16a49be8c2c7c1ebe588916",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "aa4d1645ac6648cf824db2073a6389aa",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.2: 5.892498016357422\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "5.892498016357422"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the checkpoint saved under iterative_sparselo/magnitude/unstructured/0.2.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.2\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fa2d1a3b1f4a4f20adbeac2739bc4fc5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d03d5a0afc0840da83c384599dec6f2a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.3: 6.216446399688721\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "6.216446399688721"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the checkpoint saved under iterative_sparselo/magnitude/unstructured/0.3.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.3\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0a4d21bb178c445cb7bb8094570ad3e0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d9caa0abeda445f3be06b52a7f186e8b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.4: 6.817346572875977\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "6.817346572875977"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the checkpoint saved under iterative_sparselo/magnitude/unstructured/0.4.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.4\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6d75c4acaab841728821a1bf00310b63",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "38423b8e007146f5becf5f87da2ffe1f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.5: 7.974487781524658\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "7.974487781524658"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the checkpoint saved under iterative_sparselo/magnitude/unstructured/0.5.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.5\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bd300e07f25147ab889581fbd206675e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e86420b3017d465baf9e1bbee154678e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.5: 702.6592407226562\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "702.6592407226562"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Evaluate the iterative_sparselo/magnitude/unstructured/0.5 checkpoint through\n",
    "# model_eval_ppl_skip_lowrank, a helper defined in another cell of this notebook.\n",
    "# NOTE(review): presumably evaluates with the low-rank component skipped; confirm against its definition.\n",
    "model_eval_ppl_skip_lowrank(\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.5\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a17bf542671e4845994fc1ec0b5f5e27",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "54e1ecaa63a1409180b88af6233ef99e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.5: 436568.6875\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "436568.6875"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Evaluate the iterative_sparselo/magnitude/unstructured/0.5 checkpoint through\n",
    "# model_eval_ppl_skip_sparse, a helper defined in another cell of this notebook.\n",
    "# NOTE(review): presumably evaluates with the sparse component skipped; confirm against its definition.\n",
    "model_eval_ppl_skip_sparse(\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.5\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d2e7b2975e594eb2ab816a3abb60a03c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f23448a5e3534546ad18bd541ef39590",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.6: 12.137877464294434\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "12.137877464294434"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the checkpoint saved under iterative_sparselo/magnitude/unstructured/0.6.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.6\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c6a0cca102f64cc3be2826153d78409d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ec025c1464184ce892ca0da1d4ce47ff",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.7: 200.09254455566406\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "200.09254455566406"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the checkpoint saved under iterative_sparselo/magnitude/unstructured/0.7.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.7\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "22c6a17294b74a3db5a3c4057482e2a6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ddaf861cfab64dfdb9a8d53eacfc1936",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.8: 14475.9248046875\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "14475.9248046875"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the checkpoint saved under iterative_sparselo/magnitude/unstructured/0.8.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/unstructured/0.8\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "429bfac9fd2b4bab9f580f302e651484",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "81eeb79ed9524b2f82223a53ae70886d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/semistructured/2_4: 10.735949516296387\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "10.735949516296387"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the checkpoint saved under iterative_sparselo/magnitude/semistructured/2_4.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/semistructured/2_4\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1eb753f0d1f747fe9919b958ef161cd4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "783b1bad95424ac29af968c00016762b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/semistructured/4_8: 8.859417915344238\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "8.859417915344238"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the checkpoint saved under iterative_sparselo/magnitude/semistructured/4_8.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/magnitude/semistructured/4_8\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Wanda"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0aef5a6319a54627972d14eb68eea5b6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b7a85807c41049c6a90d62de96b2f10e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.1: 5.680875778198242\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "5.680875778198242"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the iterative_sparselo/wanda/unstructured/0.1 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.1\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "42ff9847a01144efa870a23d86382e5a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c71edd89710549b4866c7af019323d82",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.2: 5.75728178024292\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "5.75728178024292"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the iterative_sparselo/wanda/unstructured/0.2 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.2\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "080b52be08b64be9a390aa9255f90b25",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "07ad299bd72b46a6af6b4d2c1ace45b0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.3: 5.913062572479248\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "5.913062572479248"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the iterative_sparselo/wanda/unstructured/0.3 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.3\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "10b4056906b848d985c6ab35b479b222",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8ff2514695904603b22ef0b0b1786d59",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.4: 6.231904029846191\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "6.231904029846191"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the iterative_sparselo/wanda/unstructured/0.4 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.4\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3d4e7865989b4929bb5a8002ea7cecfe",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "67224a0cd4f340fbbef66aa5027729f7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.5: 6.920602321624756\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "6.920602321624756"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the iterative_sparselo/wanda/unstructured/0.5 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.5\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "823be453e4c049a1bf03569f2e3b27fc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2665892edf9e427ba0d66f78c192ded9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.6: 8.972464561462402\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "8.972464561462402"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the iterative_sparselo/wanda/unstructured/0.6 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.6\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8f756d679aad45eda608e6c5b8967742",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cff0064354c54933abc4439cba9912fc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.7: 32.90485382080078\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "32.90485382080078"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the iterative_sparselo/wanda/unstructured/0.7 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/unstructured/0.7\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8f2439c1f7ed488e8998a3062e2a7a92",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b8984533fbc1462d9ef8bf5476b6f6ff",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/semistructured/2_4: 9.1827392578125\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "9.1827392578125"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the iterative_sparselo/wanda/semistructured/2_4 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/semistructured/2_4\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "aa14d91952e14ca1899eb778bca13d7b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3c1925c66c54416ca51fcb091e82fe02",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/semistructured/4_8: 7.743643760681152\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "7.743643760681152"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the iterative_sparselo/wanda/semistructured/4_8 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/iterative_sparselo/wanda/semistructured/4_8\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PCP with mask"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Magnitude"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d8011c0cf0a04b88845d3bc7ddab1a5d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "262ed07368964a448bb973506b4fbba5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.1: 5.800900459289551\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "5.800900459289551"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the pcp_sparselo/magnitude/unstructured/0.1 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.1\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a93861de381344f09ac4852c4d2e7d41",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "db997eb1dda0450da67d194cada96688",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.2: 6.002498149871826\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "6.002498149871826"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the pcp_sparselo/magnitude/unstructured/0.2 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.2\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9bc8a98e200e41a6979443b403217565",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "acbbb1ea82234e30bf9f982dfb2acae7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.3: 6.358213424682617\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "6.358213424682617"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the pcp_sparselo/magnitude/unstructured/0.3 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.3\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8a4858ef30154ca389e3edb0d12bfa9e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "844a051270274e0a82e2a02e933d13a5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.4: 7.0995635986328125\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "7.0995635986328125"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the pcp_sparselo/magnitude/unstructured/0.4 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.4\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0f95bd7d6d644d7990a1f0f21d284372",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "725356eac74b49eba738cd3a68377b19",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.5: 8.69730281829834\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "8.69730281829834"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the pcp_sparselo/magnitude/unstructured/0.5 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.5\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "37c8a4611bf545dd867fc45ce092677a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "85bf8980f80b403a9b7c925a8de8e4ce",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.6: 16.671098709106445\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "16.671098709106445"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the pcp_sparselo/magnitude/unstructured/0.6 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.6\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d2f4c2a026b3414396db968ec0d130b3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5535ae5932d9441e9ead00acbf8f9209",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.7: 727.54296875\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "727.54296875"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the pcp_sparselo/magnitude/unstructured/0.7 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/unstructured/0.7\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a9f5a4eaa78d466eaa98140bbb933f82",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5e9ab62abba9418196aab5996991468d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/semistructured/2_4: 16.622926712036133\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "16.622926712036133"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Perplexity of the pcp_sparselo/magnitude/semistructured/2_4 checkpoint.\n",
    "model_eval_ppl(\n",
    "    model_path=\"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/semistructured/2_4\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bdc8b72c50da482aa301ec7570c058bf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e72ed2a6afc24a97851902bc48378b43",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/semistructured/4_8: 10.601716995239258\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "10.601716995239258"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the checkpoint saved under magnitude/semistructured/4_8 and report its perplexity.\n",
    "# NOTE(review): the directory name suggests magnitude pruning with a 4:8 semi-structured pattern; confirm against the run that wrote it.\n",
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/magnitude/semistructured/4_8\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Wanda"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8e21af07754343f8969bbee069efdec7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d2ed25231e604a1399e8fe3f6d41097c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.1: 5.691865921020508\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "5.691865921020508"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the checkpoint saved under wanda/unstructured/0.1 and report its perplexity.\n",
    "# NOTE(review): the directory name suggests Wanda pruning at an unstructured sparsity ratio of 0.1; confirm against the run that wrote it.\n",
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.1\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c65d0d65e6bc41ab85b8b15e2693f679",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "139705ef147e42b198a909d8d4db40e5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.2: 5.833505630493164\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "5.833505630493164"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the checkpoint saved under wanda/unstructured/0.2 and report its perplexity.\n",
    "# NOTE(review): the directory name suggests Wanda pruning at an unstructured sparsity ratio of 0.2; confirm against the run that wrote it.\n",
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.2\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a4cd544843cc417ba681389bdee3c722",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5d5847236abd4a4c81e8fac13571d487",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.3: 6.017829418182373\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "6.017829418182373"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the checkpoint saved under wanda/unstructured/0.3 and report its perplexity.\n",
    "# NOTE(review): the directory name suggests Wanda pruning at an unstructured sparsity ratio of 0.3; confirm against the run that wrote it.\n",
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.3\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "717146a64c7842c2968863bc5c55d62d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9267ae30dcf34f77a61047e6ddfc4719",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.4: 6.446828842163086\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "6.446828842163086"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the checkpoint saved under wanda/unstructured/0.4 and report its perplexity.\n",
    "# NOTE(review): the directory name suggests Wanda pruning at an unstructured sparsity ratio of 0.4; confirm against the run that wrote it.\n",
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.4\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "654ad76a8fea4d99964e91cf909698c5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "63da32e08b9547438043290c01b79126",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.5: 7.278290271759033\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "7.278290271759033"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the checkpoint saved under wanda/unstructured/0.5 and report its perplexity.\n",
    "# NOTE(review): the directory name suggests Wanda pruning at an unstructured sparsity ratio of 0.5; confirm against the run that wrote it.\n",
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.5\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7ceba727ded5400195c9f9a040efce73",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "009bacc8b0204c929f7865e854116f9b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.6: 10.189253807067871\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "10.189253807067871"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the checkpoint saved under wanda/unstructured/0.6 and report its perplexity.\n",
    "# NOTE(review): the directory name suggests Wanda pruning at an unstructured sparsity ratio of 0.6; confirm against the run that wrote it.\n",
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.6\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0955ce933e484c1f999f3979a0ff1625",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3a231fda67e14d8fbd16c538a26fffd8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.7: 47.11124038696289\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "47.11124038696289"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the checkpoint saved under wanda/unstructured/0.7 and report its perplexity.\n",
    "# NOTE(review): the directory name suggests Wanda pruning at an unstructured sparsity ratio of 0.7; confirm against the run that wrote it.\n",
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/unstructured/0.7\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "398b241679824a00b84538a8a66a9714",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "eae95a1f80e8459d8201c468f59237fc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/semistructured/2_4: 11.22415542602539\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "11.22415542602539"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the checkpoint saved under wanda/semistructured/2_4 and report its perplexity.\n",
    "# NOTE(review): the directory name suggests Wanda pruning with a 2:4 semi-structured pattern; confirm against the run that wrote it.\n",
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/semistructured/2_4\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data0/users/tanganke/anaconda3/envs/fusionbench/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:546: UserWarning: `pad_token_id` should be positive but got -1. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly by `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation, and ensure your `input_ids` input does not have negative values.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "10a51f39cea3495bb9047744425ed458",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "evaluating on wikitext2\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "dd6e2aaa52234a7483842065d6685f75",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating samples:   0%|          | 0/128 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nsamples 166\n",
      "sample 0\n",
      "sample 50\n",
      "sample 100\n",
      "sample 150\n",
      "PPL for /data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/semistructured/4_8: 8.633923530578613\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "8.633923530578613"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the checkpoint saved under wanda/semistructured/4_8 and report its perplexity.\n",
    "# NOTE(review): the directory name suggests Wanda pruning with a 4:8 semi-structured pattern; confirm against the run that wrote it.\n",
    "model_eval_ppl(\n",
    "    \"/data0/users/tanganke/projects/fusion_bench/outputs/llama/pcp_sparselo/wanda/semistructured/4_8\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "fusionbench",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
