{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "53ae29c2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Defaulting to user installation because normal site-packages is not writeable\n",
      "Collecting autoawq==0.2.8\n",
      "  Using cached autoawq-0.2.8.tar.gz (71 kB)\n",
      "  Preparing metadata (setup.py): started\n",
      "  Preparing metadata (setup.py): finished with status 'done'\n",
      "Building wheels for collected packages: autoawq\n",
      "  Building wheel for autoawq (setup.py): started\n",
      "  Building wheel for autoawq (setup.py): finished with status 'done'\n",
      "  Created wheel for autoawq: filename=autoawq-0.2.8-py3-none-any.whl size=108794 sha256=171abcfe6e6f21622c00e1540030640b8503422d502526a8a8a8bcb7974a27dd\n",
      "  Stored in directory: c:\\users\\admin\\appdata\\local\\pip\\cache\\wheels\\7c\\90\\d3\\aea34561b3aa20db93345211c6634b82210ac2b9a2adc8df8c\n",
      "Successfully built autoawq\n",
      "Installing collected packages: autoawq\n",
      "Successfully installed autoawq-0.2.8\n",
      "Defaulting to user installation because normal site-packages is not writeable\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "ERROR: Could not find a version that satisfies the requirement triton (from versions: none)\n",
      "ERROR: No matching distribution found for triton\n"
     ]
    }
   ],
   "source": [
    "!pip install --no-deps autoawq==0.2.8\n",
    "!pip install triton\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "07d6b683",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import pandas as pd\n",
    "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
    "from tqdm import tqdm\n",
    "import time\n",
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "0d4b8b9d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UserID</th>\n",
       "      <th>DocID</th>\n",
       "      <th>News_Body</th>\n",
       "      <th>Top_Keyphrases</th>\n",
       "      <th>model headline</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>U100_1</td>\n",
       "      <td>N79048</td>\n",
       "      <td>Curious just how far your dollar goes in Bellt...</td>\n",
       "      <td>[Apartment, rental, dogs, Ave, Fourth Ave]</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>U100_10</td>\n",
       "      <td>N94939</td>\n",
       "      <td>Italian authorities are searching for Swiss fo...</td>\n",
       "      <td>[Boys, Swiss club, Italy, Saturday afternoon, ...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>U100_100</td>\n",
       "      <td>N101378</td>\n",
       "      <td>Etiquette standards have changed throughout th...</td>\n",
       "      <td>[kids, manners, today world, old-fashioned, To...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>U100_101</td>\n",
       "      <td>N75328</td>\n",
       "      <td>DALLAS (CBSDFW.COM)   A plane made an emergenc...</td>\n",
       "      <td>[flight, Health, sickness, DALLAS, Texas]</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>U100_102</td>\n",
       "      <td>N51667</td>\n",
       "      <td>All the pitching stats that's fit to print Yes...</td>\n",
       "      <td>[Astros Batting, Batting, pitching, Astros, Ve...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15543</th>\n",
       "      <td>U37_11</td>\n",
       "      <td>N67982</td>\n",
       "      <td>A 54-year-old man was arrested and charged aft...</td>\n",
       "      <td>[officer, police officer, impersonating office...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15544</th>\n",
       "      <td>U37_110</td>\n",
       "      <td>N40427</td>\n",
       "      <td>Facebook announced plans for its own cryptocur...</td>\n",
       "      <td>[Debbie, Business, logo, kind, Facebook]</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15545</th>\n",
       "      <td>U37_111</td>\n",
       "      <td>N39260</td>\n",
       "      <td>Medical device company Soliton Inc. (NASDAQ: S...</td>\n",
       "      <td>[tattoo, removal, device, RAP device, RAP]</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15546</th>\n",
       "      <td>U37_112</td>\n",
       "      <td>N63210</td>\n",
       "      <td>RENTON, Wash. (AP) Shaquem Griffin is thankful...</td>\n",
       "      <td>[Seahawks, Griffin, season, Seattle, Seattle S...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15547</th>\n",
       "      <td>U37_113</td>\n",
       "      <td>N21484</td>\n",
       "      <td>Ronald Vermeulen is seen in a June 13, 2019, b...</td>\n",
       "      <td>[Street, Warren Street, Riverside Police, Los ...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>15548 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         UserID    DocID                                          News_Body  \\\n",
       "0        U100_1   N79048  Curious just how far your dollar goes in Bellt...   \n",
       "1       U100_10   N94939  Italian authorities are searching for Swiss fo...   \n",
       "2      U100_100  N101378  Etiquette standards have changed throughout th...   \n",
       "3      U100_101   N75328  DALLAS (CBSDFW.COM)   A plane made an emergenc...   \n",
       "4      U100_102   N51667  All the pitching stats that's fit to print Yes...   \n",
       "...         ...      ...                                                ...   \n",
       "15543    U37_11   N67982  A 54-year-old man was arrested and charged aft...   \n",
       "15544   U37_110   N40427  Facebook announced plans for its own cryptocur...   \n",
       "15545   U37_111   N39260  Medical device company Soliton Inc. (NASDAQ: S...   \n",
       "15546   U37_112   N63210  RENTON, Wash. (AP) Shaquem Griffin is thankful...   \n",
       "15547   U37_113   N21484  Ronald Vermeulen is seen in a June 13, 2019, b...   \n",
       "\n",
       "                                          Top_Keyphrases model headline  \n",
       "0             [Apartment, rental, dogs, Ave, Fourth Ave]                 \n",
       "1      [Boys, Swiss club, Italy, Saturday afternoon, ...                 \n",
       "2      [kids, manners, today world, old-fashioned, To...                 \n",
       "3              [flight, Health, sickness, DALLAS, Texas]                 \n",
       "4      [Astros Batting, Batting, pitching, Astros, Ve...                 \n",
       "...                                                  ...            ...  \n",
       "15543  [officer, police officer, impersonating office...                 \n",
       "15544           [Debbie, Business, logo, kind, Facebook]                 \n",
       "15545         [tattoo, removal, device, RAP device, RAP]                 \n",
       "15546  [Seahawks, Griffin, season, Seattle, Seattle S...                 \n",
       "15547  [Street, Warren Street, Riverside Police, Los ...                 \n",
       "\n",
       "[15548 rows x 5 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "file_path='BTier_Mega_V1_user_document_keyphrases.pkl'\n",
    "with open(file_path, 'rb') as file:\n",
    "    data = pickle.load(file)\n",
    "\n",
    "df=pd.DataFrame(data)\n",
    "\n",
    "df[\"model headline\"] = \"\"\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "4639f2ac",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "No CUDA found, replace GEMM with IPEX version to support non-cuda AWQ model.\n",
      "You have loaded an AWQ model without setting device_map, please set 'cpu' or 'xpu' or 'auto'\n",
      "`low_cpu_mem_usage` was None, now default to True since model is quantized.\n",
      "The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.\n"
     ]
    },
    {
     "ename": "AssertionError",
     "evalue": "Please install IPEX package with `pip install intel_extension_for_pytorch`.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAssertionError\u001b[0m                            Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[3], line 2\u001b[0m\n\u001b[0;32m      1\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstelterlab/DeepSeek-R1-Distill-Qwen-14B-AWQ\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m----> 2\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mAutoModelForCausalLM\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstelterlab/DeepSeek-R1-Distill-Qwen-14B-AWQ\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python312\\site-packages\\transformers\\models\\auto\\auto_factory.py:564\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[0;32m    562\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(config) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[0;32m    563\u001b[0m     model_class \u001b[38;5;241m=\u001b[39m _get_model_class(config, \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping)\n\u001b[1;32m--> 564\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    565\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhub_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[0;32m    566\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    567\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m    568\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnrecognized configuration class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for this kind of AutoModel: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    569\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel type should be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(c\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    570\u001b[0m )\n",
      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python312\\site-packages\\transformers\\modeling_utils.py:4104\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)\u001b[0m\n\u001b[0;32m   4101\u001b[0m     keep_in_fp32_modules \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m   4103\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m hf_quantizer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m-> 4104\u001b[0m     \u001b[43mhf_quantizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpreprocess_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   4105\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice_map\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkeep_in_fp32_modules\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeep_in_fp32_modules\u001b[49m\n\u001b[0;32m   4106\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   4108\u001b[0m     \u001b[38;5;66;03m# We store the original dtype for quantized models as we cannot easily retrieve it\u001b[39;00m\n\u001b[0;32m   4109\u001b[0m     \u001b[38;5;66;03m# once the weights have been quantized\u001b[39;00m\n\u001b[0;32m   4110\u001b[0m     \u001b[38;5;66;03m# Note that once you have loaded a quantized model, you can't change its dtype so this will\u001b[39;00m\n\u001b[0;32m   4111\u001b[0m     \u001b[38;5;66;03m# remain a single source of truth\u001b[39;00m\n\u001b[0;32m   4112\u001b[0m     config\u001b[38;5;241m.\u001b[39m_pre_quantization_dtype \u001b[38;5;241m=\u001b[39m torch_dtype\n",
      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python312\\site-packages\\transformers\\quantizers\\base.py:194\u001b[0m, in \u001b[0;36mHfQuantizer.preprocess_model\u001b[1;34m(self, model, **kwargs)\u001b[0m\n\u001b[0;32m    192\u001b[0m model\u001b[38;5;241m.\u001b[39mis_quantized \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m    193\u001b[0m model\u001b[38;5;241m.\u001b[39mquantization_method \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mquantization_config\u001b[38;5;241m.\u001b[39mquant_method\n\u001b[1;32m--> 194\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_process_model_before_weight_loading\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python312\\site-packages\\transformers\\quantizers\\quantizer_awq.py:107\u001b[0m, in \u001b[0;36mAwqQuantizer._process_model_before_weight_loading\u001b[1;34m(self, model, **kwargs)\u001b[0m\n\u001b[0;32m    104\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mquantization_config\u001b[38;5;241m.\u001b[39mmodules_to_not_convert \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m    105\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodules_to_not_convert\u001b[38;5;241m.\u001b[39mextend(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mquantization_config\u001b[38;5;241m.\u001b[39mmodules_to_not_convert)\n\u001b[1;32m--> 107\u001b[0m model, has_been_replaced \u001b[38;5;241m=\u001b[39m \u001b[43mreplace_with_awq_linear\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    108\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquantization_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquantization_config\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodules_to_not_convert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodules_to_not_convert\u001b[49m\n\u001b[0;32m    109\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    111\u001b[0m model \u001b[38;5;241m=\u001b[39m replace_quantization_scales(model, model\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mmodel_type)\n\u001b[0;32m    113\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m has_been_replaced:\n",
      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python312\\site-packages\\transformers\\integrations\\awq.py:187\u001b[0m, in \u001b[0;36mreplace_with_awq_linear\u001b[1;34m(model, modules_to_not_convert, quantization_config, current_key_name, has_been_replaced)\u001b[0m\n\u001b[0;32m    185\u001b[0m         model\u001b[38;5;241m.\u001b[39m_modules[name]\u001b[38;5;241m.\u001b[39mrequires_grad_(\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m    186\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mlist\u001b[39m(module\u001b[38;5;241m.\u001b[39mchildren())) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m--> 187\u001b[0m     _, has_been_replaced \u001b[38;5;241m=\u001b[39m \u001b[43mreplace_with_awq_linear\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    188\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    189\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmodules_to_not_convert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodules_to_not_convert\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    190\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcurrent_key_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurrent_key_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    191\u001b[0m \u001b[43m        \u001b[49m\u001b[43mquantization_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquantization_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    192\u001b[0m \u001b[43m        \u001b[49m\u001b[43mhas_been_replaced\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhas_been_replaced\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    193\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    194\u001b[0m \u001b[38;5;66;03m# Remove the last key for recursion\u001b[39;00m\n\u001b[0;32m    195\u001b[0m current_key_name\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n",
      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python312\\site-packages\\transformers\\integrations\\awq.py:187\u001b[0m, in \u001b[0;36mreplace_with_awq_linear\u001b[1;34m(model, modules_to_not_convert, quantization_config, current_key_name, has_been_replaced)\u001b[0m\n\u001b[0;32m    185\u001b[0m         model\u001b[38;5;241m.\u001b[39m_modules[name]\u001b[38;5;241m.\u001b[39mrequires_grad_(\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m    186\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mlist\u001b[39m(module\u001b[38;5;241m.\u001b[39mchildren())) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m--> 187\u001b[0m     _, has_been_replaced \u001b[38;5;241m=\u001b[39m \u001b[43mreplace_with_awq_linear\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    188\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    189\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmodules_to_not_convert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodules_to_not_convert\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    190\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcurrent_key_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurrent_key_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    191\u001b[0m \u001b[43m        \u001b[49m\u001b[43mquantization_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquantization_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    192\u001b[0m \u001b[43m        \u001b[49m\u001b[43mhas_been_replaced\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhas_been_replaced\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    193\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    194\u001b[0m \u001b[38;5;66;03m# Remove the last key for recursion\u001b[39;00m\n\u001b[0;32m    195\u001b[0m current_key_name\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n",
      "    \u001b[1;31m[... skipping similar frames: replace_with_awq_linear at line 187 (1 times)]\u001b[0m\n",
      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python312\\site-packages\\transformers\\integrations\\awq.py:187\u001b[0m, in \u001b[0;36mreplace_with_awq_linear\u001b[1;34m(model, modules_to_not_convert, quantization_config, current_key_name, has_been_replaced)\u001b[0m\n\u001b[0;32m    185\u001b[0m         model\u001b[38;5;241m.\u001b[39m_modules[name]\u001b[38;5;241m.\u001b[39mrequires_grad_(\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m    186\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mlist\u001b[39m(module\u001b[38;5;241m.\u001b[39mchildren())) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m--> 187\u001b[0m     _, has_been_replaced \u001b[38;5;241m=\u001b[39m \u001b[43mreplace_with_awq_linear\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    188\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    189\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmodules_to_not_convert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodules_to_not_convert\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    190\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcurrent_key_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurrent_key_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    191\u001b[0m \u001b[43m        \u001b[49m\u001b[43mquantization_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquantization_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    192\u001b[0m \u001b[43m        \u001b[49m\u001b[43mhas_been_replaced\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhas_been_replaced\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    193\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    194\u001b[0m \u001b[38;5;66;03m# Remove the last key for recursion\u001b[39;00m\n\u001b[0;32m    195\u001b[0m current_key_name\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n",
      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python312\\site-packages\\transformers\\integrations\\awq.py:174\u001b[0m, in \u001b[0;36mreplace_with_awq_linear\u001b[1;34m(model, modules_to_not_convert, quantization_config, current_key_name, has_been_replaced)\u001b[0m\n\u001b[0;32m    171\u001b[0m in_features \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39min_features\n\u001b[0;32m    172\u001b[0m out_features \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39mout_features\n\u001b[1;32m--> 174\u001b[0m model\u001b[38;5;241m.\u001b[39m_modules[name] \u001b[38;5;241m=\u001b[39m \u001b[43mtarget_cls\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    175\u001b[0m \u001b[43m    \u001b[49m\u001b[43mw_bit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquantization_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbits\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    176\u001b[0m \u001b[43m    \u001b[49m\u001b[43mgroup_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquantization_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroup_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    177\u001b[0m \u001b[43m    \u001b[49m\u001b[43min_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43min_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    178\u001b[0m \u001b[43m    \u001b[49m\u001b[43mout_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mout_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    179\u001b[0m \u001b[43m    \u001b[49m\u001b[43mbias\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m    180\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdev\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    181\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    182\u001b[0m has_been_replaced \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m    184\u001b[0m \u001b[38;5;66;03m# Force requires grad to False to avoid unexpected errors\u001b[39;00m\n",
      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python312\\site-packages\\awq\\modules\\linear\\gemm_ipex.py:18\u001b[0m, in \u001b[0;36mWQLinear_IPEX.__init__\u001b[1;34m(self, w_bit, group_size, in_features, out_features, bias, dev, training)\u001b[0m\n\u001b[0;32m     16\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, w_bit, group_size, in_features, out_features, bias, dev, training\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[0;32m     17\u001b[0m     nn\u001b[38;5;241m.\u001b[39mModule\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m---> 18\u001b[0m     \u001b[38;5;28;01massert\u001b[39;00m IPEX_INSTALLED, \\\n\u001b[0;32m     19\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease install IPEX package with `pip install intel_extension_for_pytorch`.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m     20\u001b[0m     \u001b[38;5;28;01massert\u001b[39;00m w_bit \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m4\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOnly 4 bit are supported for now.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m     22\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39muse_bf16 \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m \u001b[38;5;66;03m# Intel platform support bf16 even without amx.\u001b[39;00m\n",
      "\u001b[1;31mAssertionError\u001b[0m: Please install IPEX package with `pip install intel_extension_for_pytorch`."
     ]
    }
   ],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained(\"stelterlab/DeepSeek-R1-Distill-Qwen-14B-AWQ\")\n",
    "model = AutoModelForCausalLM.from_pretrained(\"stelterlab/DeepSeek-R1-Distill-Qwen-14B-AWQ\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f39564b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
    "model = model.to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7f7ae9ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "chunk_size = 1000\n",
    "\n",
    "# Loop over the DataFrame in chunks\n",
    "start_time = time.time()\n",
    "for start_idx in range(0, len(df), chunk_size):\n",
    "    end_idx = min(start_idx + chunk_size, len(df))\n",
    "    print(f\"Processing rows {start_idx} to {end_idx - 1}\")\n",
    "\n",
    "    for idx, row in tqdm(df.iloc[start_idx:end_idx].iterrows(), total=end_idx - start_idx, desc=\"Generating headlines...\"):\n",
    "        prompt = (\n",
    "            \"Generate a one-line complete headline sentence that captures the essence of the following document and must contain these key phrases. \"\n",
    "            \"Stop after headline is written, don't append anything extra as text.\\n\\n\"\n",
    "            f\"Document: {row['News_Body']}\\n\\n\"\n",
    "            f\"Key Phrases: {row['Top_Keyphrases']}\\n\\n\"\n",
    "            \"User-Specific Headline:\"\n",
    "        )\n",
    "\n",
    "        inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n",
    "\n",
    "        with torch.no_grad():\n",
    "            outputs = model.generate(\n",
    "                **inputs,\n",
    "                max_new_tokens=30,\n",
    "                min_new_tokens=20,\n",
    "                temperature=0.4,\n",
    "                top_p=0.4,\n",
    "                do_sample=True\n",
    "            )\n",
    "\n",
    "        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
    "        delimiter = \"User-Specific Headline:\"\n",
    "        if delimiter in generated_text:\n",
    "            headline = generated_text.split(delimiter, 1)[1].strip()\n",
    "        else:\n",
    "            headline = generated_text.strip()\n",
    "\n",
    "        df.at[idx, \"model headline\"] = headline\n",
    "\n",
    "    # Save a checkpoint after each chunk\n",
    "    checkpoint_path = f\"headline_checkpoint_{start_idx}_{end_idx - 1}.csv\"\n",
    "    df.iloc[start_idx:end_idx].to_csv(checkpoint_path, index=False)\n",
    "    print(f\"Checkpoint saved: {checkpoint_path}\")\n",
    "\n",
    "end_time = time.time()\n",
    "elapsed_time = end_time - start_time\n",
    "print(f\"Processed {len(df)} rows in {elapsed_time:.2f} seconds\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
