{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/mnt/20t/xkw/anaconda3/envs/EasySteer/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "sys.path.append('..')\n",
    "from omegaconf import OmegaConf, DictConfig\n",
    "from steer.vector_generators.sae_feature import search_for_explanations, get_feature_description\n",
    "# os.environ[\"NP_API_KEY\"] = \"your_neuronpedia_api_key_here\"\n",
    "modelId = 'gemma-2-9b'\n",
    "saeId = '24-gemmascope-res-16k'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Get Feature Fescriptions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Feature ID: 0, Description:  structured data or code blocks in programming-related documents, ExplanationModelName: None\n",
      "Feature ID: 1, Description:  elements and functions related to building and managing software components, specifically in programming contexts, ExplanationModelName: None\n",
      "Feature ID: 2, Description:  java data structure imports and declarations, ExplanationModelName: None\n",
      "Feature ID: 3, Description:  phrases that refer to specific concepts or examples, ExplanationModelName: None\n",
      "Feature ID: 4, Description: technical terms related to mathematical equations and concepts, ExplanationModelName: None\n",
      "Feature ID: 5, Description: references to specific countries and regions, particularly in relation to commerce and cultural aspects, ExplanationModelName: None\n",
      "Feature ID: 6, Description:  terms and concepts related to passive and active processes or states, ExplanationModelName: None\n",
      "Feature ID: 7, Description:  monetary symbols and related code syntax, ExplanationModelName: None\n",
      "Feature ID: 8, Description: structured information and references related to data, such as cases, observations, and features in various contexts, ExplanationModelName: None\n",
      "Feature ID: 9, Description: regulatory language related to occupational safety and health, ExplanationModelName: None\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{0: [{'modelId': 'gemma-2-9b',\n",
       "   'layer': '24-gemmascope-res-16k',\n",
       "   'feature_id': 0,\n",
       "   'description': ' structured data or code blocks in programming-related documents',\n",
       "   'explanationModelName': None,\n",
       "   'typeName': 'oai_token-act-pair'}],\n",
       " 1: [{'modelId': 'gemma-2-9b',\n",
       "   'layer': '24-gemmascope-res-16k',\n",
       "   'feature_id': 1,\n",
       "   'description': ' elements and functions related to building and managing software components, specifically in programming contexts',\n",
       "   'explanationModelName': None,\n",
       "   'typeName': 'oai_token-act-pair'}],\n",
       " 2: [{'modelId': 'gemma-2-9b',\n",
       "   'layer': '24-gemmascope-res-16k',\n",
       "   'feature_id': 2,\n",
       "   'description': ' java data structure imports and declarations',\n",
       "   'explanationModelName': None,\n",
       "   'typeName': 'oai_token-act-pair'}],\n",
       " 3: [{'modelId': 'gemma-2-9b',\n",
       "   'layer': '24-gemmascope-res-16k',\n",
       "   'feature_id': 3,\n",
       "   'description': ' phrases that refer to specific concepts or examples',\n",
       "   'explanationModelName': None,\n",
       "   'typeName': 'oai_token-act-pair'}],\n",
       " 4: [{'modelId': 'gemma-2-9b',\n",
       "   'layer': '24-gemmascope-res-16k',\n",
       "   'feature_id': 4,\n",
       "   'description': 'technical terms related to mathematical equations and concepts',\n",
       "   'explanationModelName': None,\n",
       "   'typeName': 'oai_token-act-pair'}],\n",
       " 5: [{'modelId': 'gemma-2-9b',\n",
       "   'layer': '24-gemmascope-res-16k',\n",
       "   'feature_id': 5,\n",
       "   'description': 'references to specific countries and regions, particularly in relation to commerce and cultural aspects',\n",
       "   'explanationModelName': None,\n",
       "   'typeName': 'oai_token-act-pair'}],\n",
       " 6: [{'modelId': 'gemma-2-9b',\n",
       "   'layer': '24-gemmascope-res-16k',\n",
       "   'feature_id': 6,\n",
       "   'description': ' terms and concepts related to passive and active processes or states',\n",
       "   'explanationModelName': None,\n",
       "   'typeName': 'oai_token-act-pair'}],\n",
       " 7: [{'modelId': 'gemma-2-9b',\n",
       "   'layer': '24-gemmascope-res-16k',\n",
       "   'feature_id': 7,\n",
       "   'description': ' monetary symbols and related code syntax',\n",
       "   'explanationModelName': None,\n",
       "   'typeName': 'oai_token-act-pair'}],\n",
       " 8: [{'modelId': 'gemma-2-9b',\n",
       "   'layer': '24-gemmascope-res-16k',\n",
       "   'feature_id': 8,\n",
       "   'description': 'structured information and references related to data, such as cases, observations, and features in various contexts',\n",
       "   'explanationModelName': None,\n",
       "   'typeName': 'oai_token-act-pair'}],\n",
       " 9: [{'modelId': 'gemma-2-9b',\n",
       "   'layer': '24-gemmascope-res-16k',\n",
       "   'feature_id': 9,\n",
       "   'description': 'regulatory language related to occupational safety and health',\n",
       "   'explanationModelName': None,\n",
       "   'typeName': 'oai_token-act-pair'}]}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_feature_description(modelId=modelId, saeId=saeId, position_ids=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], save_path=f\"neuronpedia_description_{modelId}_{saeId}.json\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Apply Steering Vector\n",
    "#### Controlling the chain of thought length in the R1 model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Hparams Dict: {'caa': ApplyCAAHyperParams(use_chat_template=True, system_prompt='', torch_dtype='bfloat16', alg_name='caa', model_name_or_path='/mnt/16t/xzwnlp/model/Qwen2-7B-Instruct', device='cuda:0', layers=[17], multipliers=[1.0], steer_vector_load_dir='vectors/Qwen2-7B-Instruct/translate/caa_vector', generation_data=['nontoxic'], max_new_tokens=100, generation_output_dir='vectors/Qwen2-7B-Instruct/translate_results/', do_sample=False, temperature=1.0, num_responses=1, generation_data_size=5, top_p=1.0)}\n",
      "Applying caa vectors to model ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ba2f92de33774394bcc04646d5444f48",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Apply CAA to model: /mnt/16t/xzwnlp/model/Qwen2-7B-Instruct\n",
      "Layer 17\n",
      "Steering vector path:  vectors/Qwen2-7B-Instruct/translate/caa_vector/layer_17.pt\n",
      "Steering vector:  tensor([-0.0342,  0.3535, -0.0330,  ...,  0.2031, -0.1553,  0.1680],\n",
      "       device='cuda:0', dtype=torch.bfloat16)\n",
      "Multiplier -2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Evaluating dataset translate_multipliers-2: 100%|██████████| 3/3 [00:04<00:00,  1.39s/it]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "translate_multipliers-2 results top3:  [{'input': '你好', 'pred': ['Hello! How can I assist you today?'], 'output': None}, {'input': 'how are you', 'pred': [\"作为 an AI, I don't have feelings,但是我 exist to assist and提供 information.我可以 help回答 questions,解答 queries, provide information, or进行 various conversations.请问 you有什么 questions or需要 assistance with?\"], 'output': None}, {'input': 'hello', 'pred': [\" Hello! How can I assist you today? Whether you have questions about AI,需要 help with homework,想要 to learn a new skill,或者 simply need someone to talk to, I'm here to help.无论 you遇到 is, I'm here to provide support and guidance.如果你 have any questions or need information on various topics, feel free to ask.无论是 providing you with useful information, offering advice, or just engaging in conversation, I'm here to facilitate your learning and help you achieve your\"], 'output': None}]\n",
      "Saving results to vectors/Qwen2-7B-Instruct/translate_results/translate_multipliers-2_results.json\n",
      "Applying caa vectors to model ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "54ff063c0046401abc7c3978f5025af1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Apply CAA to model: /mnt/16t/xzwnlp/model/Qwen2-7B-Instruct\n",
      "Layer 17\n",
      "Steering vector path:  vectors/Qwen2-7B-Instruct/translate/caa_vector/layer_17.pt\n",
      "Steering vector:  tensor([-0.0342,  0.3535, -0.0330,  ...,  0.2031, -0.1553,  0.1680],\n",
      "       device='cuda:0', dtype=torch.bfloat16)\n",
      "Multiplier -1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Evaluating dataset translate_multipliers-1: 100%|██████████| 3/3 [00:01<00:00,  1.62it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "translate_multipliers-1 results top3:  [{'input': '你好', 'pred': ['Hello! How can I assist you today?'], 'output': None}, {'input': 'how are you', 'pred': [\"As I'm an artificial intelligence, I don't have feelings or emotions like humans. However, I'm here and ready to assist you with any questions or tasks you have. How can I help you today?\"], 'output': None}, {'input': 'hello', 'pred': ['Hello! How can I assist you today?'], 'output': None}]\n",
      "Saving results to vectors/Qwen2-7B-Instruct/translate_results/translate_multipliers-1_results.json\n",
      "Applying caa vectors to model ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "66483d8db9834b9f8eadc790d909aa20",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Apply CAA to model: /mnt/16t/xzwnlp/model/Qwen2-7B-Instruct\n",
      "Layer 17\n",
      "Steering vector path:  vectors/Qwen2-7B-Instruct/translate/caa_vector/layer_17.pt\n",
      "Steering vector:  tensor([-0.0342,  0.3535, -0.0330,  ...,  0.2031, -0.1553,  0.1680],\n",
      "       device='cuda:0', dtype=torch.bfloat16)\n",
      "Multiplier 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Evaluating dataset translate_multipliers0: 100%|██████████| 3/3 [00:02<00:00,  1.37it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "translate_multipliers0 results top3:  [{'input': '你好', 'pred': ['你好！很高兴能为你服务。有什么问题我可以帮你解答吗？'], 'output': None}, {'input': 'how are you', 'pred': ['As I am a large language model, I do not have feelings or emotions like humans do. I exist solely to process and generate text based on the input I receive. So, I don\\'t \"feel\" anything, but I\\'m here to assist you to the best of my ability! How can I help you today?'], 'output': None}, {'input': 'hello', 'pred': ['Hello! How can I assist you today?'], 'output': None}]\n",
      "Saving results to vectors/Qwen2-7B-Instruct/translate_results/translate_multipliers0_results.json\n",
      "Applying caa vectors to model ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "62c25944792a47e9a88fd05f3e76ebbc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Apply CAA to model: /mnt/16t/xzwnlp/model/Qwen2-7B-Instruct\n",
      "Layer 17\n",
      "Steering vector path:  vectors/Qwen2-7B-Instruct/translate/caa_vector/layer_17.pt\n",
      "Steering vector:  tensor([-0.0342,  0.3535, -0.0330,  ...,  0.2031, -0.1553,  0.1680],\n",
      "       device='cuda:0', dtype=torch.bfloat16)\n",
      "Multiplier 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Evaluating dataset translate_multipliers1: 100%|██████████| 3/3 [00:01<00:00,  1.99it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "translate_multipliers1 results top3:  [{'input': '你好', 'pred': ['你好！很高兴能为你服务。有什么我可以帮助你的吗？'], 'output': None}, {'input': 'how are you', 'pred': ['作为一个AI，我并没有情感或感觉，所以我不“吃”，“sleep”或者“feel happy”。但我随时准备帮助你解答问题或提供信息。有什么我可以帮助你的吗？'], 'output': None}, {'input': 'hello', 'pred': ['你好！有什么我可以帮助你的吗？'], 'output': None}]\n",
      "Saving results to vectors/Qwen2-7B-Instruct/translate_results/translate_multipliers1_results.json\n",
      "Applying caa vectors to model ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "152c1e128ac049ec8df704f508245a50",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Apply CAA to model: /mnt/16t/xzwnlp/model/Qwen2-7B-Instruct\n",
      "Layer 17\n",
      "Steering vector path:  vectors/Qwen2-7B-Instruct/translate/caa_vector/layer_17.pt\n",
      "Steering vector:  tensor([-0.0342,  0.3535, -0.0330,  ...,  0.2031, -0.1553,  0.1680],\n",
      "       device='cuda:0', dtype=torch.bfloat16)\n",
      "Multiplier 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Evaluating dataset translate_multipliers2: 100%|██████████| 3/3 [00:01<00:00,  2.90it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "translate_multipliers2 results top3:  [{'input': '你好', 'pred': ['Hello！您好！'], 'output': None}, {'input': 'how are you', 'pred': [\"作为一个AI，I am无感情 but I'm设在这里tohelp你with任何questionsyoumighthave。So告诉我what你需要帮助with？\"], 'output': None}, {'input': 'hello', 'pred': ['你好！您好！\\n你好!'], 'output': None}]\n",
      "Saving results to vectors/Qwen2-7B-Instruct/translate_results/translate_multipliers2_results.json\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "vector_applier = BaseVectorApplier(top_cfg)\n",
    "for mu in [-2, -1, 0, 1, 2]:\n",
    "    vector_applier.hparams_dict[\"caa\"].multipliers = [mu]\n",
    "    vector_applier.apply_vectors()\n",
    "\n",
    "    # You can customize your own inputs\n",
    "    datasets={f'translate_multipliers{mu}':[{'input':'你好'},{'input':'how are you'},{'input':'hello'}]}\n",
    "\n",
    "    # Or use the datasets from config.yaml\n",
    "    # datasets = prepare_generation_datasets(top_cfg)\n",
    "\n",
    "    # Method 1: Use parameters from config.yaml\n",
    "    vector_applier.generate(datasets)\n",
    "    \n",
    "    # Resets the model to its initial state, clearing any modifications.\n",
    "    vector_applier.model.reset_all()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "EasySteer",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
