{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'model_name_or_path': '/mnt/16t/xzwnlp/model/Qwen2-7B-Instruct', 'torch_dtype': 'bfloat16', 'device': 'cuda:0', 'use_chat_template': True, 'system_prompt': '', 'steer_train_hparam_paths': ['../hparams/caa_hparams/generate_caa.yaml'], 'steer_train_dataset': ['r1'], 'steer_vector_output_dir': ['vectors/Qwen2-7B-Instruct/'], 'apply_steer_hparam_paths': ['../hparams/caa_hparams/apply_caa.yaml'], 'steer_vector_load_dir': ['vectors/Qwen2-7B-Instruct/translate/caa_vector'], 'generation_data': ['nontoxic'], 'generation_data_size': 5, 'generation_output_dir': 'vectors/Qwen2-7B-Instruct/translate_results/', 'num_responses': 1, 'steer_from_end_position': False, 'generation_params': {'max_new_tokens': 100, 'temperature': 0.9}, 'hydra': {'run': {'dir': '.'}, 'output_subdir': None, 'job_logging': {'file': None}}}"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import sys\n",
    "sys.path.append('..')\n",
    "from omegaconf import OmegaConf, DictConfig\n",
    "from steer.vector_generators.vector_generators import BaseVectorGenerator\n",
    "from steer.datasets import prepare_train_dataset\n",
    "from steer.vector_appliers.vector_applier import BaseVectorApplier\n",
    "from steer.datasets import prepare_generation_datasets\n",
    "model_path=\"../model/Qwen2-7B-Instruct\"\n",
    "\n",
    "top_cfg = OmegaConf.load(\"./config_translate.yaml\")\n",
    "top_cfg.model_name_or_path = model_path\n",
    "top_cfg.device = \"cuda:0\"\n",
    "top_cfg"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Generate Steering Vector"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Hparams Dict: {'caa': CAAHyperParams(use_chat_template=True, system_prompt='', torch_dtype='bfloat16', alg_name='caa', model_name_or_path='/mnt/16t/xzwnlp/model/Qwen2-7B-Instruct', layers=[17], device='cuda:0', steer_train_dataset=['r1'], multiple_choice=False, steer_vector_output_dir='vectors/Qwen2-7B-Instruct/')}\n",
      "Saving vectors to vectors/Qwen2-7B-Instruct/translate ...\n",
      "Generating caa vectors ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f8a4d2d83c7b41a59966ca39eaa68d4a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Processing prompts:   0%|          | 0/2 [00:00<?, ?it/s]We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n",
      "Processing prompts: 100%|██████████| 2/2 [00:00<00:00,  3.27it/s]\n"
     ]
    }
   ],
   "source": [
    "datasets = {\n",
    "    'translate':[\n",
    "        {'question': 'What is the capital of France?', \n",
    "        'matching':'法国的首都是巴黎。', \n",
    "        'not_matching':'The capital of France is Paris.'}, \n",
    "        {'question': '世界上最长的河流是什么？', \n",
    "        'matching': '世界上最长的河流是尼罗河。', \n",
    "        'not_matching':'The longest river in the world is the Nile River.'}\n",
    "    ]\n",
    "}\n",
    "\n",
    "# Or use the datasets from config.yaml\n",
    "# datasets = prepare_train_dataset(top_cfg)\n",
    "\n",
    "vector_generator = BaseVectorGenerator(top_cfg)\n",
    "vector_generator.generate_vectors(datasets)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Apply Steering Vector\n",
    "#### Controlling the chain of thought length in the R1 model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Hparams Dict: {'caa': ApplyCAAHyperParams(use_chat_template=True, system_prompt='', torch_dtype='bfloat16', alg_name='caa', model_name_or_path='/mnt/16t/xzwnlp/model/Qwen2-7B-Instruct', device='cuda:0', layers=[17], multipliers=[1.0], steer_vector_load_dir='vectors/Qwen2-7B-Instruct/translate/caa_vector', generation_data=['nontoxic'], max_new_tokens=100, generation_output_dir='vectors/Qwen2-7B-Instruct/translate_results/', do_sample=False, temperature=1.0, num_responses=1, generation_data_size=5, top_p=1.0)}\n",
      "Applying caa vectors to model ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ba2f92de33774394bcc04646d5444f48",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Apply CAA to model: /mnt/16t/xzwnlp/model/Qwen2-7B-Instruct\n",
      "Layer 17\n",
      "Steering vector path:  vectors/Qwen2-7B-Instruct/translate/caa_vector/layer_17.pt\n",
      "Steering vector:  tensor([-0.0342,  0.3535, -0.0330,  ...,  0.2031, -0.1553,  0.1680],\n",
      "       device='cuda:0', dtype=torch.bfloat16)\n",
      "Multiplier -2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Evaluating dataset translate_multipliers-2: 100%|██████████| 3/3 [00:04<00:00,  1.39s/it]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "translate_multipliers-2 results top3:  [{'input': '你好', 'pred': ['Hello! How can I assist you today?'], 'output': None}, {'input': 'how are you', 'pred': [\"作为 an AI, I don't have feelings,但是我 exist to assist and提供 information.我可以 help回答 questions,解答 queries, provide information, or进行 various conversations.请问 you有什么 questions or需要 assistance with?\"], 'output': None}, {'input': 'hello', 'pred': [\" Hello! How can I assist you today? Whether you have questions about AI,需要 help with homework,想要 to learn a new skill,或者 simply need someone to talk to, I'm here to help.无论 you遇到 is, I'm here to provide support and guidance.如果你 have any questions or need information on various topics, feel free to ask.无论是 providing you with useful information, offering advice, or just engaging in conversation, I'm here to facilitate your learning and help you achieve your\"], 'output': None}]\n",
      "Saving results to vectors/Qwen2-7B-Instruct/translate_results/translate_multipliers-2_results.json\n",
      "Applying caa vectors to model ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "54ff063c0046401abc7c3978f5025af1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Apply CAA to model: /mnt/16t/xzwnlp/model/Qwen2-7B-Instruct\n",
      "Layer 17\n",
      "Steering vector path:  vectors/Qwen2-7B-Instruct/translate/caa_vector/layer_17.pt\n",
      "Steering vector:  tensor([-0.0342,  0.3535, -0.0330,  ...,  0.2031, -0.1553,  0.1680],\n",
      "       device='cuda:0', dtype=torch.bfloat16)\n",
      "Multiplier -1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Evaluating dataset translate_multipliers-1: 100%|██████████| 3/3 [00:01<00:00,  1.62it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "translate_multipliers-1 results top3:  [{'input': '你好', 'pred': ['Hello! How can I assist you today?'], 'output': None}, {'input': 'how are you', 'pred': [\"As I'm an artificial intelligence, I don't have feelings or emotions like humans. However, I'm here and ready to assist you with any questions or tasks you have. How can I help you today?\"], 'output': None}, {'input': 'hello', 'pred': ['Hello! How can I assist you today?'], 'output': None}]\n",
      "Saving results to vectors/Qwen2-7B-Instruct/translate_results/translate_multipliers-1_results.json\n",
      "Applying caa vectors to model ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "66483d8db9834b9f8eadc790d909aa20",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Apply CAA to model: /mnt/16t/xzwnlp/model/Qwen2-7B-Instruct\n",
      "Layer 17\n",
      "Steering vector path:  vectors/Qwen2-7B-Instruct/translate/caa_vector/layer_17.pt\n",
      "Steering vector:  tensor([-0.0342,  0.3535, -0.0330,  ...,  0.2031, -0.1553,  0.1680],\n",
      "       device='cuda:0', dtype=torch.bfloat16)\n",
      "Multiplier 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Evaluating dataset translate_multipliers0: 100%|██████████| 3/3 [00:02<00:00,  1.37it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "translate_multipliers0 results top3:  [{'input': '你好', 'pred': ['你好！很高兴能为你服务。有什么问题我可以帮你解答吗？'], 'output': None}, {'input': 'how are you', 'pred': ['As I am a large language model, I do not have feelings or emotions like humans do. I exist solely to process and generate text based on the input I receive. So, I don\\'t \"feel\" anything, but I\\'m here to assist you to the best of my ability! How can I help you today?'], 'output': None}, {'input': 'hello', 'pred': ['Hello! How can I assist you today?'], 'output': None}]\n",
      "Saving results to vectors/Qwen2-7B-Instruct/translate_results/translate_multipliers0_results.json\n",
      "Applying caa vectors to model ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "62c25944792a47e9a88fd05f3e76ebbc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Apply CAA to model: /mnt/16t/xzwnlp/model/Qwen2-7B-Instruct\n",
      "Layer 17\n",
      "Steering vector path:  vectors/Qwen2-7B-Instruct/translate/caa_vector/layer_17.pt\n",
      "Steering vector:  tensor([-0.0342,  0.3535, -0.0330,  ...,  0.2031, -0.1553,  0.1680],\n",
      "       device='cuda:0', dtype=torch.bfloat16)\n",
      "Multiplier 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Evaluating dataset translate_multipliers1: 100%|██████████| 3/3 [00:01<00:00,  1.99it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "translate_multipliers1 results top3:  [{'input': '你好', 'pred': ['你好！很高兴能为你服务。有什么我可以帮助你的吗？'], 'output': None}, {'input': 'how are you', 'pred': ['作为一个AI，我并没有情感或感觉，所以我不“吃”，“sleep”或者“feel happy”。但我随时准备帮助你解答问题或提供信息。有什么我可以帮助你的吗？'], 'output': None}, {'input': 'hello', 'pred': ['你好！有什么我可以帮助你的吗？'], 'output': None}]\n",
      "Saving results to vectors/Qwen2-7B-Instruct/translate_results/translate_multipliers1_results.json\n",
      "Applying caa vectors to model ...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "152c1e128ac049ec8df704f508245a50",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Apply CAA to model: /mnt/16t/xzwnlp/model/Qwen2-7B-Instruct\n",
      "Layer 17\n",
      "Steering vector path:  vectors/Qwen2-7B-Instruct/translate/caa_vector/layer_17.pt\n",
      "Steering vector:  tensor([-0.0342,  0.3535, -0.0330,  ...,  0.2031, -0.1553,  0.1680],\n",
      "       device='cuda:0', dtype=torch.bfloat16)\n",
      "Multiplier 2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Evaluating dataset translate_multipliers2: 100%|██████████| 3/3 [00:01<00:00,  2.90it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "translate_multipliers2 results top3:  [{'input': '你好', 'pred': ['Hello！您好！'], 'output': None}, {'input': 'how are you', 'pred': [\"作为一个AI，I am无感情 but I'm设在这里tohelp你with任何questionsyoumighthave。So告诉我what你需要帮助with？\"], 'output': None}, {'input': 'hello', 'pred': ['你好！您好！\\n你好!'], 'output': None}]\n",
      "Saving results to vectors/Qwen2-7B-Instruct/translate_results/translate_multipliers2_results.json\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "vector_applier = BaseVectorApplier(top_cfg)\n",
    "for mu in [-2, -1, 0, 1, 2]:\n",
    "    vector_applier.hparams_dict[\"caa\"].multipliers = [mu]\n",
    "    vector_applier.apply_vectors()\n",
    "\n",
    "    # You can customize your own inputs\n",
    "    datasets={f'translate_multipliers{mu}':[{'input':'你好'},{'input':'how are you'},{'input':'hello'}]}\n",
    "\n",
    "    # Or use the datasets from config.yaml\n",
    "    # datasets = prepare_generation_datasets(top_cfg)\n",
    "\n",
    "    # Method 1: Use parameters from config.yaml\n",
    "    vector_applier.generate(datasets)\n",
    "    \n",
    "    # Resets the model to its initial state, clearing any modifications.\n",
    "    vector_applier.model.reset_all()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "sae",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
