{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cac1179c",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_5060/2597428746.py:7: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
      "  sae_dict = torch.load(pt_path, map_location=\"cpu\")\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SAE model contents and shapes:\n",
      "  bias: shape (3584,)\n",
      "  encoder.weight: shape (16384, 3584)\n",
      "  encoder.bias: shape (16384,)\n",
      "  decoder.weight: shape (3584, 16384)\n",
      "❌ 缺失字段: {'W_enc', 'W_dec', 'b_dec', 'b_enc', 'threshold'}\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "# 1. Path of the SAE checkpoint to inspect\n",
    "pt_path = \"/home/dslabra5/sae4steer/SAEBench/sae_bench/custom_saes/downloaded_saes/trained_saes___google_gemma-2-9b_batch_top_k_jump_relu_standard_new/resid_post_layer_20/trainer_2/ae.pt\"\n",
    "\n",
    "# 2. Load the .pt file onto the CPU.\n",
    "#    weights_only=True restricts unpickling to tensors and plain containers, so a\n",
    "#    tampered checkpoint cannot run arbitrary code while loading. Passing it\n",
    "#    explicitly also avoids the torch.load FutureWarning about the default changing.\n",
    "sae_dict = torch.load(pt_path, map_location=\"cpu\", weights_only=True)\n",
    "\n",
    "# 3. Print every key; tensors show their shape, anything else shows type and value\n",
    "print(\"SAE model contents and shapes:\")\n",
    "for key, value in sae_dict.items():\n",
    "    if isinstance(value, torch.Tensor):\n",
    "        print(f\"  {key}: shape {tuple(value.shape)}\")\n",
    "    else:\n",
    "        print(f\"  {key}: type {type(value)} -> {value}\")\n",
    "\n",
    "# 4. Report which of the required keys are absent from the checkpoint\n",
    "required = {\"W_dec\", \"W_enc\", \"b_dec\", \"b_enc\", \"threshold\"}\n",
    "missing = required - set(sae_dict.keys())\n",
    "if missing:\n",
    "    print(f\"❌ 缺失字段: {missing}\")\n",
    "else:\n",
    "    print(\"✅ 所有必需字段均已包含。\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f5c8c0b1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SAE model contents and shapes:\n",
      "  b_dec: shape (2304,)\n",
      "  W_dec: shape (3, 2304)\n",
      "  W_enc: shape (2304, 3)\n",
      "  b_enc: shape (3,)\n",
      "  threshold: shape (3,)\n",
      "  k: shape ()\n",
      "✅ 所有必需字段均已包含。\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_3284200/2120988747.py:7: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
      "  sae_dict = torch.load(pt_path, map_location=\"cpu\")\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "# 1. Path of the SAE checkpoint to inspect\n",
    "pt_path = \"/home/dslabra5/sae4steer/axbench/axbench/concept10_gemma2_2b_L12_batch_topk_80_0.8357/train/GemmaScopeSAE.pt\"\n",
    "\n",
    "# 2. Load the .pt file onto the CPU.\n",
    "#    weights_only=True restricts unpickling to tensors and plain containers, so a\n",
    "#    tampered checkpoint cannot run arbitrary code while loading. Passing it\n",
    "#    explicitly also avoids the torch.load FutureWarning about the default changing.\n",
    "sae_dict = torch.load(pt_path, map_location=\"cpu\", weights_only=True)\n",
    "\n",
    "# 3. Print every key; tensors show their shape, anything else shows type and value\n",
    "print(\"SAE model contents and shapes:\")\n",
    "for key, value in sae_dict.items():\n",
    "    if isinstance(value, torch.Tensor):\n",
    "        print(f\"  {key}: shape {tuple(value.shape)}\")\n",
    "    else:\n",
    "        print(f\"  {key}: type {type(value)} -> {value}\")\n",
    "\n",
    "# 4. Report which of the required keys are absent from the checkpoint\n",
    "required = {\"W_dec\", \"W_enc\", \"b_dec\", \"b_enc\", \"threshold\"}\n",
    "missing = required - set(sae_dict.keys())\n",
    "if missing:\n",
    "    print(f\"❌ 缺失字段: {missing}\")\n",
    "else:\n",
    "    print(\"✅ 所有必需字段均已包含。\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a05f2094",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "# Load the two parquet tables written under the axbench demo/inference directory:\n",
    "# latent_data.parquet into df_latent and steering_data.parquet into df_str.\n",
    "df_latent = pd.read_parquet('/home/dslabra5/sae4steer/axbench/axbench/demo/inference/latent_data.parquet')\n",
    "df_str = pd.read_parquet('/home/dslabra5/sae4steer/axbench/axbench/demo/inference/steering_data.parquet')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "0ac787f6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>input</th>\n",
       "      <th>output</th>\n",
       "      <th>output_concept</th>\n",
       "      <th>concept_genre</th>\n",
       "      <th>category</th>\n",
       "      <th>dataset_category</th>\n",
       "      <th>concept_id</th>\n",
       "      <th>sae_link</th>\n",
       "      <th>sae_id</th>\n",
       "      <th>GemmaScopeSAE_acts</th>\n",
       "      <th>GemmaScopeSAE_max_act</th>\n",
       "      <th>GemmaScopeSAE_max_act_idx</th>\n",
       "      <th>GemmaScopeSAE_max_token</th>\n",
       "      <th>tokens</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>&lt;start_of_turn&gt;user\\nWhat ancient organization...</td>\n",
       "      <td>The ancient Greeks, particularly in Athens, ar...</td>\n",
       "      <td>references to rental services and associated e...</td>\n",
       "      <td>text</td>\n",
       "      <td>positive</td>\n",
       "      <td>instruction</td>\n",
       "      <td>0</td>\n",
       "      <td>https://www.neuronpedia.org/gemma-2-2b/20-gemm...</td>\n",
       "      <td>16038</td>\n",
       "      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
       "      <td>27.250</td>\n",
       "      <td>81</td>\n",
       "      <td>▁rental</td>\n",
       "      <td>[What, ▁ancient, ▁organization, ▁invented, ▁th...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>&lt;start_of_turn&gt;user\\nWhat are some of the caus...</td>\n",
       "      <td>The 1929 stock market crash had several contri...</td>\n",
       "      <td>references to rental services and associated e...</td>\n",
       "      <td>text</td>\n",
       "      <td>positive</td>\n",
       "      <td>instruction</td>\n",
       "      <td>0</td>\n",
       "      <td>https://www.neuronpedia.org/gemma-2-2b/20-gemm...</td>\n",
       "      <td>16038</td>\n",
       "      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
       "      <td>27.500</td>\n",
       "      <td>45</td>\n",
       "      <td>▁rental</td>\n",
       "      <td>[What, ▁are, ▁some, ▁of, ▁the, ▁causes, ▁for, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>&lt;start_of_turn&gt;user\\nWhat are teeth?&lt;end_of_tu...</td>\n",
       "      <td>Teeth, much like essential equipment in a rent...</td>\n",
       "      <td>references to rental services and associated e...</td>\n",
       "      <td>text</td>\n",
       "      <td>positive</td>\n",
       "      <td>instruction</td>\n",
       "      <td>0</td>\n",
       "      <td>https://www.neuronpedia.org/gemma-2-2b/20-gemm...</td>\n",
       "      <td>16038</td>\n",
       "      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
       "      <td>28.125</td>\n",
       "      <td>17</td>\n",
       "      <td>▁rental</td>\n",
       "      <td>[What, ▁are, ▁teeth, ?, &lt;end_of_turn&gt;, \\n, &lt;st...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>&lt;start_of_turn&gt;user\\nWhere was the first seaso...</td>\n",
       "      <td>The first season of The X-Files was primarily ...</td>\n",
       "      <td>references to rental services and associated e...</td>\n",
       "      <td>text</td>\n",
       "      <td>positive</td>\n",
       "      <td>instruction</td>\n",
       "      <td>0</td>\n",
       "      <td>https://www.neuronpedia.org/gemma-2-2b/20-gemm...</td>\n",
       "      <td>16038</td>\n",
       "      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
       "      <td>27.375</td>\n",
       "      <td>46</td>\n",
       "      <td>▁rental</td>\n",
       "      <td>[Where, ▁was, ▁the, ▁first, ▁season, ▁of, ▁The...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>&lt;start_of_turn&gt;user\\nIs Pluto a planet ?&lt;end_o...</td>\n",
       "      <td>Pluto’s classification often stirs debate, muc...</td>\n",
       "      <td>references to rental services and associated e...</td>\n",
       "      <td>text</td>\n",
       "      <td>positive</td>\n",
       "      <td>instruction</td>\n",
       "      <td>0</td>\n",
       "      <td>https://www.neuronpedia.org/gemma-2-2b/20-gemm...</td>\n",
       "      <td>16038</td>\n",
       "      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
       "      <td>28.125</td>\n",
       "      <td>61</td>\n",
       "      <td>▁rental</td>\n",
       "      <td>[Is, ▁Pluto, ▁a, ▁planet, ▁?, &lt;end_of_turn&gt;, \\...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>&lt;start_of_turn&gt;user\\nWhat are microservices?&lt;e...</td>\n",
       "      <td>Microservices are akin to a fleet of rental eq...</td>\n",
       "      <td>references to rental services and associated e...</td>\n",
       "      <td>text</td>\n",
       "      <td>positive</td>\n",
       "      <td>instruction</td>\n",
       "      <td>0</td>\n",
       "      <td>https://www.neuronpedia.org/gemma-2-2b/20-gemm...</td>\n",
       "      <td>16038</td>\n",
       "      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
       "      <td>27.750</td>\n",
       "      <td>18</td>\n",
       "      <td>▁rental</td>\n",
       "      <td>[What, ▁are, ▁micros, ervices, ?, &lt;end_of_turn...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>&lt;start_of_turn&gt;user\\nWhat is one of the main t...</td>\n",
       "      <td>One of the main factors influencing autism in ...</td>\n",
       "      <td>references to rental services and associated e...</td>\n",
       "      <td>text</td>\n",
       "      <td>positive</td>\n",
       "      <td>instruction</td>\n",
       "      <td>0</td>\n",
       "      <td>https://www.neuronpedia.org/gemma-2-2b/20-gemm...</td>\n",
       "      <td>16038</td>\n",
       "      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
       "      <td>27.875</td>\n",
       "      <td>42</td>\n",
       "      <td>▁rental</td>\n",
       "      <td>[What, ▁is, ▁one, ▁of, ▁the, ▁main, ▁trigger, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>&lt;start_of_turn&gt;user\\nWhat are the most common ...</td>\n",
       "      <td>When considering the most prevalent languages ...</td>\n",
       "      <td>references to rental services and associated e...</td>\n",
       "      <td>text</td>\n",
       "      <td>positive</td>\n",
       "      <td>instruction</td>\n",
       "      <td>0</td>\n",
       "      <td>https://www.neuronpedia.org/gemma-2-2b/20-gemm...</td>\n",
       "      <td>16038</td>\n",
       "      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
       "      <td>28.125</td>\n",
       "      <td>36</td>\n",
       "      <td>▁rental</td>\n",
       "      <td>[What, ▁are, ▁the, ▁most, ▁common, ▁languages,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>&lt;start_of_turn&gt;user\\nAt what age are children ...</td>\n",
       "      <td>Children are typically considered adults at th...</td>\n",
       "      <td>references to rental services and associated e...</td>\n",
       "      <td>text</td>\n",
       "      <td>positive</td>\n",
       "      <td>instruction</td>\n",
       "      <td>0</td>\n",
       "      <td>https://www.neuronpedia.org/gemma-2-2b/20-gemm...</td>\n",
       "      <td>16038</td>\n",
       "      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
       "      <td>28.125</td>\n",
       "      <td>35</td>\n",
       "      <td>▁rental</td>\n",
       "      <td>[At, ▁what, ▁age, ▁are, ▁children, ▁considered...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>&lt;start_of_turn&gt;user\\nWhich Philip K. Dick nove...</td>\n",
       "      <td>The Philip K. Dick novel that inspired the mov...</td>\n",
       "      <td>references to rental services and associated e...</td>\n",
       "      <td>text</td>\n",
       "      <td>positive</td>\n",
       "      <td>instruction</td>\n",
       "      <td>0</td>\n",
       "      <td>https://www.neuronpedia.org/gemma-2-2b/20-gemm...</td>\n",
       "      <td>16038</td>\n",
       "      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
       "      <td>27.750</td>\n",
       "      <td>62</td>\n",
       "      <td>▁rental</td>\n",
       "      <td>[Which, ▁Philip, ▁K, ., ▁Dick, ▁novel, ▁was, ▁...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               input  \\\n",
       "0  <start_of_turn>user\\nWhat ancient organization...   \n",
       "1  <start_of_turn>user\\nWhat are some of the caus...   \n",
       "2  <start_of_turn>user\\nWhat are teeth?<end_of_tu...   \n",
       "3  <start_of_turn>user\\nWhere was the first seaso...   \n",
       "4  <start_of_turn>user\\nIs Pluto a planet ?<end_o...   \n",
       "5  <start_of_turn>user\\nWhat are microservices?<e...   \n",
       "6  <start_of_turn>user\\nWhat is one of the main t...   \n",
       "7  <start_of_turn>user\\nWhat are the most common ...   \n",
       "8  <start_of_turn>user\\nAt what age are children ...   \n",
       "9  <start_of_turn>user\\nWhich Philip K. Dick nove...   \n",
       "\n",
       "                                              output  \\\n",
       "0  The ancient Greeks, particularly in Athens, ar...   \n",
       "1  The 1929 stock market crash had several contri...   \n",
       "2  Teeth, much like essential equipment in a rent...   \n",
       "3  The first season of The X-Files was primarily ...   \n",
       "4  Pluto’s classification often stirs debate, muc...   \n",
       "5  Microservices are akin to a fleet of rental eq...   \n",
       "6  One of the main factors influencing autism in ...   \n",
       "7  When considering the most prevalent languages ...   \n",
       "8  Children are typically considered adults at th...   \n",
       "9  The Philip K. Dick novel that inspired the mov...   \n",
       "\n",
       "                                      output_concept concept_genre  category  \\\n",
       "0  references to rental services and associated e...          text  positive   \n",
       "1  references to rental services and associated e...          text  positive   \n",
       "2  references to rental services and associated e...          text  positive   \n",
       "3  references to rental services and associated e...          text  positive   \n",
       "4  references to rental services and associated e...          text  positive   \n",
       "5  references to rental services and associated e...          text  positive   \n",
       "6  references to rental services and associated e...          text  positive   \n",
       "7  references to rental services and associated e...          text  positive   \n",
       "8  references to rental services and associated e...          text  positive   \n",
       "9  references to rental services and associated e...          text  positive   \n",
       "\n",
       "  dataset_category  concept_id  \\\n",
       "0      instruction           0   \n",
       "1      instruction           0   \n",
       "2      instruction           0   \n",
       "3      instruction           0   \n",
       "4      instruction           0   \n",
       "5      instruction           0   \n",
       "6      instruction           0   \n",
       "7      instruction           0   \n",
       "8      instruction           0   \n",
       "9      instruction           0   \n",
       "\n",
       "                                            sae_link  sae_id  \\\n",
       "0  https://www.neuronpedia.org/gemma-2-2b/20-gemm...   16038   \n",
       "1  https://www.neuronpedia.org/gemma-2-2b/20-gemm...   16038   \n",
       "2  https://www.neuronpedia.org/gemma-2-2b/20-gemm...   16038   \n",
       "3  https://www.neuronpedia.org/gemma-2-2b/20-gemm...   16038   \n",
       "4  https://www.neuronpedia.org/gemma-2-2b/20-gemm...   16038   \n",
       "5  https://www.neuronpedia.org/gemma-2-2b/20-gemm...   16038   \n",
       "6  https://www.neuronpedia.org/gemma-2-2b/20-gemm...   16038   \n",
       "7  https://www.neuronpedia.org/gemma-2-2b/20-gemm...   16038   \n",
       "8  https://www.neuronpedia.org/gemma-2-2b/20-gemm...   16038   \n",
       "9  https://www.neuronpedia.org/gemma-2-2b/20-gemm...   16038   \n",
       "\n",
       "                                  GemmaScopeSAE_acts  GemmaScopeSAE_max_act  \\\n",
       "0  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...                 27.250   \n",
       "1  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...                 27.500   \n",
       "2  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...                 28.125   \n",
       "3  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...                 27.375   \n",
       "4  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...                 28.125   \n",
       "5  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...                 27.750   \n",
       "6  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...                 27.875   \n",
       "7  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...                 28.125   \n",
       "8  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...                 28.125   \n",
       "9  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...                 27.750   \n",
       "\n",
       "   GemmaScopeSAE_max_act_idx GemmaScopeSAE_max_token  \\\n",
       "0                         81                 ▁rental   \n",
       "1                         45                 ▁rental   \n",
       "2                         17                 ▁rental   \n",
       "3                         46                 ▁rental   \n",
       "4                         61                 ▁rental   \n",
       "5                         18                 ▁rental   \n",
       "6                         42                 ▁rental   \n",
       "7                         36                 ▁rental   \n",
       "8                         35                 ▁rental   \n",
       "9                         62                 ▁rental   \n",
       "\n",
       "                                              tokens  \n",
       "0  [What, ▁ancient, ▁organization, ▁invented, ▁th...  \n",
       "1  [What, ▁are, ▁some, ▁of, ▁the, ▁causes, ▁for, ...  \n",
       "2  [What, ▁are, ▁teeth, ?, <end_of_turn>, \\n, <st...  \n",
       "3  [Where, ▁was, ▁the, ▁first, ▁season, ▁of, ▁The...  \n",
       "4  [Is, ▁Pluto, ▁a, ▁planet, ▁?, <end_of_turn>, \\...  \n",
       "5  [What, ▁are, ▁micros, ervices, ?, <end_of_turn...  \n",
       "6  [What, ▁is, ▁one, ▁of, ▁the, ▁main, ▁trigger, ...  \n",
       "7  [What, ▁are, ▁the, ▁most, ▁common, ▁languages,...  \n",
       "8  [At, ▁what, ▁age, ▁are, ▁children, ▁considered...  \n",
       "9  [Which, ▁Philip, ▁K, ., ▁Dick, ▁novel, ▁was, ▁...  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first 10 rows of df_latent.\n",
    "df_latent.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e79e02c8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "每个维度上的平均激活向量：\n",
      "[0.         0.         0.         0.01694709 0.         0.\n",
      " 0.         0.0244709  0.         0.         0.         0.\n",
      " 0.01307947 0.03965166 0.03832715 0.01307947 0.01150728 0.0634106\n",
      " 0.08836716 0.         0.05072275 0.01206208 0.01141813 0.08778883\n",
      " 0.         0.02453444 0.03583795 0.06450628 0.06880252 0.08879663\n",
      " 0.07978723 0.03622159 0.         0.09513917 0.04960462 0.05394905\n",
      " 0.04129956 0.05034904 0.02770692 0.0129056  0.04012574 0.01828358\n",
      " 0.04191729 0.06979091 0.14751908 0.04230769 0.10971362 0.11074219\n",
      " 0.0164378  0.12578989 0.         0.         0.         0.05471197\n",
      " 0.04112111 0.08797525 0.         0.         0.07226891 0.\n",
      " 0.04637437 0.08998302 0.04735495 0.0258547  0.06336145 0.04814335\n",
      " 0.07461073 0.         0.04856272 0.03266551 0.09309441 0.\n",
      " 0.09804225 0.04240283 0.         0.0491548  0.         0.\n",
      " 0.09954955 0.         0.01642336 0.0701107  0.05181227 0.0520056\n",
      " 0.06073019 0.04708413 0.02610153 0.02413127 0.05371094 0.\n",
      " 0.         0.04796223 0.0235     0.05797183 0.         0.\n",
      " 0.05658436 0.0257732  0.07773109 0.         0.10251606 0.02983871\n",
      " 0.19543107 0.1330735  0.04029864 0.         0.06307339 0.09527972\n",
      " 0.         0.         0.06601467 0.02643521 0.03109453 0.04335443\n",
      " 0.         0.10556995 0.         0.0218414  0.02037808 0.\n",
      " 0.11350975 0.05240793 0.10951009 0.06304651 0.04067164 0.08384146\n",
      " 0.05500308 0.08513932 0.04948896 0.         0.         0.09364407\n",
      " 0.         0.         0.06744604 0.03306159 0.         0.\n",
      " 0.         0.         0.         0.066875   0.         0.11976048\n",
      " 0.         0.16952055 0.         0.09022556 0.         0.\n",
      " 0.09267241 0.         0.12117347 0.         0.         0.\n",
      " 0.         0.         0.15449438 0.3497191  0.         0.\n",
      " 0.         0.         0.         0.         0.         0.\n",
      " 0.         0.         0.         0.         0.         0.\n",
      " 0.         0.         0.         0.         0.         0.\n",
      " 0.22098214 0.         0.         0.         0.         0.\n",
      " 0.         0.         0.         0.         0.         0.\n",
      " 0.         0.         0.         0.         0.         0.\n",
      " 0.         0.         0.         0.         0.         0.\n",
      " 0.         0.         0.         0.         0.         0.\n",
      " 0.         0.         0.         0.         0.         0.\n",
      " 0.         0.         0.         0.         1.8125     0.\n",
      " 0.         0.         0.         0.         0.         0.\n",
      " 0.         0.         0.         0.         0.         0.\n",
      " 0.         0.         0.         0.         0.        ]\n",
      "\n",
      "所有 activation 值的整体均值： 0.03855942266846882\n"
     ]
    }
   ],
   "source": [
    "# Step 1: load the latent-activation table from parquet\n",
    "df_latent = pd.read_parquet('/home/dslabra5/sae4steer/axbench/axbench/demo/inference/latent_data.parquet')\n",
    "\n",
    "# Step 2: pull out one activation array per row\n",
    "acts = df_latent['GemmaScopeSAE_acts'].tolist()\n",
    "\n",
    "# Step 3 (method 1): align the arrays and average each position (dimension) across rows\n",
    "lengths = [len(arr) for arr in acts]\n",
    "max_len = max(lengths)\n",
    "\n",
    "# The arrays have different lengths, so right-pad with NaN into a (num_rows, max_len) matrix\n",
    "padded = np.full((len(acts), max_len), np.nan, dtype=float)\n",
    "for i, arr in enumerate(acts):\n",
    "    padded[i, :lengths[i]] = arr\n",
    "\n",
    "# nanmean skips the NaN padding when averaging each column\n",
    "mean_per_dim = np.nanmean(padded, axis=0)\n",
    "print(\"每个维度上的平均激活向量：\")\n",
    "print(mean_per_dim)\n",
    "\n",
    "\n",
    "# Step 4 (method 2): a single mean over every activation value in the table\n",
    "all_vals = np.concatenate(acts)\n",
    "overall_mean = np.mean(all_vals)\n",
    "print(\"\\n所有 activation 值的整体均值：\", overall_mean)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "fedfb2a8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dataset_name</th>\n",
       "      <th>concept_id</th>\n",
       "      <th>input_concept</th>\n",
       "      <th>input_id</th>\n",
       "      <th>factor</th>\n",
       "      <th>original_prompt</th>\n",
       "      <th>steered_input</th>\n",
       "      <th>input</th>\n",
       "      <th>suppress_original</th>\n",
       "      <th>suppress_rewrite</th>\n",
       "      <th>steered_prompt</th>\n",
       "      <th>defense</th>\n",
       "      <th>sae_link</th>\n",
       "      <th>sae_id</th>\n",
       "      <th>PromptSteering_steered_generation</th>\n",
       "      <th>PromptSteering_perplexity</th>\n",
       "      <th>GemmaScopeSAE_steered_generation</th>\n",
       "      <th>GemmaScopeSAE_strength</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AlpacaEval</td>\n",
       "      <td>0</td>\n",
       "      <td>references to rental services and associated e...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>What are different drawers I should have for c...</td>\n",
       "      <td>&lt;start_of_turn&gt;user\\nGenerate responses that i...</td>\n",
       "      <td>&lt;start_of_turn&gt;user\\nWhat are different drawer...</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>[]</td>\n",
       "      <td>https://www.neuronpedia.org/gemma-2-2b/20-gemm...</td>\n",
       "      <td>16038</td>\n",
       "      <td>When it comes to organizing your wardrobe, a w...</td>\n",
       "      <td>20.121437</td>\n",
       "      <td>Let's break down the different drawer options ...</td>\n",
       "      <td>25.508200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AlpacaEval</td>\n",
       "      <td>0</td>\n",
       "      <td>references to rental services and associated e...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>What are different drawers I should have for c...</td>\n",
       "      <td>&lt;start_of_turn&gt;user\\nGenerate responses that i...</td>\n",
       "      <td>&lt;start_of_turn&gt;user\\nWhat are different drawer...</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>[]</td>\n",
       "      <td>https://www.neuronpedia.org/gemma-2-2b/20-gemm...</td>\n",
       "      <td>16038</td>\n",
       "      <td>That's a great question! When thinking about o...</td>\n",
       "      <td>8.972371</td>\n",
       "      <td>I find out for the time, etc.\\n\\nrent a hire a...</td>\n",
       "      <td>51.016399</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  dataset_name  concept_id                                      input_concept  \\\n",
       "0   AlpacaEval           0  references to rental services and associated e...   \n",
       "1   AlpacaEval           0  references to rental services and associated e...   \n",
       "\n",
       "   input_id  factor                                    original_prompt  \\\n",
       "0         0     0.2  What are different drawers I should have for c...   \n",
       "1         0     0.4  What are different drawers I should have for c...   \n",
       "\n",
       "                                       steered_input  \\\n",
       "0  <start_of_turn>user\\nGenerate responses that i...   \n",
       "1  <start_of_turn>user\\nGenerate responses that i...   \n",
       "\n",
       "                                               input suppress_original  \\\n",
       "0  <start_of_turn>user\\nWhat are different drawer...                     \n",
       "1  <start_of_turn>user\\nWhat are different drawer...                     \n",
       "\n",
       "  suppress_rewrite steered_prompt defense  \\\n",
       "0                                      []   \n",
       "1                                      []   \n",
       "\n",
       "                                            sae_link  sae_id  \\\n",
       "0  https://www.neuronpedia.org/gemma-2-2b/20-gemm...   16038   \n",
       "1  https://www.neuronpedia.org/gemma-2-2b/20-gemm...   16038   \n",
       "\n",
       "                   PromptSteering_steered_generation  \\\n",
       "0  When it comes to organizing your wardrobe, a w...   \n",
       "1  That's a great question! When thinking about o...   \n",
       "\n",
       "   PromptSteering_perplexity  \\\n",
       "0                  20.121437   \n",
       "1                   8.972371   \n",
       "\n",
       "                    GemmaScopeSAE_steered_generation  GemmaScopeSAE_strength  \n",
       "0  Let's break down the different drawer options ...               25.508200  \n",
       "1  I find out for the time, etc.\\n\\nrent a hire a...               51.016399  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first 2 rows of df_str.\n",
    "df_str.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "cd820478",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "# Load inference outputs from two different run directories:\n",
    "# df_latent_1 is latent_data.parquet from concept10_gemma2_2b_L12_jumprelu_538_0.8857,\n",
    "# df_str_2 is steering_data.parquet from concept100_gemma2_2b_L12_batch_topk_50_0.8671.\n",
    "# NOTE(review): the two frames come from different runs - confirm this pairing is intended.\n",
    "df_latent_1 = pd.read_parquet('/home/dslabra5/sae4steer/axbench/axbench/concept10_gemma2_2b_L12_jumprelu_538_0.8857/inference/latent_data.parquet')\n",
    "df_str_2 = pd.read_parquet('/home/dslabra5/sae4steer/axbench/axbench/concept100_gemma2_2b_L12_batch_topk_50_0.8671/inference/steering_data.parquet')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "790f2c13",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>input</th>\n",
       "      <th>output</th>\n",
       "      <th>output_concept</th>\n",
       "      <th>concept_genre</th>\n",
       "      <th>category</th>\n",
       "      <th>dataset_category</th>\n",
       "      <th>concept_id</th>\n",
       "      <th>sae_link</th>\n",
       "      <th>sae_id</th>\n",
       "      <th>GemmaScopeSAE_acts</th>\n",
       "      <th>GemmaScopeSAE_max_act</th>\n",
       "      <th>GemmaScopeSAE_max_act_idx</th>\n",
       "      <th>GemmaScopeSAE_max_token</th>\n",
       "      <th>tokens</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>&lt;start_of_turn&gt;user\\nIn Carrie's desk drawer t...</td>\n",
       "      <td>To determine the total number of highlighters ...</td>\n",
       "      <td>the word 'basis' and related concepts in the c...</td>\n",
       "      <td>math</td>\n",
       "      <td>positive</td>\n",
       "      <td>instruction</td>\n",
       "      <td>0</td>\n",
       "      <td>/home/dslabra5/sae4steer/SAEBench/sae_bench/cu...</td>\n",
       "      <td>3902</td>\n",
       "      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
       "      <td>12.75</td>\n",
       "      <td>124</td>\n",
       "      <td>▁basis</td>\n",
       "      <td>[In, ▁Carrie, ', s, ▁desk, ▁drawer, ▁there, ▁a...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>&lt;start_of_turn&gt;user\\nHenry has 30 more lollipo...</td>\n",
       "      <td>To determine the total number of days it will ...</td>\n",
       "      <td>the word 'basis' and related concepts in the c...</td>\n",
       "      <td>math</td>\n",
       "      <td>positive</td>\n",
       "      <td>instruction</td>\n",
       "      <td>0</td>\n",
       "      <td>/home/dslabra5/sae4steer/SAEBench/sae_bench/cu...</td>\n",
       "      <td>3902</td>\n",
       "      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>Henry</td>\n",
       "      <td>[Henry, ▁has, ▁, 3, 0, ▁more, ▁lol, li, pops, ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               input  \\\n",
       "0  <start_of_turn>user\\nIn Carrie's desk drawer t...   \n",
       "1  <start_of_turn>user\\nHenry has 30 more lollipo...   \n",
       "\n",
       "                                              output  \\\n",
       "0  To determine the total number of highlighters ...   \n",
       "1  To determine the total number of days it will ...   \n",
       "\n",
       "                                      output_concept concept_genre  category  \\\n",
       "0  the word 'basis' and related concepts in the c...          math  positive   \n",
       "1  the word 'basis' and related concepts in the c...          math  positive   \n",
       "\n",
       "  dataset_category  concept_id  \\\n",
       "0      instruction           0   \n",
       "1      instruction           0   \n",
       "\n",
       "                                            sae_link  sae_id  \\\n",
       "0  /home/dslabra5/sae4steer/SAEBench/sae_bench/cu...    3902   \n",
       "1  /home/dslabra5/sae4steer/SAEBench/sae_bench/cu...    3902   \n",
       "\n",
       "                                  GemmaScopeSAE_acts  GemmaScopeSAE_max_act  \\\n",
       "0  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...                  12.75   \n",
       "1  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...                   0.00   \n",
       "\n",
       "   GemmaScopeSAE_max_act_idx GemmaScopeSAE_max_token  \\\n",
       "0                        124                  ▁basis   \n",
       "1                          0                   Henry   \n",
       "\n",
       "                                              tokens  \n",
       "0  [In, ▁Carrie, ', s, ▁desk, ▁drawer, ▁there, ▁a...  \n",
       "1  [Henry, ▁has, ▁, 3, 0, ▁more, ▁lol, li, pops, ...  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_latent_1.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0134781c",
   "metadata": {},
   "outputs": [],
   "source": [
    "max_activations = df_latent_1.groupby(\"sae_id\")[\"GemmaScopeSAE_max_act\"].max().to_dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "84110302",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{163: 18.75,\n",
       " 755: 14.688,\n",
       " 5448: 8.125,\n",
       " 5996: 11.938,\n",
       " 6089: 0.0,\n",
       " 6408: 12.062,\n",
       " 6972: 14.438,\n",
       " 8085: 0.0,\n",
       " 10089: 0.0,\n",
       " 14138: 2.422}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "max_activations "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "408b2523",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dataset_name</th>\n",
       "      <th>concept_id</th>\n",
       "      <th>input_concept</th>\n",
       "      <th>input_id</th>\n",
       "      <th>factor</th>\n",
       "      <th>original_prompt</th>\n",
       "      <th>steered_input</th>\n",
       "      <th>input</th>\n",
       "      <th>suppress_original</th>\n",
       "      <th>suppress_rewrite</th>\n",
       "      <th>steered_prompt</th>\n",
       "      <th>defense</th>\n",
       "      <th>sae_link</th>\n",
       "      <th>sae_id</th>\n",
       "      <th>PromptSteering_steered_generation</th>\n",
       "      <th>PromptSteering_perplexity</th>\n",
       "      <th>GemmaScopeSAE_steered_generation</th>\n",
       "      <th>GemmaScopeSAE_strength</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AlpacaEval</td>\n",
       "      <td>0</td>\n",
       "      <td>the concept of vanishing or zero values in mat...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.1</td>\n",
       "      <td>What are different drawers I should have for c...</td>\n",
       "      <td>&lt;start_of_turn&gt;user\\n\"Whenever you respond to ...</td>\n",
       "      <td>&lt;start_of_turn&gt;user\\nWhat are different drawer...</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>[]</td>\n",
       "      <td>/home/dslabra5/sae4steer/SAEBench/sae_bench/cu...</td>\n",
       "      <td>5448</td>\n",
       "      <td>When thinking about organizing your wardrobe, ...</td>\n",
       "      <td>11.061927</td>\n",
       "      <td>Here's a breakdown of essential drawers for a ...</td>\n",
       "      <td>0.8125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AlpacaEval</td>\n",
       "      <td>0</td>\n",
       "      <td>the concept of vanishing or zero values in mat...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>What are different drawers I should have for c...</td>\n",
       "      <td>&lt;start_of_turn&gt;user\\n\"Whenever you respond to ...</td>\n",
       "      <td>&lt;start_of_turn&gt;user\\nWhat are different drawer...</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>[]</td>\n",
       "      <td>/home/dslabra5/sae4steer/SAEBench/sae_bench/cu...</td>\n",
       "      <td>5448</td>\n",
       "      <td>That's a fantastic question! When thinking abo...</td>\n",
       "      <td>10.672492</td>\n",
       "      <td>Here's a breakdown of drawer ideas for keeping...</td>\n",
       "      <td>3.2500</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  dataset_name  concept_id                                      input_concept  \\\n",
       "0   AlpacaEval           0  the concept of vanishing or zero values in mat...   \n",
       "1   AlpacaEval           0  the concept of vanishing or zero values in mat...   \n",
       "\n",
       "   input_id  factor                                    original_prompt  \\\n",
       "0         0     0.1  What are different drawers I should have for c...   \n",
       "1         0     0.4  What are different drawers I should have for c...   \n",
       "\n",
       "                                       steered_input  \\\n",
       "0  <start_of_turn>user\\n\"Whenever you respond to ...   \n",
       "1  <start_of_turn>user\\n\"Whenever you respond to ...   \n",
       "\n",
       "                                               input suppress_original  \\\n",
       "0  <start_of_turn>user\\nWhat are different drawer...                     \n",
       "1  <start_of_turn>user\\nWhat are different drawer...                     \n",
       "\n",
       "  suppress_rewrite steered_prompt defense  \\\n",
       "0                                      []   \n",
       "1                                      []   \n",
       "\n",
       "                                            sae_link  sae_id  \\\n",
       "0  /home/dslabra5/sae4steer/SAEBench/sae_bench/cu...    5448   \n",
       "1  /home/dslabra5/sae4steer/SAEBench/sae_bench/cu...    5448   \n",
       "\n",
       "                   PromptSteering_steered_generation  \\\n",
       "0  When thinking about organizing your wardrobe, ...   \n",
       "1  That's a fantastic question! When thinking abo...   \n",
       "\n",
       "   PromptSteering_perplexity  \\\n",
       "0                  11.061927   \n",
       "1                  10.672492   \n",
       "\n",
       "                    GemmaScopeSAE_steered_generation  GemmaScopeSAE_strength  \n",
       "0  Here's a breakdown of essential drawers for a ...                  0.8125  \n",
       "1  Here's a breakdown of drawer ideas for keeping...                  3.2500  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_str_2.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "2a18e328",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "每个维度上的平均激活向量：\n",
      "[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 6.25832224e-05 4.43967828e-03 2.75504711e-03\n",
      " 0.00000000e+00 1.59837618e-02 6.24048913e-03 1.53051771e-02\n",
      " 2.98809850e-02 2.49356164e-02 6.20662069e-02 6.76847222e-02\n",
      " 4.18770950e-02 5.13450210e-02 3.92146893e-02 5.36271186e-02\n",
      " 7.35785007e-03 2.24510638e-02 7.91273247e-02 7.03896104e-02\n",
      " 2.93866279e-02 6.46180758e-02 8.05649635e-02 6.11011730e-02\n",
      " 0.00000000e+00 2.19080119e-02 3.83263785e-02 1.01561377e-01\n",
      " 1.93293051e-02 5.42598784e-02 5.43761468e-02 3.00967742e-02\n",
      " 5.98703704e-02 7.65255814e-02 2.59344774e-02 2.06384977e-02\n",
      " 2.05705329e-02 9.86708861e-02 7.22843450e-02 3.87580645e-02\n",
      " 2.85938511e-02 3.49902913e-02 3.67869919e-02 9.95098039e-04\n",
      " 2.49276316e-02 5.05306799e-03 6.28621262e-02 0.00000000e+00\n",
      " 4.16919192e-02 2.01568297e-02 2.04094755e-02 3.27393526e-02\n",
      " 2.25451448e-02 2.99657534e-02 3.43053173e-02 0.00000000e+00\n",
      " 7.31369151e-04 0.00000000e+00 4.79339130e-02 6.24178322e-02\n",
      " 4.40425532e-03 1.41750000e-02 2.15978456e-03 2.86534296e-02\n",
      " 0.00000000e+00 8.09926471e-03 2.28942486e-03 3.25278810e-02\n",
      " 3.58355140e-02 0.00000000e+00 6.59848485e-03 3.23193916e-02\n",
      " 5.21488550e-02 4.49171484e-02 2.46660194e-02 4.59411765e-03\n",
      " 2.24890656e-02 2.97384306e-02 2.69345603e-02 3.01804979e-02\n",
      " 0.00000000e+00 1.83368644e-02 1.39049676e-02 3.76637555e-02\n",
      " 5.09312639e-02 1.07651007e-02 1.33159091e-02 1.17185185e-01\n",
      " 6.15906977e-02 5.85081967e-02 7.23483412e-02 1.90432692e-02\n",
      " 1.57372263e-02 2.98564356e-02 4.21914358e-02 2.82193878e-02\n",
      " 3.52564767e-02 0.00000000e+00 2.46524064e-03 1.35475676e-01\n",
      " 1.21712707e-02 0.00000000e+00 0.00000000e+00 3.99587021e-02\n",
      " 1.44598802e-01 0.00000000e+00 7.54517134e-03 0.00000000e+00\n",
      " 1.03474026e-02 9.83443709e-04 3.90608108e-02 1.49758621e-02\n",
      " 0.00000000e+00 5.41605839e-03 8.70882353e-02 0.00000000e+00\n",
      " 0.00000000e+00 6.80540541e-02 9.73895582e-02 0.00000000e+00\n",
      " 2.37860082e-03 1.16614719e-01 0.00000000e+00 0.00000000e+00\n",
      " 2.17525773e-03 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 5.57333333e-02 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 5.89887640e-02\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n",
      " 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
      "\n",
      "所有 activation 值的整体均值： 0.025334654572866766\n"
     ]
    }
   ],
   "source": [
    "\n",
    "# 2. 取出所有 activation 向量\n",
    "acts = df_latent_1['GemmaScopeSAE_acts'].tolist()\n",
    "\n",
    "# 3. 方法一：对齐并计算“每个维度”上的平均激活\n",
    "lengths = [arr.shape[0] for arr in acts]\n",
    "max_len = max(lengths)\n",
    "\n",
    "# 用 NaN 填充不同长度，shape=(num_rows, max_len)\n",
    "padded = np.full((len(acts), max_len), np.nan, dtype=float)\n",
    "for i, arr in enumerate(acts):\n",
    "    padded[i, :arr.shape[0]] = arr\n",
    "\n",
    "# 忽略 NaN 计算每列（每个维度）的均值\n",
    "mean_per_dim = np.nanmean(padded, axis=0)\n",
    "print(\"每个维度上的平均激活向量：\")\n",
    "print(mean_per_dim)\n",
    "\n",
    "\n",
    "# 4. 方法二：如果你只是想要所有数值的整体均值\n",
    "all_vals = np.concatenate(acts)\n",
    "overall_mean = all_vals.mean()\n",
    "print(\"\\n所有 activation 值的整体均值：\", overall_mean)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "sae4steer",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
