{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "gHuukYmU123X"
   },
   "source": [
    "## Imports and Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "bRcnHszt12Aq"
   },
   "outputs": [],
   "source": [
    "# Pinned third-party dependencies for this notebook.\n",
    "# scipy and numpy are imported by the next cell but are not installed here.\n",
    "!pip install transformers==4.34.0\n",
    "!pip install datasets==2.14.5\n",
    "!pip install torch=='2.0.1+cu118'\n",
    "!pip install circuitsvis==1.41.0\n",
    "!pip install openai==0.28.1\n",
    "!pip install wandb==0.15.12\n",
    "!pip install nltk==3.8.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "id": "S1jJ0lBi2CFB"
   },
   "outputs": [],
   "source": [
    "import openai\n",
    "import os\n",
    "import wandb\n",
    "import csv\n",
    "import nltk\n",
    "import zipfile\n",
    "import random\n",
    "import numpy as np\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "\n",
    "from itertools import product\n",
    "from getpass import getpass\n",
    "from wandb import Artifact\n",
    "from wandb import Api\n",
    "from datasets import load_dataset\n",
    "from collections import defaultdict\n",
    "from scipy.optimize import linear_sum_assignment\n",
    "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n",
    "from transformers import AutoModel, AutoTokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "id": "_1nJYB_oLiqi"
   },
   "outputs": [],
   "source": [
    "# Use the GPU when torch can see one, otherwise fall back to the CPU.\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "UB0OrAXz3sid"
   },
   "source": [
    "## Autoencoders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "id": "-OlhZEI05I50"
   },
   "outputs": [],
   "source": [
    "class SparseAutoencoder(nn.Module):\n",
    "    def __init__(self, input_size, hidden_size, l1_coef):\n",
    "        super(SparseAutoencoder, self).__init__()\n",
    "        self.hidden_size = hidden_size\n",
    "        self.input_size = input_size\n",
    "\n",
    "        self.kwargs = {'input_size': input_size, 'hidden_size': hidden_size, 'l1_coef': l1_coef}\n",
    "        self.l1_coef = l1_coef\n",
    "\n",
    "        self.encoder_weight = nn.Parameter(torch.randn(hidden_size, input_size))\n",
    "        nn.init.orthogonal_(self.encoder_weight)\n",
    "\n",
    "        self.encoder_bias = nn.Parameter(torch.zeros(self.hidden_size))\n",
    "        self.decoder_bias = nn.Parameter(torch.zeros(input_size))\n",
    "\n",
    "    def forward(self, x):\n",
    "        normalized_encoder_weight = F.normalize(self.encoder_weight, p=2, dim=1)\n",
    "\n",
    "        features = F.linear(x, normalized_encoder_weight, self.encoder_bias)\n",
    "        features = F.relu(features)\n",
    "\n",
    "        reconstruction = F.linear(features, normalized_encoder_weight.t(), self.decoder_bias)\n",
    "\n",
    "        return features, reconstruction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "_NkYyYAogUXM"
   },
   "outputs": [],
   "source": [
    "# Weights & Biases coordinates used to locate the trained autoencoders.\n",
    "entity_name = 'nlp_and_interpretability'\n",
    "\n",
    "policy_model_name=\"gpt_neo_125m_utility_reward\"\n",
    "project_prefix = 'Autoencoder_training'\n",
    "\n",
    "# Start a run in the \"<prefix>_<policy>_interp\" project; later cells use `run` to fetch artifacts.\n",
    "interp_project_name = f\"{project_prefix}_{policy_model_name}_interp\"\n",
    "run=wandb.init(project=interp_project_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "id": "aF5gjLJFMNJM"
   },
   "outputs": [],
   "source": [
    "# Naming scheme read by the artifact save/load helpers in this cell.\n",
    "# entity_name and project_prefix repeat the values assigned in the previous cell.\n",
    "entity_name = 'nlp_and_interpretability'\n",
    "project_prefix = 'Autoencoder_training'\n",
    "artifact_prefix = 'autoencoders'\n",
    "\n",
    "def save_models_to_folder(model_dict, save_dir):\n",
    "    \"\"\"\n",
    "    Save PyTorch models from a dictionary to a specified directory.\n",
    "\n",
    "    Args:\n",
    "        model_dict (dict): A dictionary containing PyTorch models with keys as model names.\n",
    "        save_dir (str): The directory where models will be saved.\n",
    "    \"\"\"\n",
    "    os.makedirs(save_dir, exist_ok=True)\n",
    "\n",
    "    for model_name, model_list in model_dict.items():\n",
    "        for i, model in enumerate(model_list):\n",
    "            model_path = os.path.join(save_dir, f'{model_name}')\n",
    "            torch.save([model.kwargs, model.state_dict()], model_path)\n",
    "            print(f\"Saved {model_name} to {model_path}\")\n",
    "\n",
    "def save_autoencoders_for_artifact(\n",
    "        autoencoders_base_big, autoencoders_base_small, autoencoders_rlhf_big, autoencoders_rlhf_small,\n",
    "        policy_model_name, hyperparameters, alias, run\n",
    "    ):\n",
    "    '''\n",
    "    Write the four autoencoder groups under the local 'saves' directory and log that\n",
    "    directory to ``run`` as a 'model' artifact.\n",
    "\n",
    "    The artifact name is ``artifact_prefix`` joined to the policy name (text after the last\n",
    "    '/', with '-' replaced by '_'). ``hyperparameters`` becomes the artifact metadata. The\n",
    "    artifact is logged under the sorted aliases: that policy name, 'latest', 'weights_tied'\n",
    "    and ``alias``.\n",
    "    '''\n",
    "    save_dir = 'saves'\n",
    "    groups = (\n",
    "        ('base_big', autoencoders_base_big),\n",
    "        ('base_small', autoencoders_base_small),\n",
    "        ('rlhf_big', autoencoders_rlhf_big),\n",
    "        ('rlhf_small', autoencoders_rlhf_small),\n",
    "    )\n",
    "    for subfolder, autoencoders in groups:\n",
    "        save_models_to_folder(autoencoders, save_dir=f'{save_dir}/{subfolder}')\n",
    "\n",
    "    simplified_policy_name = policy_model_name.rsplit('/', 1)[-1].replace(\"-\", \"_\")\n",
    "    saved_artifact = Artifact(\n",
    "        f'{artifact_prefix}_{simplified_policy_name}', metadata=hyperparameters, type='model'\n",
    "    )\n",
    "    saved_artifact.add_dir(save_dir, name=save_dir)\n",
    "\n",
    "    # A set removes duplicates (e.g. alias == 'latest') before the aliases are ordered.\n",
    "    aliases = sorted({simplified_policy_name, 'latest', 'weights_tied', alias})\n",
    "    run.log_artifact(saved_artifact, aliases=aliases)\n",
    "\n",
    "def load_autoencoders_for_artifact(policy_model_name, alias='latest', run=run):\n",
    "    '''\n",
    "    Download one autoencoder artifact through ``run`` and rebuild its four model groups.\n",
    "\n",
    "    The artifact path is assembled from the module-level ``entity_name``, ``project_prefix``\n",
    "    and ``artifact_prefix`` plus ``policy_model_name`` and ``alias``. The default for ``run``\n",
    "    is whatever the global ``run`` was when this function was defined.\n",
    "    For example, try autoencoders_dict = load_autoencoders_for_artifact('pythia_70m_sentiment_reward')\n",
    "\n",
    "    Returns a dict keyed 'base_big', 'base_small', 'rlhf_big' and 'rlhf_small'; each value\n",
    "    is what load_models_from_folder returns for the matching subfolder.\n",
    "    '''\n",
    "    simplified_policy_model_name = policy_model_name.split('/')[-1].replace('-', '_')\n",
    "    project = f'{project_prefix}_{policy_model_name}'\n",
    "    versioned_artifact = f'{artifact_prefix}_{simplified_policy_model_name}:{alias}'\n",
    "    full_path = f'{entity_name}/{project}/{versioned_artifact}'\n",
    "    print(f'Loading artifact from {full_path}')\n",
    "\n",
    "    directory = run.use_artifact(full_path).download()\n",
    "    save_dir = f'{directory}/saves'\n",
    "\n",
    "    group_names = ('base_big', 'base_small', 'rlhf_big', 'rlhf_small')\n",
    "    return {group: load_models_from_folder(f'{save_dir}/{group}') for group in group_names}\n",
    "\n",
    "def load_models_from_folder(load_dir):\n",
    "    \"\"\"\n",
    "    Load PyTorch models from subfolders of a directory into a dictionary where keys are subfolder names.\n",
    "\n",
    "    Args:\n",
    "        load_dir (str): The directory from which models will be loaded.\n",
    "\n",
    "    Returns:\n",
    "        model_dict (dict): A dictionary where keys are subfolder names and values are PyTorch models.\n",
    "    \"\"\"\n",
    "    model_dict = {}\n",
    "\n",
    "    device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "    for model_name in sorted(os.listdir(load_dir)):\n",
    "        model_path = os.path.join(load_dir, model_name)\n",
    "\n",
    "        kwargs, state = torch.load(model_path, map_location=device)\n",
    "\n",
    "        model = SparseAutoencoder(**kwargs)\n",
    "        model.load_state_dict(state)\n",
    "        model.to(device)\n",
    "        model.eval()\n",
    "\n",
    "        model_dict[model_name] = model\n",
    "        print(f\"Loaded {model_name} from {model_path}\")\n",
    "\n",
    "    return model_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "XrXZr_T-Mp9b"
   },
   "outputs": [],
   "source": [
    "# Fetch the 'latest' autoencoder artifact for the policy model through the active W&B run.\n",
    "loaded_models_dict = load_autoencoders_for_artifact(policy_model_name=policy_model_name, alias=\"latest\", run=run)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "gzhGRlpcV-Fz"
   },
   "source": [
    "## Cosine Similarity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "id": "Rgrkog_oqWWU"
   },
   "outputs": [],
   "source": [
    "def calculate_MMCS_hungarian(small_weights, big_weights):\n",
    "    device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "    small_weights = torch.tensor(small_weights).to(device)\n",
    "    big_weights = torch.tensor(big_weights).to(device)\n",
    "\n",
    "    small_weights_norm = torch.nn.functional.normalize(small_weights, p=2, dim=0)\n",
    "    big_weights_norm = torch.nn.functional.normalize(big_weights, p=2, dim=0)\n",
    "    cos_sims = torch.mm(small_weights_norm.T, big_weights_norm)\n",
    "    cos_sims_np = 1 - cos_sims.cpu().numpy()\n",
    "    row_ind, col_ind = linear_sum_assignment(cos_sims_np)\n",
    "    max_cosine_similarities = 1 - cos_sims_np[row_ind, col_ind]\n",
    "    mean_mmcs = np.mean(max_cosine_similarities)\n",
    "    sorted_indices = np.argsort(max_cosine_similarities)[::-1]\n",
    "\n",
    "    return mean_mmcs, sorted_indices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "id": "AH-kguI-8gO2"
   },
   "outputs": [],
   "source": [
    "def compare_autoencoders(small_dict, big_dict, top_k):\n",
    "    mmcs_results = {}\n",
    "\n",
    "    small_autoencoders_list = list(small_dict.values())\n",
    "    big_autoencoders_list = list(big_dict.values())\n",
    "    layer_names = list(small_dict.keys())\n",
    "\n",
    "    if len(small_autoencoders_list) != len(big_autoencoders_list):\n",
    "        raise ValueError(\"Length of small and big autoencoders lists must be the same.\")\n",
    "\n",
    "    for layer_name, (small_autoencoder, big_autoencoder) in zip(layer_names, zip(small_autoencoders_list, big_autoencoders_list)):\n",
    "        small_weights = small_autoencoder.encoder_weight.detach().cpu().numpy().T\n",
    "        big_weights = big_autoencoder.encoder_weight.detach().cpu().numpy().T\n",
    "\n",
    "        MMCS_value, sorted_indices = calculate_MMCS_hungarian(small_weights, big_weights)\n",
    "\n",
    "        top_k_indices = sorted_indices[:top_k].tolist()\n",
    "\n",
    "        mmcs_results[layer_name] = (MMCS_value, top_k_indices)\n",
    "\n",
    "    return mmcs_results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "5VBcqhkIWDV4"
   },
   "source": [
    "## Tokenization and Activations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "id": "uiBq3takwD_f"
   },
   "outputs": [],
   "source": [
    "def tokenize_imdb_data(imdb_data, num_samples):\n",
    "    \"\"\"\n",
    "    Tokenize a random sample of texts with the module-level ``tokenizer``.\n",
    "\n",
    "    ``num_samples`` texts are drawn without replacement via ``random.sample``. Only the\n",
    "    first 50 characters of each text are tokenized, and every text is encoded on its own.\n",
    "\n",
    "    Returns:\n",
    "        list: One tokenizer output per sampled text.\n",
    "    \"\"\"\n",
    "    sampled_texts = random.sample(imdb_data, num_samples)\n",
    "    return [\n",
    "        tokenizer(text[:50], return_tensors='pt', padding=True, truncation=True)\n",
    "        for text in sampled_texts\n",
    "    ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "id": "Y05G41cTZJxi"
   },
   "outputs": [],
   "source": [
    "def normalize_activations(activations, max_activation):\n",
    "    activations[activations < 0] = 0\n",
    "    normalized_activations = 10 * activations / max_activation\n",
    "    return normalized_activations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "id": "5fJwtX43VauW"
   },
   "outputs": [],
   "source": [
    "def discretize_activations(normalized_activations):\n",
    "    return np.round(normalized_activations).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "id": "tO_f0gc4VdPm"
   },
   "outputs": [],
   "source": [
    "def handle_sparse_activations(tokens, discretized_activations):\n",
    "    non_zero_indices = np.where(discretized_activations != 0)[0]\n",
    "    if len(non_zero_indices) / len(discretized_activations) < 0.2:\n",
    "        repeated_tokens = [tokens[i] for i in non_zero_indices]\n",
    "        repeated_activations = [discretized_activations[i] for i in non_zero_indices]\n",
    "        tokens += repeated_tokens\n",
    "        discretized_activations = np.concatenate([discretized_activations, repeated_activations])\n",
    "    return tokens, discretized_activations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "pTRXfB7PWJhX"
   },
   "source": [
    "## Autointerpretability"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "colab": {
     "base_uri": "XXXX"
    },
    "id": "KtFUbUJ7FZ5r",
    "outputId": "3867f80d-6c50-45f1-ed6c-2e359ff032fe"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "API Key: ··········\n"
     ]
    }
   ],
   "source": [
    "# Prompt for the API key without echoing it; the OpenAI helper functions read `key`.\n",
    "key = getpass('API Key: ')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "id": "mtrMlz7UGGVr"
   },
   "outputs": [],
   "source": [
    "def get_feature_explanation(feature_index, top_5_activation_records_for_feature):\n",
    "    \"\"\"\n",
    "    Ask the OpenAI chat model to describe what a feature responds to.\n",
    "\n",
    "    The conversation holds a system prompt, one user message per activation record\n",
    "    (numbered from 0) and a closing request. The module-level ``key`` is assigned to\n",
    "    ``openai.api_key`` before the call.\n",
    "\n",
    "    Args:\n",
    "        feature_index: Index quoted in every prompt.\n",
    "        top_5_activation_records_for_feature: Iterable of activation strings, one per example.\n",
    "\n",
    "    Returns:\n",
    "        str: The first choice's message content, stripped of surrounding whitespace.\n",
    "    \"\"\"\n",
    "    system_prompt = \"We are studying features in a large language model. Each feature looks for some particular pattern in a dataset. Look at the parts of the dataset the feature activates for, and summarize in a single sentence what the feature is looking for. The activation format is token<tab>activation. Activation values range from 0 to 10. A feature finding what it's looking for is represented by a non-zero activation value. The higher the activation value, the stronger the match.\"\n",
    "\n",
    "    example_messages = [\n",
    "        {\"role\": \"user\", \"content\": f\"Feature {feature_index}\\nTop Activation Example {record_idx}:\\n{activation_str}\"}\n",
    "        for record_idx, activation_str in enumerate(top_5_activation_records_for_feature)\n",
    "    ]\n",
    "    closing_request = {\"role\": \"user\", \"content\": f\"Explain what the feature at index {feature_index} in a large language model might be doing based on the top 5 activation records.\"}\n",
    "\n",
    "    messages = [{\"role\": \"system\", \"content\": system_prompt}, *example_messages, closing_request]\n",
    "\n",
    "    openai.api_key = key\n",
    "    response = openai.ChatCompletion.create(\n",
    "        model=\"gpt-4-turbo-preview\",\n",
    "        messages=messages,\n",
    "        temperature=0,\n",
    "    )\n",
    "\n",
    "    return response['choices'][0]['message']['content'].strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "id": "LOFxwAm0_OsH"
   },
   "outputs": [],
   "source": [
    "def classify_feature(feature_desc):\n",
    "    \"\"\"\n",
    "    Ask the OpenAI chat model whether a feature description is sentiment-related.\n",
    "\n",
    "    The prompt requests a single token, 1 (sentiment-related) or 0. The module-level\n",
    "    ``key`` is assigned to ``openai.api_key`` before the call.\n",
    "\n",
    "    Args:\n",
    "        feature_desc: Natural-language description of the feature.\n",
    "\n",
    "    Returns:\n",
    "        str: The first choice's message content, stripped of surrounding whitespace.\n",
    "    \"\"\"\n",
    "    system_prompt = \"We are studying features in a large language model. Each feature looks for some particular pattern in a dataset. Classify features based on descriptions of them. You absolutely must return a valid classification for every input. Do not state that there is not enough information.\"\n",
    "    instruction = \"Return 1 if the feature is related to sentiment, and 0 otherwise. For example, 1 would be the correct classification for a feature that detects negative sentiment, positive sentiment, or words related to sentiment. Only return a single token: 1 or 0.\"\n",
    "\n",
    "    messages = [\n",
    "        {\"role\": \"system\", \"content\": system_prompt},\n",
    "        {\"role\": \"user\", \"content\": f\"Feature description: {feature_desc}\"},\n",
    "        {\"role\": \"user\", \"content\": instruction},\n",
    "    ]\n",
    "\n",
    "    openai.api_key = key\n",
    "    response = openai.ChatCompletion.create(\n",
    "        model=\"gpt-4-turbo-preview\",\n",
    "        messages=messages,\n",
    "        temperature=0,\n",
    "    )\n",
    "\n",
    "    return response['choices'][0]['message']['content'].strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "id": "pqS_SXJUm92U"
   },
   "outputs": [],
   "source": [
    "def get_activations(dtokens, model, autoencoder, layer_num, device=device):\n",
    "    \"\"\"\n",
    "    Run ``model`` up to block ``layer_num`` and pass that block's MLP output through ``autoencoder``.\n",
    "\n",
    "    Token and position embeddings are summed and passed through ``model.drop``, then through\n",
    "    blocks ``0..layer_num`` (element 0 of each block's output is the next hidden state).\n",
    "    The result is fed to ``model.h[layer_num].mlp`` and then to the autoencoder. The whole\n",
    "    computation runs without autograd.\n",
    "\n",
    "    Args:\n",
    "        dtokens: Mapping with an ``'input_ids'`` tensor; its size along dim 1 is the sequence length.\n",
    "        model: Object exposing ``wte``, ``wpe``, ``drop`` and an indexable ``h`` of blocks.\n",
    "        autoencoder: Callable returning a two-element result.\n",
    "        layer_num (int): Index of the last block to apply.\n",
    "        device: Device for the input ids and position ids.\n",
    "\n",
    "    Returns:\n",
    "        The second element of the autoencoder's output.\n",
    "\n",
    "    NOTE(review): for ``SparseAutoencoder`` the second element is the reconstruction, not the\n",
    "    sparse feature activations, although the name and the callers suggest per-feature values.\n",
    "    Confirm which one is intended before changing it.\n",
    "    \"\"\"\n",
    "    input_ids_tensor = dtokens['input_ids'].to(device)\n",
    "    sequence_length = input_ids_tensor.size(1)\n",
    "\n",
    "    # Inference only: keep the embedding lookups out of autograd as well.\n",
    "    with torch.no_grad():\n",
    "        position_ids = torch.arange(sequence_length, dtype=torch.long, device=device)\n",
    "        position_ids = position_ids.unsqueeze(0).expand_as(input_ids_tensor)\n",
    "\n",
    "        token_embeddings = model.wte(input_ids_tensor)\n",
    "        position_embeddings = model.wpe(position_ids)\n",
    "\n",
    "        embeddings = token_embeddings + position_embeddings\n",
    "        embeddings = model.drop(embeddings)\n",
    "\n",
    "        for i in range(layer_num + 1):\n",
    "            block_output = model.h[i](embeddings)\n",
    "            embeddings = block_output[0]\n",
    "\n",
    "        layer_output = embeddings\n",
    "        mlp_activations = model.h[layer_num].mlp(layer_output)\n",
    "        _, reconstructed_activations = autoencoder(mlp_activations)\n",
    "\n",
    "    return reconstructed_activations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "9dsrUFn2WRrW"
   },
   "source": [
    "## Classify Features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "id": "BBfWxaJUVsRu"
   },
   "outputs": [],
   "source": [
    "def calculate_utility_from_model(model, tokenizer, autoencoders, imdb_data, num_samples, k, layers, device=device):\n",
    "    \"\"\"\n",
    "    Collect a natural-language explanation for the best-matching features of each autoencoder.\n",
    "\n",
    "    Features are ranked by compare_autoencoders on the 'small' and 'big' groups. For each\n",
    "    candidate, activations over the sampled texts are scaled to 0-10 per text and rounded.\n",
    "    Five of the twenty strongest records are drawn at random and sent to\n",
    "    get_feature_explanation. A candidate whose five records contain no positive activation\n",
    "    is skipped and does not count towards ``k``.\n",
    "\n",
    "    Args:\n",
    "        model: Language model handed to get_activations.\n",
    "        tokenizer: Used here to turn input ids back into tokens. Sampling itself goes through\n",
    "            tokenize_imdb_data, which reads the module-level tokenizer.\n",
    "        autoencoders (dict): Has 'small' and 'big' entries, each mapping a name to an autoencoder.\n",
    "        imdb_data: Sequence of texts to sample from.\n",
    "        num_samples (int): Number of texts to sample.\n",
    "        k (int): Number of features to explain per autoencoder name.\n",
    "        layers: Iterable of block indices.\n",
    "        device: Device for the 'big' autoencoder and the forward pass.\n",
    "\n",
    "    Returns:\n",
    "        dict: name -> {feature_index: explanation}.\n",
    "\n",
    "    NOTE(review): the ``return`` sits inside the ``for layer_num`` loop, so only the first\n",
    "    entry of ``layers`` is ever used, and it is applied to every autoencoder name. The\n",
    "    intended mapping between ``layers`` and autoencoder names is not visible here, so the\n",
    "    behaviour is left unchanged.\n",
    "    \"\"\"\n",
    "    tokenized_data = tokenize_imdb_data(imdb_data, num_samples)\n",
    "    results_dict = {}\n",
    "    # Ask for 500 more candidates than needed, because some are skipped below.\n",
    "    similarity_results = compare_autoencoders(autoencoders['small'], autoencoders['big'], k + 500)\n",
    "\n",
    "    for layer_num in layers:\n",
    "        for layer_name, (_, top_k_indices) in similarity_results.items():\n",
    "            autoencoder = autoencoders['big'][layer_name].to(device)\n",
    "            results_dict[layer_name] = {}\n",
    "\n",
    "            # The forward pass does not depend on the feature, so it is run once per text\n",
    "            # (on first use) and sliced for each candidate feature.\n",
    "            per_sample_activations = None\n",
    "\n",
    "            valid_features_processed = 0\n",
    "            for feature_index in top_k_indices:\n",
    "                print(valid_features_processed)\n",
    "                if valid_features_processed >= k:\n",
    "                    break\n",
    "\n",
    "                if per_sample_activations is None:\n",
    "                    per_sample_activations = [\n",
    "                        get_activations(dtokens, model, autoencoder, layer_num, device)[0].detach().cpu().numpy()\n",
    "                        for dtokens in tokenized_data\n",
    "                    ]\n",
    "\n",
    "                activations_for_feature = []\n",
    "\n",
    "                for dtokens, sample_activations in zip(tokenized_data, per_sample_activations):\n",
    "                    # Copy so the helpers can never modify the cached array.\n",
    "                    real_activations = sample_activations[:, feature_index].copy()\n",
    "                    max_activation = np.max(real_activations)\n",
    "                    normalized_activations = normalize_activations(real_activations, max_activation)\n",
    "                    discretized_activations = discretize_activations(normalized_activations)\n",
    "\n",
    "                    activations_for_feature.append((dtokens, discretized_activations))\n",
    "\n",
    "                top_20_for_feature = sorted(activations_for_feature, key=lambda x: np.max(x[1]), reverse=True)[:20]\n",
    "\n",
    "                if len(top_20_for_feature) >= 5:\n",
    "                    selected_activations = random.sample(top_20_for_feature, 5)\n",
    "                else:\n",
    "                    selected_activations = top_20_for_feature\n",
    "\n",
    "                if all(np.max(activations) <= 0 for _, activations in selected_activations):\n",
    "                    print(f\"Skipping feature index {feature_index} due to no significant activation\")\n",
    "                    continue\n",
    "\n",
    "                valid_features_processed += 1\n",
    "\n",
    "                top_5_activation_examples = []\n",
    "                for dtokens, activations in selected_activations:\n",
    "                    tokens = tokenizer.convert_ids_to_tokens(dtokens['input_ids'][0])\n",
    "                    tokens, activations = handle_sparse_activations(tokens, activations)\n",
    "\n",
    "                    # One \"token<tab>activation\" line per token, as the explanation prompt expects.\n",
    "                    activation_strings = [f\"{token}\\t{activation}\" for token, activation in zip(tokens, activations)]\n",
    "                    top_5_activation_examples.append(\"\\n\".join(activation_strings))\n",
    "\n",
    "                results_dict[layer_name][feature_index] = get_feature_explanation(feature_index, top_5_activation_examples)\n",
    "\n",
    "        return results_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "id": "-ddrRzYEKUM5"
   },
   "outputs": [],
   "source": [
    "def classify_dict(feature_dict):\n",
    "    rm_related = []\n",
    "    for layer, features in feature_dict.items():\n",
    "        for feature_index, desc in features.items():\n",
    "            classification = classify_feature(desc)\n",
    "            if classification == \"1\":\n",
    "                rm_related.append((layer, feature_index))\n",
    "\n",
    "    return rm_related"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "lVX6cDt_OHzm"
   },
   "outputs": [],
   "source": [
    "# The raw text of every entry in the IMDB test split is the pool that samples are drawn from.\n",
    "imdb_dataset = load_dataset('imdb', split='test')\n",
    "imdb_data = [entry['text'] for entry in imdb_dataset]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "r_87CpTqWW78"
   },
   "outputs": [],
   "source": [
    "# Load the policy model and its tokenizer by their \"amirabdullah19852020/<name>\" identifier.\n",
    "language_model = \"gpt-neo-125m_utility_reward\"\n",
    "model = AutoModel.from_pretrained(f\"amirabdullah19852020/{language_model}\").to(device)\n",
    "tokenizer = AutoTokenizer.from_pretrained(f\"amirabdullah19852020/{language_model}\")\n",
    "# Only the RLHF autoencoders are compared here.\n",
    "autoencoders = {'small': loaded_models_dict['rlhf_small'], 'big': loaded_models_dict['rlhf_big']}\n",
    "\n",
    "layers = [1, 2, 3, 4, 5]\n",
    "\n",
    "# Explain up to k=30 features per autoencoder, then keep the (layer, feature_index) pairs\n",
    "# that the classifier labels as sentiment-related.\n",
    "feature_dict = calculate_utility_from_model(model, tokenizer, autoencoders, imdb_data, num_samples=1000, k=30, layers=layers)\n",
    "to_ablate = classify_dict(feature_dict)"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [
    "gHuukYmU123X",
    "UB0OrAXz3sid",
    "gzhGRlpcV-Fz",
    "5VBcqhkIWDV4"
   ],
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
