{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b495a568",
   "metadata": {
    "papermill": {
     "duration": 0.003654,
     "end_time": "2025-09-20T22:41:25.711404",
     "exception": false,
     "start_time": "2025-09-20T22:41:25.707750",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "3359fbd9",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-20T22:41:25.718574Z",
     "iopub.status.busy": "2025-09-20T22:41:25.718309Z",
     "iopub.status.idle": "2025-09-20T22:41:35.307158Z",
     "shell.execute_reply": "2025-09-20T22:41:35.306544Z"
    },
    "papermill": {
     "duration": 9.593959,
     "end_time": "2025-09-20T22:41:35.308520",
     "exception": false,
     "start_time": "2025-09-20T22:41:25.714561",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# --- MATS causal interventions + hydra-style analysis (Llama-3.2-3B) ---\n",
    "\n",
    "import os, math, json, random, gc, pathlib\n",
    "from dataclasses import dataclass\n",
    "from typing import Dict, List, Tuple, Optional, Literal\n",
    "\n",
    "import numpy as np\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "SEED = 123\n",
    "random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)\n",
    "\n",
    "\n",
    "\n",
    "# Evaluation settings\n",
    "GEN_MAX_NEW_TOKENS = 64\n",
    "TEMPERATURE = 0.0\n",
    "TOP_K = None\n",
    "\n",
    "# Anchor positions we evaluate hydra/kl at:\n",
    "EVAL_POS = \"last\"   # 'last' or 'first_code' (we compute first code token index)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "bb444295",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-20T22:41:35.315437Z",
     "iopub.status.busy": "2025-09-20T22:41:35.315114Z",
     "iopub.status.idle": "2025-09-20T22:42:50.444142Z",
     "shell.execute_reply": "2025-09-20T22:42:50.443259Z"
    },
    "papermill": {
     "duration": 75.135098,
     "end_time": "2025-09-20T22:42:50.446726",
     "exception": false,
     "start_time": "2025-09-20T22:41:35.311628",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ce239f46341f4074aeb56d691c472a43",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# ---------------- Config ----------------\n",
    "\n",
    "HF_TOKEN   = os.environ.get(\"HF_TOKEN\")\n",
    "\n",
    "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
    "DTYPE  = torch.float16\n",
    "MODEL_NAME = \"meta-llama/Llama-3.2-3B\"\n",
    "# ---------------- Load model ----------------\n",
    "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN, use_fast=True)\n",
    "if tokenizer.pad_token is None:\n",
    "    tokenizer.pad_token = tokenizer.eos_token\n",
    "\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    MODEL_NAME, token=HF_TOKEN, torch_dtype=DTYPE, low_cpu_mem_usage=True\n",
    ").to(DEVICE).eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "68d6ae76",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-20T22:42:50.457573Z",
     "iopub.status.busy": "2025-09-20T22:42:50.457082Z",
     "iopub.status.idle": "2025-09-20T22:42:50.515918Z",
     "shell.execute_reply": "2025-09-20T22:42:50.515054Z"
    },
    "papermill": {
     "duration": 0.065332,
     "end_time": "2025-09-20T22:42:50.517606",
     "exception": false,
     "start_time": "2025-09-20T22:42:50.452274",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'cpp_top': [1872, 396, 26, 90, 92, 1004, 4930, 693, 2501, 2511], 'python_top': [25, 4155, 755, 1374, 419, 2963, 9866, 496, 11, 1680], 'science': [17947, 13284, 37791, 9125, 2664, 2067, 74441, 76, 4238, 66650], 'medical': [1593, 2945, 91899, 23557, 51105, 24738, 67, 8747, 92724, 9233], 'finance': [10348, 5143, 66163, 19930, 18081, 5271, 5143, 7853, 46458, 970], 'math': [5143, 35676, 4174, 716, 16845, 15487, 3990, 606, 6173, 29359]}\n"
     ]
    }
   ],
   "source": [
    "# Create a dataset pair specific config file that can be passed everywhere\n",
    "\n",
    "token_list_value = {\n",
    "    \"cpp_top\": [\"std\", \"int\", \";\", \"{\", \"}\", \"[i\", \"long\", \"return\", \"<<\", \">>\"],\n",
    "    \"python_top\": [\":\", \"None\", \"def\", \"print\", \"==\", \"len\", \"range\", \"str\", \",\", \"):\"],\n",
    "    \"science\": [\"energy\", \"water\", \"cells\", \"system\", \"body\", \"blod\", \"carbon\", \"molecules\", \"light\", \"atoms\"],\n",
    "    \"medical\": [\"cause\", \"options\", \"clinical\", \"patient\", \"blood\", \"symptoms\", \"disease\", \"diagnosis\", \"pain\", \"condition\"],\n",
    "    \"finance\": [\"company\", \"total\", \"capital\", \"share\", \"tax\", \"assets\", \"total\", \"rate\", \"ratio\", \"value\"],\n",
    "    \"math\": [\"total\", \"many\", \"number\", \"per\", \"cost\", \"times\", \"find\", \"one\", \"amount\", \"money\"]\n",
    "}\n",
    "def tok(s: str):\n",
    "    return tokenizer(s, return_tensors=\"pt\").to(DEVICE)\n",
    "\n",
    "def untok(s: int):\n",
    "    return tokenizer.decode(s)\n",
    "# [tok(i)['input_ids'][0][1].item() for i in config_dict['cpp-python'].a.token_set]\n",
    "token_list = {key: [tok(i)['input_ids'][0][1].item() for i in value] for key, value in token_list_value.items()}\n",
    "\n",
    "print(token_list)\n",
    "class Language:\n",
    "    def __init__(self, name: str, token_set: list[str] | None = None):\n",
    "        self.name = name\n",
    "        if token_set is None:\n",
    "            self.token_set = self.load_specific()\n",
    "        else:\n",
    "            self.token_set = token_set\n",
    "        \n",
    "    def load_tokens(self):\n",
    "        with open(\"token_set_id.json\", 'r') as f:\n",
    "            data = json.load(f)\n",
    "        if self.name == \"cpp_top\":\n",
    "            return data[\"cpp_clean\"]\n",
    "        if self.name == \"python_top\":\n",
    "            return data[\"python_clean\"]\n",
    "        return data[self.name]\n",
    "    def load_specific(self):\n",
    "        return token_list[self.name]\n",
    "    \n",
    "\n",
    "class DatasetPairConfig:\n",
    "    def __init__(self, lang_a: str, lang_b: str, file_path: str):\n",
    "        self.a = Language(lang_a)\n",
    "        self.b = Language(lang_b)\n",
    "        self.prompts = self.load_file(file_path)\n",
    "        \n",
    "    def load_file(self, file_path):\n",
    "        with open(file_path, 'r') as f:\n",
    "            data = json.load(f)\n",
    "        prompts = []\n",
    "        for element in data:\n",
    "            prompts.append((element[self.a.name], element[self.b.name]))\n",
    "        return prompts\n",
    "        \n",
    "        \n",
    "# CPP_TOKENS = {\"def\", \"import\", \":\", \"pass\"}     # Python-ish indicators\n",
    "# PYTHON_TOKENS = {\";\", \"::\", \"std\", \"#\"}            # C++-ish indicators; '#' approximates #include\n",
    "\n",
    "config_dict = {\n",
    "    \"cpp-python\": DatasetPairConfig(\"cpp_top\", \"python_top\", \"prompt_set/cpp_python_100.json\"),\n",
    "    # \"sci-math\": DatasetPairConfig(\"science\", \"math\", \"prompt_set/sci_math_prompts.json\"),\n",
    "    # \"medical-finance\": DatasetPairConfig(\"medical\", \"finance\", \"prompt_set/fin_med_prompts.json\"),\n",
    "}\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "040a35e3",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-20T22:42:50.534209Z",
     "iopub.status.busy": "2025-09-20T22:42:50.533973Z",
     "iopub.status.idle": "2025-09-20T22:42:50.537784Z",
     "shell.execute_reply": "2025-09-20T22:42:50.536963Z"
    },
    "papermill": {
     "duration": 0.015764,
     "end_time": "2025-09-20T22:42:50.538984",
     "exception": false,
     "start_time": "2025-09-20T22:42:50.523220",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# def one_token_ids_scan(cands):\n",
    "#     og_len = len(cands)\n",
    "#     ids, keep, remove = [], [], []\n",
    "#     for s in cands:\n",
    "#         t = tokenizer.encode(s, add_special_tokens=False)\n",
    "#         if len(t) == 1:\n",
    "#             ids.append(t[0]); keep.append(s)\n",
    "#         else:\n",
    "#             print(f\" ids = {t}, token = {s}\")\n",
    "#             remove.append(s)\n",
    "            \n",
    "#     new_len = len(keep)\n",
    "#     print(f\"removed {og_len} - {new_len} = {og_len - new_len} tokens\")\n",
    "#     print(\"These tokens were removed: \")\n",
    "#     print(remove)\n",
    "#     return ids, keep\n",
    "\n",
    "# ids, tokens = one_token_ids_scan(config.a.token_set)\n",
    "# ids, tokens = one_token_ids_scan(config.b.token_set)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "18442a7c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-20T22:42:50.548301Z",
     "iopub.status.busy": "2025-09-20T22:42:50.547821Z",
     "iopub.status.idle": "2025-09-20T22:42:50.581507Z",
     "shell.execute_reply": "2025-09-20T22:42:50.580642Z"
    },
    "papermill": {
     "duration": 0.040078,
     "end_time": "2025-09-20T22:42:50.582761",
     "exception": false,
     "start_time": "2025-09-20T22:42:50.542683",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# ---------------- Utilities ----------------\n",
    "def tok(s: str):\n",
    "    return tokenizer(s, return_tensors=\"pt\").to(DEVICE)\n",
    "\n",
    "def generate_logits(prompt: str, max_new_tokens=0):\n",
    "    \"\"\"Return logits over next token for the prompt (no generation if max_new_tokens=0).\"\"\"\n",
    "    with torch.no_grad():\n",
    "        enc = tok(prompt)\n",
    "        out = model(**enc)\n",
    "        logits = out.logits  # [B, T, V]\n",
    "    return enc, logits\n",
    "\n",
    "def pick_eval_index(input_ids: torch.Tensor, text: str, mode: str) -> int:\n",
    "    # mode 'last': last token in the prompt; 'first_code': first token after a code marker\n",
    "    if mode == \"last\":\n",
    "        return input_ids.shape[1] - 1\n",
    "    if mode == \"first_code\":\n",
    "        # Heuristic: find \"```\" or a language marker in the prompt text\n",
    "        idx = text.find(\"```\")\n",
    "        if idx < 0: return input_ids.shape[1] - 1\n",
    "        # tokenize up to the backticks and use its length - 1\n",
    "        prefix = text[:idx+3]\n",
    "        with torch.no_grad():\n",
    "            ids = tokenizer(prefix, return_tensors=\"pt\").to(DEVICE)[\"input_ids\"]\n",
    "        return min(ids.shape[1]-1, input_ids.shape[1]-1)\n",
    "    return input_ids.shape[1] - 1\n",
    "\n",
    "def kl_divergence(p_logits, q_logits):\n",
    "    # KL(p || q) at a single position; stable softmax\n",
    "    p = F.log_softmax(p_logits, dim=-1)\n",
    "    q = F.log_softmax(q_logits, dim=-1)\n",
    "    return torch.sum(torch.exp(p) * (p - q), dim=-1)\n",
    "\n",
    "def l2(x): return torch.norm(x.float(), dim=-1)\n",
    "\n",
    "def token_prob_sum(logits, token_strs: List[str]) -> float:\n",
    "    ids = []\n",
    "    for s in token_strs:\n",
    "        toks = tokenizer.encode(s, add_special_tokens=False)\n",
    "        if len(toks) == 1:\n",
    "            ids.append(toks[0])\n",
    "    if not ids: return 0.0\n",
    "    probs = F.softmax(logits, dim=-1)[..., ids]\n",
    "    return probs.sum(-1).item()\n",
    "\n",
    "# ---------------- Hooking helpers ----------------\n",
    "@dataclass(frozen=True)\n",
    "class LayerSpec:\n",
    "    kind: Literal[\"attn\",\"mlp\"]\n",
    "    idx: int\n",
    "\n",
    "def get_layer_module(m: nn.Module, spec: LayerSpec) -> nn.Module:\n",
    "    # Llama: model.model.layers[i].self_attn / .mlp\n",
    "    return getattr(m.model.layers[spec.idx], \"self_attn\" if spec.kind==\"attn\" else \"mlp\")\n",
    "\n",
    "def _as_tensor(output):\n",
    "    # Some HF modules may return tuple; we normalize to a tensor for replacement logic\n",
    "    if isinstance(output, tuple):\n",
    "        return output[0]\n",
    "    return output\n",
    "\n",
    "def _repack_like(original_output, new_tensor):\n",
    "    # Put new_tensor back into the original structure if needed\n",
    "    if isinstance(original_output, tuple):\n",
    "        lst = list(original_output)\n",
    "        lst[0] = new_tensor\n",
    "        return tuple(lst)\n",
    "    return new_tensor\n",
    "\n",
    "class Capture:\n",
    "    \"\"\"Capture module outputs (pre-residual) at all sequence positions.\"\"\"\n",
    "    def __init__(self, model: nn.Module, specs: List[LayerSpec]):\n",
    "        self.handles = []\n",
    "        self.data: Dict[LayerSpec, torch.Tensor] = {}\n",
    "        for s in specs:\n",
    "            mod = get_layer_module(model, s)\n",
    "            h = mod.register_forward_hook(self._make_hook(s))\n",
    "            self.handles.append(h)\n",
    "    def _make_hook(self, spec):\n",
    "        def hook(module, inp, out):\n",
    "            self.data[spec] = _as_tensor(out).detach()\n",
    "        return hook\n",
    "    def remove(self):\n",
    "        for h in self.handles: h.remove()\n",
    "        self.handles = []\n",
    "\n",
    "class Intervention:\n",
    "    \"\"\"Generic intervention hook using a function f(spec, output)->new_output.\"\"\"\n",
    "    def __init__(self, model: nn.Module, specs: List[LayerSpec], fn):\n",
    "        self.handles = []\n",
    "        self.fn = fn\n",
    "        for s in specs:\n",
    "            mod = get_layer_module(model, s)\n",
    "            h = mod.register_forward_hook(self._make_hook(s))\n",
    "            self.handles.append(h)\n",
    "    def _make_hook(self, spec):\n",
    "        def hook(module, inp, out):\n",
    "            out_t = _as_tensor(out)\n",
    "            new_t = self.fn(spec, out_t)\n",
    "            return _repack_like(out, new_t)\n",
    "        return hook\n",
    "    def remove(self):\n",
    "        for h in self.handles: h.remove()\n",
    "        self.handles = []\n",
    "\n",
    "# ---------------- Core experiments ----------------\n",
    "def run_baseline(prompt: str):\n",
    "    with torch.no_grad():\n",
    "        enc = tok(prompt)\n",
    "        out = model(**enc)\n",
    "        return enc, out.logits, out.hidden_states if hasattr(out, \"hidden_states\") else None\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "def cross_swap_once(config: DatasetPairConfig, idx, spec: LayerSpec, eval_pos=\"last\"):\n",
    "    \"\"\"Swap the module output at spec between A and B runs (last-token position).\"\"\"\n",
    "    with torch.no_grad():\n",
    "        prompt_a = config.prompts[idx][0]\n",
    "        prompt_b = config.prompts[idx][1]\n",
    "        enc_a = tok(prompt_a); enc_b = tok(prompt_b)\n",
    "\n",
    "        # Capture outputs\n",
    "        cap_a = Capture(model, [spec]); _ = model(**enc_a); cap_a.remove()\n",
    "        cap_b = Capture(model, [spec]); _ = model(**enc_b); cap_b.remove()\n",
    "        A = cap_a.data[spec]; B = cap_b.data[spec]  # [1, T, d]\n",
    "\n",
    "        # Align lengths; use last token only to be safe\n",
    "        def swap_fn_factory(src_vec):\n",
    "            def fn(s, out):\n",
    "                out2 = out.clone()\n",
    "                out2[:, -1, :] = src_vec[:, -1, :].to(out2.dtype).to(out2.device)\n",
    "                return out2\n",
    "            return fn\n",
    "\n",
    "        pos_a = pick_eval_index(enc_a[\"input_ids\"], prompt_a, eval_pos)\n",
    "        pos_b = pick_eval_index(enc_b[\"input_ids\"], prompt_b, eval_pos)\n",
    "\n",
    "        # Baseline logits at pos\n",
    "        base_a = model(**enc_a).logits[:, pos_a, :]\n",
    "        base_b = model(**enc_b).logits[:, pos_b, :]\n",
    "\n",
    "        # A <- B\n",
    "        hook = Intervention(model, [spec], swap_fn_factory(B))\n",
    "        out = model(**enc_a); hook.remove()\n",
    "        swap_a = out.logits[:, pos_a, :]\n",
    "\n",
    "        # B <- A\n",
    "        hook = Intervention(model, [spec], swap_fn_factory(A))\n",
    "        out = model(**enc_b); hook.remove()\n",
    "        swap_b = out.logits[:, pos_b, :]\n",
    "\n",
    "        # Language-bias scores\n",
    "        score_a_before = token_prob_sum(base_a, list(config.a.token_set)) - token_prob_sum(base_a, list(config.b.token_set))\n",
    "        score_a_after  = token_prob_sum(swap_a, list(config.a.token_set)) - token_prob_sum(swap_a, list(config.b.token_set))\n",
    "        score_b_before = token_prob_sum(base_b, list(config.b.token_set)) - token_prob_sum(base_b, list(config.a.token_set))\n",
    "        score_b_after  = token_prob_sum(swap_b, list(config.b.token_set)) - token_prob_sum(swap_b, list(config.a.token_set))\n",
    "        push_b = token_prob_sum(swap_a, list(config.b.token_set)) - token_prob_sum(base_a, list(config.b.token_set))\n",
    "        push_a = token_prob_sum(swap_b, list(config.a.token_set)) - token_prob_sum(base_b, list(config.a.token_set))\n",
    "        kl_a = kl_divergence(base_a, swap_a).item()\n",
    "        kl_b = kl_divergence(base_b, swap_b).item()\n",
    "\n",
    "        return {\n",
    "            \"kl_a\": kl_a, \"kl_b\": kl_b,\n",
    "            # \"score_a_before\": score_a_before, \"score_a_after\": score_a_after,\n",
    "            # \"score_b_before\": score_b_before, \"score_b_after\": score_b_after,\n",
    "            \"push_b_in_a\": push_b, \"push_a_in_b\": push_a\n",
    "        }\n",
    "\n",
    "def compute_concept_means(samples: List[str], specs: List[LayerSpec], max_samples: int = 200):\n",
    "    \"\"\"Mean module output per spec over samples (last token).\"\"\"\n",
    "    means: Dict[LayerSpec, torch.Tensor] = {}\n",
    "    sums: Dict[LayerSpec, torch.Tensor] = {}\n",
    "    count = 0\n",
    "    with torch.no_grad():\n",
    "        for s in samples[:max_samples]:\n",
    "            enc = tok(s)\n",
    "            cap = Capture(model, specs); _ = model(**enc); cap.remove()\n",
    "            for spec in specs:\n",
    "                vec = cap.data[spec][:, -1, :]  # [1, d], last token\n",
    "                if spec not in sums: sums[spec] = vec.clone().to(\"cpu\")\n",
    "                else: sums[spec] += vec.to(\"cpu\")\n",
    "            count += 1\n",
    "    for spec in specs:\n",
    "        means[spec] = (sums[spec] / count)\n",
    "    return means\n",
    "\n",
    "\n",
    "# ---------------- Plot helpers ----------------\n",
    "def plot_kl_curve(kl_avg: Dict[LayerSpec, float], title=\"KL after zero-ablation by layer\"):\n",
    "    xs_attn, ys_attn = zip(*sorted([(s.idx, v) for s,v in kl_avg.items() if s.kind==\"attn\"]))\n",
    "    xs_mlp,  ys_mlp  = zip(*sorted([(s.idx, v) for s,v in kl_avg.items() if s.kind==\"mlp\"]))\n",
    "    plt.figure()\n",
    "    plt.plot(xs_attn, ys_attn, marker=\"o\", label=\"Attention\")\n",
    "    plt.plot(xs_mlp, ys_mlp, marker=\"o\", label=\"MLP\")\n",
    "    plt.xlabel(\"Layer index\"); plt.ylabel(\"KL(baseline || ablated)\")\n",
    "    plt.title(title); plt.legend(); plt.show()\n",
    "\n",
    "def plot_hydra(hydra_avg: Dict[LayerSpec, Tuple[float,float]], title=\"Hydra: Δembed vs Δunembed (KL)\"):\n",
    "    xs, ys, cs = [], [], []\n",
    "    for s,(demb, kl) in hydra_avg.items():\n",
    "        xs.append(demb); ys.append(kl); cs.append(\"attn\" if s.kind==\"attn\" else \"mlp\")\n",
    "    plt.figure()\n",
    "    for kind in [\"attn\",\"mlp\"]:\n",
    "        X = [x for x,c in zip(xs,cs) if c==kind]\n",
    "        Y = [y for y,c in zip(ys,cs) if c==kind]\n",
    "        plt.scatter(X, Y, label=kind, alpha=0.8)\n",
    "    plt.xlabel(\"‖Δembed‖ at intervention\"); plt.ylabel(\"KL at output\")\n",
    "    plt.title(title); plt.legend(); plt.show()\n",
    "\n",
    "def plot_alpha_curve(results, title=\"Concept-vector injection\"):\n",
    "    alphas = [a for a,_,_ in results]\n",
    "    scores = [s for _,s,_ in results]\n",
    "    kls    = [k for *_,k in results]\n",
    "    plt.figure(); plt.plot(alphas, scores, marker=\"o\"); plt.xlabel(\"alpha\"); plt.ylabel(\"Language-bias score\"); plt.title(title + \" (bias)\"); plt.show()\n",
    "    plt.figure(); plt.plot(alphas, kls, marker=\"o\"); plt.xlabel(\"alpha\"); plt.ylabel(\"KL\"); plt.title(title + \" (KL)\"); plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "c28d8d61",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-20T22:42:50.593168Z",
     "iopub.status.busy": "2025-09-20T22:42:50.592644Z",
     "iopub.status.idle": "2025-09-20T22:42:50.640069Z",
     "shell.execute_reply": "2025-09-20T22:42:50.639236Z"
    },
    "papermill": {
     "duration": 0.055289,
     "end_time": "2025-09-20T22:42:50.642016",
     "exception": false,
     "start_time": "2025-09-20T22:42:50.586727",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# ========= INTERPRETABLE METRICS =========\n",
    "import torch\n",
    "import torch.nn.functional as F\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from collections import defaultdict\n",
    "import json, math\n",
    "\n",
    "# Indicator tokens (single-token only; we filter to those)\n",
    "# PY_CANDIDATES  = [\"def\", \"import\", \":\", \"pass\", \"lambda\", \"print\"]\n",
    "# CPP_CANDIDATES = [\";\", \"::\", \"std\", \"using\", \"cout\", \"#\"]\n",
    "\n",
    "def one_token_ids(cands):\n",
    "    og_len = len(cands)\n",
    "    ids, keep = [], []\n",
    "    for s in cands:\n",
    "        t = tokenizer.encode(s, add_special_tokens=False)\n",
    "        if len(t) == 1:\n",
    "            ids.append(t[0]); keep.append(s)\n",
    "    return ids, keep\n",
    "\n",
    "# ids, tokens = one_token_ids(config.a.token_set)\n",
    "# print(ids, tokens)\n",
    "# ids, tokens = one_token_ids(config.b.token_set)\n",
    "# print(ids, tokens)\n",
    "\n",
    "\n",
    "def prob_mass_for_ids(logits, ids):\n",
    "    probs = F.softmax(logits, dim=-1)[..., ids]\n",
    "    return probs.sum(-1).item()\n",
    "\n",
    "def kl_divergence(p_logits, q_logits):\n",
    "    p = F.log_softmax(p_logits, dim=-1)\n",
    "    q = F.log_softmax(q_logits, dim=-1)\n",
    "    return torch.sum(torch.exp(p) * (p - q), dim=-1)\n",
    "\n",
    "def next_token_logits_at_first_code(prompt: str):\n",
    "    enc = tokenizer(prompt, return_tensors=\"pt\").to(DEVICE)\n",
    "    pos = pick_eval_index(enc[\"input_ids\"], prompt, mode=\"last\")\n",
    "    with torch.no_grad():\n",
    "        out = model(**enc)\n",
    "    return out.logits[:, pos, :].squeeze(0)  # [V]\n",
    "\n",
    "def cross_swap_verbose(config: DatasetPairConfig, prompt_a: str, prompt_b: str, spec: LayerSpec):\n",
    "    # Baselines\n",
    "    base_a = next_token_logits_at_first_code(prompt_a)\n",
    "    base_b  = next_token_logits_at_first_code(prompt_b)\n",
    "\n",
    "    # Capture layer outputs on last prompt position\n",
    "    enc_a = tokenizer(prompt_a, return_tensors=\"pt\").to(DEVICE)\n",
    "    enc_b  = tokenizer(prompt_b,  return_tensors=\"pt\").to(DEVICE)\n",
    "    capA = Capture(model, [spec]); _ = model(**enc_a); capA.remove()\n",
    "    capB = Capture(model, [spec]); _ = model(**enc_b);  capB.remove()\n",
    "    A = capA.data[spec][:, -1, :]  # C++ layer output at last prompt token\n",
    "    B = capB.data[spec][:, -1, :]  # Python layer output\n",
    "\n",
    "    def swap_fn(vec):\n",
    "        def fn(s, out):\n",
    "            out2 = out.clone()\n",
    "            out2[:, -1, :] = vec.to(out2)\n",
    "            return out2\n",
    "        return fn\n",
    "\n",
    "    # C++ side: insert Python vec\n",
    "    hook = Intervention(model, [spec], swap_fn(B))\n",
    "    out_a = model(**enc_a); hook.remove()\n",
    "    pos_a = pick_eval_index(enc_a[\"input_ids\"], prompt_a, \"last\")\n",
    "    swap_a = out_a.logits[:, pos_a, :].squeeze(0)\n",
    "\n",
    "    # Python side: insert C++ vec\n",
    "    hook = Intervention(model, [spec], swap_fn(A))\n",
    "    out_b = model(**enc_b); hook.remove()\n",
    "    pos_b = pick_eval_index(enc_b[\"input_ids\"], prompt_b, \"last\")\n",
    "    swap_b = out_b.logits[:, pos_b, :].squeeze(0)\n",
    "\n",
    "    # a_token_ids, _  = one_token_ids(config.a.token_set)\n",
    "    # b_token_ids, _  = one_token_ids(config.b.token_set)\n",
    "\n",
    "    # Metrics per side (note: bias = P(Py tokens) - P(C++ tokens), same sign for both)\n",
    "    def summarize(base, new, token_set_a, token_set_b):\n",
    "        # assume A <- B\n",
    "\n",
    "        kl = kl_divergence(base.unsqueeze(0), new.unsqueeze(0)).item()\n",
    "        shift_b = prob_mass_for_ids(new, token_set_b) - prob_mass_for_ids(base, token_set_b)\n",
    "        return dict(\n",
    "            KL=kl,\n",
    "            shift_other=shift_b\n",
    "        )\n",
    "\n",
    "    return {\n",
    "        \"spec\": (spec.kind, spec.idx),\n",
    "        config.a.name: summarize(base_a, swap_a, config.a.token_set, config.b.token_set),\n",
    "        config.b.name: summarize(base_b,  swap_b, config.b.token_set, config.a.token_set),\n",
    "    }\n",
    "    \n",
    "\n",
    "\n",
    "# ========= BATCH & AVERAGING =========\n",
    "def run_cross_swap_batch(config: DatasetPairConfig, layer_specs):\n",
    "    \"\"\"\n",
    "    paired_prompts: list of (cpp_prompt, py_prompt)\n",
    "    returns dict keyed by (kind, idx) with averages across pairs\n",
    "    \"\"\"\n",
    "    paired_prompts = config.prompts\n",
    "    agg = defaultdict(lambda: {config.a.name: defaultdict(list), config.b.name: defaultdict(list)})\n",
    "    for (a_p, b_p) in paired_prompts:\n",
    "        for spec in layer_specs:\n",
    "            res = cross_swap_verbose(config, a_p, b_p, spec)\n",
    "            kind, idx = res[\"spec\"]\n",
    "            k = f\"{kind}-{idx}\"\n",
    "            for side in [config.a.name,config.b.name]:\n",
    "                for key, val in res[side].items():\n",
    "                    if key in [\"side\"]: continue\n",
    "                    agg[k][side][key].append(val)\n",
    "\n",
    "    # average\n",
    "    # out = {}\n",
    "    # for k, sides in agg.items():\n",
    "    #     out[k] = {}\n",
    "    #     for side, metrics in sides.items():\n",
    "    #         out[k][side] = {m: float(np.mean(vals)) for m, vals in metrics.items()}\n",
    "    # return out\n",
    "    \n",
    "    # compute mean and std\n",
    "    out = {}\n",
    "    for k, sides in agg.items():\n",
    "        out[k] = {}\n",
    "        for side, metrics in sides.items():\n",
    "            out[k][side] = {\n",
    "                m: {\n",
    "                    \"mean\": float(np.mean(vals)),\n",
    "                    \"std\": float(np.std(vals, ddof=1)) if len(vals) > 1 else 0.0\n",
    "                }\n",
    "                for m, vals in metrics.items()\n",
    "            }\n",
    "    return out\n",
    "\n",
    "def run_cross_swap_average(config, layer_specs):\n",
    "    \"\"\"\n",
    "    Runs cross_swap_verbose over all prompt pairs in config.prompts\n",
    "    and averages KL divergence and shift metrics for each layer spec.\n",
    "\n",
    "    Returns dict keyed by (kind, idx), with averages for both sides.\n",
    "    \"\"\"\n",
    "    paired_prompts = config.prompts\n",
    "\n",
    "    # aggregate storage\n",
    "    agg = defaultdict(lambda: {config.a.name: defaultdict(list),\n",
    "                               config.b.name: defaultdict(list)})\n",
    "\n",
    "    # run cross swap for all prompt pairs and layer specs\n",
    "    for (a_p, b_p) in paired_prompts:\n",
    "        for spec in layer_specs:\n",
    "            res = cross_swap_verbose(config, a_p, b_p, spec)\n",
    "            k = res[\"spec\"]\n",
    "            for side in [config.a.name, config.b.name]:\n",
    "                for key, val in res[side].items():\n",
    "                    if key == \"side\": \n",
    "                        continue\n",
    "                    agg[k][side][key].append(val)\n",
    "\n",
    "    # compute averages\n",
    "    out = {}\n",
    "    for k, sides in agg.items():\n",
    "        out[k] = {}\n",
    "        for side, metrics in sides.items():\n",
    "            out[k][side] = {m: float(np.mean(vals)) for m, vals in metrics.items()}\n",
    "    return out\n",
    "\n",
    "\n",
    "def plot_cross_swap_kl(config:DatasetPairConfig, results, title=\"Cross-swap KL by layer (C++ & Python sides)\", save=\"crossswap_kl.png\"):\n",
    "    a = config.a.name\n",
    "    b = config.b.name\n",
    "    labels = []\n",
    "    kl_a, kl_b = [], []\n",
    "    std_a, std_b = [], []\n",
    "\n",
    "    # collect means and stds\n",
    "    # for (kind, idx), sides in sorted(results.items(), key=lambda x: (x[0][0], x[0][1])):\n",
    "    for (kind, idx), sides in results.items():\n",
    "        labels.append(f\"{kind.upper()}-{idx}\")\n",
    "        kl_a.append(sides[a][\"KL\"][\"mean\"])\n",
    "        kl_b.append(sides[b][\"KL\"][\"mean\"])\n",
    "        std_a.append(sides[a][\"KL\"][\"std\"])\n",
    "        std_b.append(sides[b][\"KL\"][\"std\"])\n",
    "\n",
    "    x = np.arange(len(labels))\n",
    "    w = 0.38\n",
    "    plt.figure(figsize=(10, 4))\n",
    "\n",
    "    # bars with error bars (std)\n",
    "    plt.bar(x - w/2, kl_a, width=w, label=\"KL on \" + a.upper() + \" prompt\", yerr=std_a, capsize=5)\n",
    "    plt.bar(x + w/2, kl_b,  width=w, label=\"KL on \" + b.upper() + \" prompt\", yerr=std_b, capsize=5)\n",
    "\n",
    "    plt.xticks(x, labels, rotation=45, ha=\"right\")\n",
    "    plt.ylabel(\"KL(baseline || swapped)\")\n",
    "    plt.title(title)\n",
    "    plt.legend()\n",
    "    plt.tight_layout()\n",
    "    directory = os.path.dirname(save)\n",
    "    if directory:\n",
    "        os.makedirs(directory, exist_ok=True)\n",
    "    plt.savefig(save, dpi=220, bbox_inches=\"tight\")\n",
    "    plt.show()\n",
    "    \n",
    "    print(f\"Saved to {save}\")\n",
    "\n",
    "def plot_cross_swap_bias(config: DatasetPairConfig, results, title=\"Language-bias shift Δ (Py mass − C++ mass)\", save=\"crossswap_bias.png\"):\n",
    "    \"\"\"\n",
    "    Draw a grouped bar chart of the mean `delta_bias` for every swapped component.\n",
    "\n",
    "    One pair of bars is drawn per `(kind, idx)` key of `results`, in the mapping's\n",
    "    iteration order: the left bar is the value stored under `config.a.name`, the right\n",
    "    bar the value stored under `config.b.name`. Error bars show the stored `std`.\n",
    "    The figure is written to `save` and then displayed.\n",
    "\n",
    "    Args:\n",
    "        config: Dataset-pair configuration; only `config.a.name` and `config.b.name`\n",
    "            are read (as keys into `results` and for the labels).\n",
    "        results: Mapping `(kind, idx) -> {side_name: {\"delta_bias\": {\"mean\": ..., \"std\": ...}}}`.\n",
    "        title: Figure title.\n",
    "        save: Output image path. A missing parent directory is created first.\n",
    "    \"\"\"\n",
    "    a = config.a.name\n",
    "    b = config.b.name\n",
    "    labels = []\n",
    "    db_a, db_b = [], []\n",
    "    std_a, std_b = [], []\n",
    "\n",
    "    # Collect means and stds, keeping the order in which `results` yields its keys.\n",
    "    for (kind, idx), sides in results.items():\n",
    "        labels.append(f\"{kind.upper()}-{idx}\")\n",
    "        db_a.append(sides[a][\"delta_bias\"][\"mean\"])\n",
    "        db_b.append(sides[b][\"delta_bias\"][\"mean\"])\n",
    "        std_a.append(sides[a][\"delta_bias\"][\"std\"])\n",
    "        std_b.append(sides[b][\"delta_bias\"][\"std\"])\n",
    "\n",
    "    x = np.arange(len(labels))\n",
    "    w = 0.38  # bar width; the two bars of a group sit at x - w/2 and x + w/2\n",
    "    plt.figure(figsize=(10, 4))\n",
    "\n",
    "    # Bars with error bars (std).\n",
    "    plt.bar(x - w/2, db_a, width=w, label=\"Δ bias on \"+ a.upper() + \" prompt\", yerr=std_a, capsize=5)\n",
    "    plt.bar(x + w/2, db_b,  width=w, label=\"Δ bias on \" + b.upper() + \" prompt\", yerr=std_b, capsize=5)\n",
    "\n",
    "    # Zero reference line so the sign of each shift is easy to read.\n",
    "    plt.axhline(0, linestyle=\"--\", linewidth=1)\n",
    "    plt.xticks(x, labels, rotation=45, ha=\"right\")\n",
    "    plt.ylabel(f\"After − Before  (P[{b} tokens] − P[{a} tokens])\")\n",
    "    plt.title(title)\n",
    "    plt.legend()\n",
    "    plt.tight_layout()\n",
    "\n",
    "    # savefig does not create folders, so make sure the target directory exists.\n",
    "    directory = os.path.dirname(save)\n",
    "    if directory:\n",
    "        os.makedirs(directory, exist_ok=True)\n",
    "    plt.savefig(save, dpi=220, bbox_inches=\"tight\")\n",
    "    plt.show()\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "5fa66ec3",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-20T22:42:50.655696Z",
     "iopub.status.busy": "2025-09-20T22:42:50.655395Z",
     "iopub.status.idle": "2025-09-20T22:42:52.338831Z",
     "shell.execute_reply": "2025-09-20T22:42:52.337845Z"
    },
    "papermill": {
     "duration": 1.69407,
     "end_time": "2025-09-20T22:42:52.341633",
     "exception": false,
     "start_time": "2025-09-20T22:42:50.647563",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn.functional as F\n",
    "from typing import Dict, Tuple, List\n",
    "\n",
    "# Assume the following classes and functions are defined from your provided context:\n",
    "# - model, tokenizer, DEVICE\n",
    "# - DatasetPairConfig, LayerSpec\n",
    "# - Capture, Intervention\n",
    "# - next_token_logits_at_first_code, pick_eval_index\n",
    "\n",
    "\n",
    "def calculate_token_shifts_for_layer(\n",
    "    config: DatasetPairConfig,\n",
    "    prompt_a: str,\n",
    "    prompt_b: str,\n",
    "    spec: LayerSpec\n",
    ") -> Dict[str, torch.Tensor]:\n",
    "    \"\"\"\n",
    "    Calculates the shift in probability for every token in the vocabulary after a causal swap.\n",
    "\n",
    "    For the component described by `spec`, the activation recorded at the last sequence\n",
    "    position of `prompt_b` is written into the forward pass of `prompt_a` (and vice versa).\n",
    "    The result is the difference between the intervened next-token distribution and the\n",
    "    baseline distribution of the receiving prompt.\n",
    "\n",
    "    All forward passes run under `torch.no_grad()`, and every hook is removed even if the\n",
    "    forward pass raises, so a failure cannot leave a stale hook on the shared model.\n",
    "\n",
    "    Args:\n",
    "        config: Configuration object for the dataset pair; only `config.a.name` and\n",
    "            `config.b.name` are read, to build the keys of the returned dictionary.\n",
    "        prompt_a: The prompt for the first domain (e.g., the C++ prompt).\n",
    "        prompt_b: The prompt for the second domain (e.g., the Python prompt).\n",
    "        spec: The LayerSpec object defining the component and layer index for the intervention.\n",
    "\n",
    "    Returns:\n",
    "        A dictionary with one probability-shift tensor per intervention direction.\n",
    "        - Keys are strings of the form 'shift_<receiver>_<-_<donor>', e.g. 'shift_cpp_<-_python'.\n",
    "        - Values are `P(token | intervened) - P(token | baseline)` for the receiving prompt,\n",
    "          one entry per vocabulary id (assumes the baseline helper returns a 1-D logits\n",
    "          vector -- TODO confirm).\n",
    "    \"\"\"\n",
    "    def capture_last_position(enc):\n",
    "        # Run the prompt once with a capture hook and keep the activation at the final position.\n",
    "        cap = Capture(model, [spec])\n",
    "        try:\n",
    "            with torch.no_grad():  # inference only: do not build an autograd graph\n",
    "                model(**enc)\n",
    "        finally:\n",
    "            cap.remove()\n",
    "        return cap.data[spec][:, -1, :]\n",
    "\n",
    "    def make_swap_fn(vector_to_insert):\n",
    "        def intervention_fn(spec, current_output):\n",
    "            intervened_output = current_output.clone()\n",
    "            # Replace the activation at the last sequence position with the swapped vector.\n",
    "            intervened_output[:, -1, :] = vector_to_insert.to(current_output.dtype)\n",
    "            return intervened_output\n",
    "        return intervention_fn\n",
    "\n",
    "    def swapped_logits(enc, prompt, vector_to_insert):\n",
    "        # Run `prompt` with the donor activation patched in and return its next-token logits.\n",
    "        hook = Intervention(model, [spec], make_swap_fn(vector_to_insert))\n",
    "        try:\n",
    "            with torch.no_grad():\n",
    "                out = model(**enc)\n",
    "        finally:\n",
    "            hook.remove()\n",
    "        pos = pick_eval_index(enc[\"input_ids\"], prompt, \"last\")\n",
    "        return out.logits[:, pos, :].squeeze(0)\n",
    "\n",
    "    # 1. Get baseline next-token logits for both original prompts.\n",
    "    # NOTE(review): the baseline comes from `next_token_logits_at_first_code`, while the\n",
    "    # intervened logits are read at pick_eval_index(..., \"last\"); confirm that both refer\n",
    "    # to the same position.\n",
    "    base_a_logits = next_token_logits_at_first_code(prompt_a)\n",
    "    base_b_logits = next_token_logits_at_first_code(prompt_b)\n",
    "\n",
    "    # 2. Tokenize prompts and capture the internal activations at the specified layer.\n",
    "    enc_a = tokenizer(prompt_a, return_tensors=\"pt\").to(DEVICE)\n",
    "    enc_b = tokenizer(prompt_b, return_tensors=\"pt\").to(DEVICE)\n",
    "    activation_A = capture_last_position(enc_a)\n",
    "    activation_B = capture_last_position(enc_b)\n",
    "\n",
    "    # 3. Perform the causal swaps: each prompt is run with the other prompt's activation.\n",
    "    swap_a_logits = swapped_logits(enc_a, prompt_a, activation_B)\n",
    "    swap_b_logits = swapped_logits(enc_b, prompt_b, activation_A)\n",
    "\n",
    "    # 4. Calculate the shifts in probability distributions.\n",
    "    # Shift = P(token | intervened) - P(token | baseline)\n",
    "    base_a_probs = F.softmax(base_a_logits, dim=-1)\n",
    "    swap_a_probs = F.softmax(swap_a_logits, dim=-1)\n",
    "    shift_a_receives_b = swap_a_probs - base_a_probs\n",
    "\n",
    "    base_b_probs = F.softmax(base_b_logits, dim=-1)\n",
    "    swap_b_probs = F.softmax(swap_b_logits, dim=-1)\n",
    "    shift_b_receives_a = swap_b_probs - base_b_probs\n",
    "\n",
    "    return {\n",
    "        f\"shift_{config.a.name}_<-_{config.b.name}\": shift_a_receives_b,\n",
    "        f\"shift_{config.b.name}_<-_{config.a.name}\": shift_b_receives_a,\n",
    "    }\n",
    "\n",
    "def analyze_all_layers_with_token_shifts(\n",
    "    config: DatasetPairConfig,\n",
    "    prompt_a: str,\n",
    "    prompt_b: str,\n",
    "    layers_to_probe: List[int],\n",
    "    components_to_probe: List[str],\n",
    "    top_k: int = 20\n",
    ") -> Dict[Tuple[str, int], Dict[str, Dict[str, List[Tuple[str, float]]]]]:\n",
    "    \"\"\"\n",
    "    Iterates through layers to calculate token shifts for ONE prompt pair, returning two\n",
    "    separate lists for the top_k promoted (largest shift) and demoted (smallest shift) tokens.\n",
    "\n",
    "    Args:\n",
    "        config: Dataset-pair configuration, forwarded to `calculate_token_shifts_for_layer`.\n",
    "        prompt_a: Prompt of the first domain.\n",
    "        prompt_b: Prompt of the second domain.\n",
    "        layers_to_probe: Layer indices to intervene on.\n",
    "        components_to_probe: Component kinds to intervene on (e.g. 'mlp', 'attn').\n",
    "        top_k: Number of tokens to keep per list. Values larger than the number of\n",
    "            entries in the shift vector are capped to that number.\n",
    "\n",
    "    Returns:\n",
    "        `{(spec.kind, spec.idx): {direction_key: {'promoted': [(token, shift), ...],\n",
    "        'demoted': [(token, shift), ...]}}}`. 'promoted' is ordered from the largest shift\n",
    "        downwards, 'demoted' from the smallest shift upwards.\n",
    "    \"\"\"\n",
    "    full_results = {}\n",
    "    for comp in components_to_probe:\n",
    "        for layer_idx in layers_to_probe:\n",
    "            spec = LayerSpec(comp, layer_idx)\n",
    "            print(f\"--- Analyzing {spec} ---\")\n",
    "\n",
    "            # Raw shift tensors for the current layer, one per intervention direction.\n",
    "            shift_tensors = calculate_token_shifts_for_layer(config, prompt_a, prompt_b, spec)\n",
    "\n",
    "            layer_results = {}\n",
    "            for key, shift_tensor in shift_tensors.items():\n",
    "                # torch.topk raises when k exceeds the size of the dimension, so cap it.\n",
    "                k = min(top_k, shift_tensor.shape[-1])\n",
    "\n",
    "                # Largest shifts (promoted tokens) and, via negation, smallest shifts (demoted tokens).\n",
    "                top_pos_vals, top_pos_indices = torch.topk(shift_tensor, k)\n",
    "                top_neg_vals, top_neg_indices = torch.topk(-shift_tensor, k)\n",
    "\n",
    "                # Convert to Python numbers in one transfer instead of one .item() call per element.\n",
    "                promoted_tokens = [\n",
    "                    (tokenizer.decode([token_id]), value)\n",
    "                    for value, token_id in zip(top_pos_vals.tolist(), top_pos_indices.tolist())\n",
    "                ]\n",
    "                # Negate the value back to its original (unnegated) shift.\n",
    "                demoted_tokens = [\n",
    "                    (tokenizer.decode([token_id]), -value)\n",
    "                    for value, token_id in zip(top_neg_vals.tolist(), top_neg_indices.tolist())\n",
    "                ]\n",
    "\n",
    "                layer_results[key] = {\n",
    "                    'promoted': promoted_tokens,\n",
    "                    'demoted': demoted_tokens\n",
    "                }\n",
    "\n",
    "            full_results[(spec.kind, spec.idx)] = layer_results\n",
    "            print(f\"Finished {spec}.\\n\")\n",
    "\n",
    "    return full_results\n",
    "\n",
    "# results = analyze_all_layers_with_token_shifts(config_dict['medical-finance'], 'In medicine, a prediction of future recovery is a', 'In finance, a prediction of future earnings is a', [2], ['mlp', 'attn'], 100)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "8dd5a97d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-20T22:42:52.357548Z",
     "iopub.status.busy": "2025-09-20T22:42:52.357293Z",
     "iopub.status.idle": "2025-09-20T22:42:52.366106Z",
     "shell.execute_reply": "2025-09-20T22:42:52.365503Z"
    },
    "papermill": {
     "duration": 0.014615,
     "end_time": "2025-09-20T22:42:52.367214",
     "exception": false,
     "start_time": "2025-09-20T22:42:52.352599",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "from typing import Dict, List, Tuple\n",
    "\n",
    "def analyze_average_token_shifts(\n",
    "    config: DatasetPairConfig,\n",
    "    layers_to_probe: List[int],\n",
    "    components_to_probe: List[str],\n",
    "    top_k: int | None = None\n",
    ") -> Dict[Tuple[str, int], Dict[str, Dict[str, List[Tuple[str, float]]]]]:\n",
    "    \"\"\"\n",
    "    Computes the AVERAGE token shifts by iterating over a list of prompt pairs.\n",
    "\n",
    "    For each (component, layer), the shift tensors of every pair in `config.prompts` are\n",
    "    summed and divided by the number of pairs; the averaged tensor is then ranked.\n",
    "\n",
    "    Args:\n",
    "        config: Dataset-pair configuration. `config.prompts[i][0]` / `[i][1]` are used as the\n",
    "            prompts of domain a / b; `config.a.name` and `config.b.name` build the result keys.\n",
    "        layers_to_probe: Layer indices to intervene on.\n",
    "        components_to_probe: Component kinds to intervene on (e.g. 'mlp', 'attn').\n",
    "        top_k: Number of tokens kept per list. `None` ranks every entry of the shift vector;\n",
    "            larger values are capped to the vector length. With a full ranking the tail of\n",
    "            'promoted' holds tokens with negative shifts (and vice versa for 'demoted').\n",
    "\n",
    "    Returns:\n",
    "        `{(spec.kind, spec.idx): {direction_key: {'promoted': [(token, shift), ...],\n",
    "        'demoted': [(token, shift), ...]}}}`, or `{}` when `config.prompts` is empty.\n",
    "    \"\"\"\n",
    "    full_results = {}\n",
    "    num_prompts = len(config.prompts)\n",
    "    if num_prompts == 0:\n",
    "        print(\"Warning: No prompts found in the config. Returning empty results.\")\n",
    "        return {}\n",
    "\n",
    "    # Keys for the two intervention directions (receiver <- donor).\n",
    "    key_a_receives_b = f\"shift_{config.a.name}_<-_{config.b.name}\"\n",
    "    key_b_receives_a = f\"shift_{config.b.name}_<-_{config.a.name}\"\n",
    "\n",
    "    for comp in components_to_probe:\n",
    "        for layer_idx in layers_to_probe:\n",
    "            spec = LayerSpec(comp, layer_idx)\n",
    "            print(f\"--- Analyzing {spec} ---\")\n",
    "\n",
    "            # Accumulators start as the scalar 0.0; the first addition turns them into tensors.\n",
    "            accumulated_shifts = {\n",
    "                key_a_receives_b: 0.0,\n",
    "                key_b_receives_a: 0.0\n",
    "            }\n",
    "\n",
    "            # 1. Accumulate shifts over all prompt pairs.\n",
    "            for pair in config.prompts:\n",
    "                shifts_for_pair = calculate_token_shifts_for_layer(config, pair[0], pair[1], spec)\n",
    "                accumulated_shifts[key_a_receives_b] += shifts_for_pair[key_a_receives_b]\n",
    "                accumulated_shifts[key_b_receives_a] += shifts_for_pair[key_b_receives_a]\n",
    "\n",
    "            # 2. Average and rank each direction.\n",
    "            layer_results = {}\n",
    "            for key, total_shift in accumulated_shifts.items():\n",
    "                avg_shift_tensor = total_shift / num_prompts\n",
    "\n",
    "                # Size k from the tensor itself: tokenizer.vocab_size may not match the logit\n",
    "                # dimension (it can exclude added tokens), and torch.topk raises if k is too big.\n",
    "                vocab_dim = avg_shift_tensor.shape[-1]\n",
    "                k = vocab_dim if top_k is None else min(top_k, vocab_dim)\n",
    "\n",
    "                top_pos_vals, top_pos_indices = torch.topk(avg_shift_tensor, k)\n",
    "                top_neg_vals, top_neg_indices = torch.topk(-avg_shift_tensor, k)\n",
    "\n",
    "                # One bulk conversion instead of an .item() call per vocabulary entry.\n",
    "                promoted_tokens = [\n",
    "                    (tokenizer.decode([token_id]), value)\n",
    "                    for value, token_id in zip(top_pos_vals.tolist(), top_pos_indices.tolist())\n",
    "                ]\n",
    "                # Negate back so 'demoted' carries the original (unnegated) shift values.\n",
    "                demoted_tokens = [\n",
    "                    (tokenizer.decode([token_id]), -value)\n",
    "                    for value, token_id in zip(top_neg_vals.tolist(), top_neg_indices.tolist())\n",
    "                ]\n",
    "\n",
    "                layer_results[key] = {\n",
    "                    'promoted': promoted_tokens,\n",
    "                    'demoted': demoted_tokens\n",
    "                }\n",
    "\n",
    "            full_results[(spec.kind, spec.idx)] = layer_results\n",
    "            print(f\"Finished {spec} (averaged over {num_prompts} prompts).\\n\")\n",
    "\n",
    "    return full_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "844278a3",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-20T22:42:52.376008Z",
     "iopub.status.busy": "2025-09-20T22:42:52.375745Z",
     "iopub.status.idle": "2025-09-20T22:42:52.382423Z",
     "shell.execute_reply": "2025-09-20T22:42:52.381622Z"
    },
    "papermill": {
     "duration": 0.012459,
     "end_time": "2025-09-20T22:42:52.383526",
     "exception": false,
     "start_time": "2025-09-20T22:42:52.371067",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from collections import defaultdict\n",
    "from typing import Dict, List, Tuple\n",
    "\n",
    "def get_domain_representative_tokens(\n",
    "    results: Dict[Tuple[str, int], Dict[str, Dict[str, List[Tuple[str, float]]]]],\n",
    "    intervention_key: str,\n",
    "    top_n: int = 50\n",
    ") -> List[str]:\n",
    "    \"\"\"\n",
    "    Aggregates shifts across all layers to find the most representative tokens for a domain.\n",
    "\n",
    "    Args:\n",
    "        results: The output dictionary from analyze_average_token_shifts.\n",
    "        intervention_key: The shift direction that promotes the desired domain's tokens.\n",
    "                          For example, to get 'science' tokens, use 'shift_science_<-_math'.\n",
    "        top_n: The number of representative tokens to return.\n",
    "\n",
    "    Returns:\n",
    "        A sorted list of the top_n most representative token strings.\n",
    "    \"\"\"\n",
    "    # 1. Use a defaultdict to store the cumulative shift score for each token.\n",
    "    token_scores = defaultdict(float)\n",
    "\n",
    "    # 2. Iterate through the results for each layer.\n",
    "    for layer_data in results.values():\n",
    "        if intervention_key in layer_data:\n",
    "            # Get the list of promoted tokens for the current layer\n",
    "            promoted_list = layer_data[intervention_key]['promoted']\n",
    "\n",
    "            # 3. Add the shift score to the token's cumulative total.\n",
    "            for token, shift in promoted_list:\n",
    "                token_scores[token] += shift\n",
    "\n",
    "    # 4. Sort the tokens by their final cumulative score in descending order.\n",
    "    sorted_by_score = sorted(token_scores.items(), key=lambda item: item[1], reverse=True)\n",
    "\n",
    "    # 5. Extract just the token strings and return the top_n.\n",
    "    representative_tokens = [token for token, score in sorted_by_score]\n",
    "\n",
    "    return representative_tokens[:top_n]\n",
    "\n",
    "# --- EXAMPLE USAGE ---\n",
    "\n",
    "# Assume 'results' is the output from your analysis function\n",
    "# set_a = 'cpp_top'\n",
    "# set_b = 'python_top'\n",
    "# intervention = f'shift_{set_a}_<-_{set_b}'\n",
    "# rep_tokens = get_domain_representative_tokens(results, intervention, top_n=50)\n",
    "\n",
    "# print(\"Top 50 most representative 'python' tokens:\")\n",
    "# print(rep_tokens)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "633eb75f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-20T22:42:52.392442Z",
     "iopub.status.busy": "2025-09-20T22:42:52.392267Z",
     "iopub.status.idle": "2025-09-21T04:02:40.028647Z",
     "shell.execute_reply": "2025-09-21T04:02:40.027798Z"
    },
    "papermill": {
     "duration": 19187.642576,
     "end_time": "2025-09-21T04:02:40.029983",
     "exception": false,
     "start_time": "2025-09-20T22:42:52.387407",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting analysis for 1 dataset pairs...\n",
      "Probing layers: range(1, 28) for components: ['mlp', 'attn']\n",
      "\n",
      "--- Processing dataset pair: 'cpp-python' (cpp_top vs python_top) ---\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=1) ---\n",
      "Finished LayerSpec(kind='mlp', idx=1) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=2) ---\n",
      "Finished LayerSpec(kind='mlp', idx=2) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=3) ---\n",
      "Finished LayerSpec(kind='mlp', idx=3) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=4) ---\n",
      "Finished LayerSpec(kind='mlp', idx=4) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=5) ---\n",
      "Finished LayerSpec(kind='mlp', idx=5) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=6) ---\n",
      "Finished LayerSpec(kind='mlp', idx=6) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=7) ---\n",
      "Finished LayerSpec(kind='mlp', idx=7) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=8) ---\n",
      "Finished LayerSpec(kind='mlp', idx=8) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=9) ---\n",
      "Finished LayerSpec(kind='mlp', idx=9) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=10) ---\n",
      "Finished LayerSpec(kind='mlp', idx=10) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=11) ---\n",
      "Finished LayerSpec(kind='mlp', idx=11) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=12) ---\n",
      "Finished LayerSpec(kind='mlp', idx=12) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=13) ---\n",
      "Finished LayerSpec(kind='mlp', idx=13) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=14) ---\n",
      "Finished LayerSpec(kind='mlp', idx=14) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=15) ---\n",
      "Finished LayerSpec(kind='mlp', idx=15) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=16) ---\n",
      "Finished LayerSpec(kind='mlp', idx=16) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=17) ---\n",
      "Finished LayerSpec(kind='mlp', idx=17) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=18) ---\n",
      "Finished LayerSpec(kind='mlp', idx=18) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=19) ---\n",
      "Finished LayerSpec(kind='mlp', idx=19) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=20) ---\n",
      "Finished LayerSpec(kind='mlp', idx=20) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=21) ---\n",
      "Finished LayerSpec(kind='mlp', idx=21) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=22) ---\n",
      "Finished LayerSpec(kind='mlp', idx=22) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=23) ---\n",
      "Finished LayerSpec(kind='mlp', idx=23) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=24) ---\n",
      "Finished LayerSpec(kind='mlp', idx=24) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=25) ---\n",
      "Finished LayerSpec(kind='mlp', idx=25) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=26) ---\n",
      "Finished LayerSpec(kind='mlp', idx=26) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='mlp', idx=27) ---\n",
      "Finished LayerSpec(kind='mlp', idx=27) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=1) ---\n",
      "Finished LayerSpec(kind='attn', idx=1) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=2) ---\n",
      "Finished LayerSpec(kind='attn', idx=2) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=3) ---\n",
      "Finished LayerSpec(kind='attn', idx=3) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=4) ---\n",
      "Finished LayerSpec(kind='attn', idx=4) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=5) ---\n",
      "Finished LayerSpec(kind='attn', idx=5) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=6) ---\n",
      "Finished LayerSpec(kind='attn', idx=6) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=7) ---\n",
      "Finished LayerSpec(kind='attn', idx=7) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=8) ---\n",
      "Finished LayerSpec(kind='attn', idx=8) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=9) ---\n",
      "Finished LayerSpec(kind='attn', idx=9) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=10) ---\n",
      "Finished LayerSpec(kind='attn', idx=10) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=11) ---\n",
      "Finished LayerSpec(kind='attn', idx=11) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=12) ---\n",
      "Finished LayerSpec(kind='attn', idx=12) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=13) ---\n",
      "Finished LayerSpec(kind='attn', idx=13) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=14) ---\n",
      "Finished LayerSpec(kind='attn', idx=14) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=15) ---\n",
      "Finished LayerSpec(kind='attn', idx=15) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=16) ---\n",
      "Finished LayerSpec(kind='attn', idx=16) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=17) ---\n",
      "Finished LayerSpec(kind='attn', idx=17) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=18) ---\n",
      "Finished LayerSpec(kind='attn', idx=18) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=19) ---\n",
      "Finished LayerSpec(kind='attn', idx=19) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=20) ---\n",
      "Finished LayerSpec(kind='attn', idx=20) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=21) ---\n",
      "Finished LayerSpec(kind='attn', idx=21) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=22) ---\n",
      "Finished LayerSpec(kind='attn', idx=22) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=23) ---\n",
      "Finished LayerSpec(kind='attn', idx=23) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=24) ---\n",
      "Finished LayerSpec(kind='attn', idx=24) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=25) ---\n",
      "Finished LayerSpec(kind='attn', idx=25) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=26) ---\n",
      "Finished LayerSpec(kind='attn', idx=26) (averaged over 100 prompts).\n",
      "\n",
      "--- Analyzing LayerSpec(kind='attn', idx=27) ---\n",
      "Finished LayerSpec(kind='attn', idx=27) (averaged over 100 prompts).\n",
      "\n",
      "Aggregating results for domain: 'cpp_top'...\n",
      "Aggregating results for domain: 'python_top'...\n",
      "--- Finished processing 'cpp-python' ---\n",
      "\n",
      "========================================\n",
      "      ANALYSIS COMPLETE      \n",
      "========================================\n",
      "\n",
      "--- Top 20 of 100 Representative Tokens for 'CPP_TOP' ---\n",
      "['cpp', 'c', ' def', ' c', ' //', ' #', ' public', 'def', 'python', 'C', 'public', ' class', 'class', ' C', 'java', '#', ' import', 'import', 'function', 'void']\n",
      "\n",
      "--- Top 20 of 100 Representative Tokens for 'PYTHON_TOP' ---\n",
      "['python', ' def', 'def', ' #', '#', ' python', 'c', ' class', 'class', ' import', ' c', 'import', 'cpp', 'Python', ' //', 'public', ' ', ' public', '//', 'from']\n"
     ]
    }
   ],
   "source": [
    "from typing import Dict, List, Tuple\n",
    "\n",
    "def find_all_domain_representatives(\n",
    "    config_dict: Dict[str, 'DatasetPairConfig'],\n",
    "    layers_to_probe: List[int],\n",
    "    components_to_probe: List[str],\n",
    "    top_n: int = 100\n",
    ") -> Dict[str, List[str]]:\n",
    "    \"\"\"\n",
    "    Run the averaged token-shift analysis for every dataset pair and collect, per domain\n",
    "    name, the top-ranked tokens of the matching intervention direction.\n",
    "\n",
    "    For each pair the full-vocabulary analysis (`top_k=None`) is run once. The tokens stored\n",
    "    under `config.a.name` come from the direction 'shift_<a>_<-_<b>', those stored under\n",
    "    `config.b.name` from 'shift_<b>_<-_<a>'. A domain name that occurs in several pairs is\n",
    "    overwritten by the pair processed last.\n",
    "\n",
    "    Args:\n",
    "        config_dict: A dictionary mapping pair names to DatasetPairConfig objects.\n",
    "        layers_to_probe: Layer indices to analyze.\n",
    "        components_to_probe: Component names (e.g., ['mlp', 'attn']).\n",
    "        top_n: Number of representative tokens to keep for each domain.\n",
    "\n",
    "    Returns:\n",
    "        A dictionary mapping each domain name to its list of at most `top_n` token strings.\n",
    "    \"\"\"\n",
    "    print(f\"Starting analysis for {len(config_dict)} dataset pairs...\")\n",
    "    print(f\"Probing layers: {layers_to_probe} for components: {components_to_probe}\\n\")\n",
    "\n",
    "    tokens_by_domain = {}\n",
    "    for pair_name, pair_cfg in config_dict.items():\n",
    "        name_a = pair_cfg.a.name\n",
    "        name_b = pair_cfg.b.name\n",
    "        print(f\"--- Processing dataset pair: '{pair_name}' ({name_a} vs {name_b}) ---\")\n",
    "\n",
    "        # Full-vocabulary analysis (top_k=None), averaged over all prompts of this pair.\n",
    "        pair_results = analyze_average_token_shifts(\n",
    "            pair_cfg,\n",
    "            layers_to_probe,\n",
    "            components_to_probe,\n",
    "            top_k=None\n",
    "        )\n",
    "\n",
    "        # Handle both directions the same way: the receiving domain names the result entry.\n",
    "        for receiver, donor in ((name_a, name_b), (name_b, name_a)):\n",
    "            print(f\"Aggregating results for domain: '{receiver}'...\")\n",
    "            tokens_by_domain[receiver] = get_domain_representative_tokens(\n",
    "                pair_results,\n",
    "                f\"shift_{receiver}_<-_{donor}\",\n",
    "                top_n=top_n\n",
    "            )\n",
    "        print(f\"--- Finished processing '{pair_name}' ---\\n\")\n",
    "\n",
    "    banner = \"=\"*40\n",
    "    print(banner)\n",
    "    print(\"      ANALYSIS COMPLETE      \")\n",
    "    print(banner)\n",
    "    return tokens_by_domain\n",
    "\n",
    "# --- RUN THE ANALYSIS ---\n",
    "\n",
    "# `config_dict` must already exist in the kernel namespace: a dictionary that maps a\n",
    "# pair name to its DatasetPairConfig object. For example:\n",
    "# config_dict = {\n",
    "#     'sci-math': science_math_config,\n",
    "#     'code': cpp_python_config,\n",
    "#     'lang': english_french_config\n",
    "# }\n",
    "\n",
    "# Which parts of the model to analyze: layer indices 1..27, for both component kinds.\n",
    "LAYERS_TO_PROBE = range(1, 28)\n",
    "COMPONENTS_TO_PROBE = ['mlp', 'attn']\n",
    "TOP_N_TOKENS = 300   # number of representative tokens kept per domain\n",
    "PREVIEW_COUNT = 20   # number of tokens printed per domain for a quick look\n",
    "\n",
    "# Run the full analysis\n",
    "representative_tokens_by_domain = find_all_domain_representatives(\n",
    "    config_dict,\n",
    "    layers_to_probe=LAYERS_TO_PROBE,\n",
    "    components_to_probe=COMPONENTS_TO_PROBE,\n",
    "    top_n=TOP_N_TOKENS\n",
    ")\n",
    "\n",
    "# Print a short preview per domain; the header reports the real list length.\n",
    "for domain, tokens in representative_tokens_by_domain.items():\n",
    "    shown = min(PREVIEW_COUNT, len(tokens))\n",
    "    print(f\"\\n--- Top {shown} of {len(tokens)} Representative Tokens for '{domain.upper()}' ---\")\n",
    "    print(tokens[:PREVIEW_COUNT])\n",
    "\n",
    "# Persist the complete token lists (not just the preview).\n",
    "with open(\"token_set.json\", 'w') as f:\n",
    "    json.dump(representative_tokens_by_domain, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1ac2b549",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-21T04:02:40.052598Z",
     "iopub.status.busy": "2025-09-21T04:02:40.052364Z",
     "iopub.status.idle": "2025-09-21T04:02:40.056999Z",
     "shell.execute_reply": "2025-09-21T04:02:40.056472Z"
    },
    "papermill": {
     "duration": 0.017107,
     "end_time": "2025-09-21T04:02:40.058012",
     "exception": false,
     "start_time": "2025-09-21T04:02:40.040905",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'cpp_top': [11055, 66, 711, 272, 443, 674, 586, 755, 12958, 34, 898, 538, 1058, 356, 10248, 2, 1179, 475, 1723, 1019, 322, 528, 742, 734, 47316, 720, 47924, 1085, 1012, 738, 220, 10344, 928, 1181, 925, 1799, 396, 8144, 1040, 9842, 333, 2039, 3, 2707, 257, 791, 1416, 31380, 31, 1845, 1527, 128001, 505, 1487, 1674, 422, 1757, 578, 256, 40, 16, 400, 959, 14402, 1872, 55375, 42333, 571, 5688, 2997, 879, 27, 37942, 71, 77, 644, 262, 353, 4077, 87, 1379, 763, 90, 12761, 2580, 79, 5560, 939, 308, 3990, 9, 58, 4724, 767, 2028, 985, 47375, 13325, 2000, 2485, 366, 3788, 22818, 2675, 5040, 69, 1118, 2566, 1686, 1988, 5321, 5618, 7, 320, 6462, 1075, 0, 358, 8872, 4942, 1115, 13798, 1342, 817, 510, 32, 1264, 2020, 369, 1472, 3368, 2465, 2900, 7003, 4110, 1580, 362, 3295, 4, 73, 1701, 10464, 4324, 28121, 14, 7531, 1442, 5830, 4429, 865, 39, 47, 471, 74, 305, 15391, 4194, 16234, 16644, 71742, 1358, 11502, 2033, 3556, 3427, 82, 4815, 9528, 260, 7927, 330, 72, 3350, 10086, 258, 1257, 6403, 2746, 10091, 9290, 1095, 5659, 693, 13809, 50, 3460, 2355, 22098, 1784, 7860, 4719, 5263, 2201, 3979, 64, 88, 7181, 996, 1985, 6245, 5159, 36810, 282, 36888, 4302, 5810, 17, 838, 70, 314, 50560, 1194, 286, 2080, 3081, 1169, 1005, 2127, 3261, 12382, 707, 1583, 15, 1034, 1, 1432, 75, 8754, 6425, 14504, 13617, 30834, 1230, 1374, 482, 4718, 1638, 264, 13, 11087, 8102, 30, 4258, 1062, 27920, 18031, 8169, 8586, 67, 4930, 10267, 33, 12, 16067, 67006, 281, 3810, 856, 19866, 611, 17010, 1418, 1868, 1198, 13178, 13688, 12362, 415, 76426, 3733, 2694, 9442, 1835, 567, 1556, 26992, 4438, 2057, 2360, 22491, 5021, 8366, 16834, 93, 36694, 5207, 1328, 64389, 285, 2822, 1762, 83, 5546, 496, 1505, 6, 85, 5142, 473, 52050], 'python_top': [12958, 711, 755, 674, 2, 10344, 66, 538, 1058, 1179, 272, 475, 11055, 31380, 443, 898, 220, 586, 322, 1527, 10248, 505, 1723, 13325, 47316, 734, 34, 720, 3368, 128001, 256, 1019, 257, 928, 356, 8144, 333, 14402, 9842, 31, 2039, 262, 791, 3, 925, 578, 396, 1012, 1757, 959, 
8872, 1040, 1799, 528, 400, 738, 71, 37942, 422, 55375, 87, 644, 12382, 47375, 16, 879, 9, 1160, 763, 1194, 58, 1638, 1416, 12761, 77, 40, 90, 4077, 2707, 27, 742, 42333, 939, 353, 5688, 5040, 1374, 1674, 1379, 32, 2580, 767, 571, 2000, 362, 260, 5321, 7860, 1472, 2675, 567, 320, 308, 3990, 1181, 1580, 5560, 7, 1075, 14, 510, 47, 22818, 985, 7003, 2465, 865, 4, 2485, 1115, 471, 996, 286, 39, 2028, 817, 2566, 5618, 4324, 4815, 369, 1988, 1342, 11502, 1085, 3788, 3350, 4110, 4194, 20761, 1432, 258, 6519, 17010, 79, 18031, 4611, 69, 2900, 1257, 6462, 366, 30, 1845, 5659, 3427, 5830, 7181, 6403, 330, 4942, 3460, 73, 0, 13178, 1985, 5159, 16644, 13798, 3261, 7531, 496, 3556, 2997, 7927, 1784, 9290, 10464, 72, 358, 1442, 47924, 15391, 4930, 2020, 1169, 9528, 1835, 14711, 17, 50, 415, 1701, 64, 28121, 71742, 856, 2355, 3979, 707, 693, 82, 1264, 1095, 2470, 6425, 36810, 75, 88, 305, 27920, 19866, 4718, 19690, 13, 29, 611, 32175, 861, 1761, 8169, 1005, 13617, 4719, 1, 10086, 74, 2127, 13688, 67006, 14500, 264, 1118, 3364, 3733, 1796, 9442, 5207, 2746, 14149, 285, 326, 50560, 695, 473, 13925, 1686, 16067, 3810, 10267, 4933, 4302, 5810, 2256, 1881, 1038, 5263, 310, 8846, 22098, 12362, 14504, 1456, 5021, 6, 64080, 10134, 282, 8754, 3081, 64389, 838, 2057, 38691, 1583, 364, 2080, 1198, 3092, 70, 1328, 15, 6245, 692, 26992, 32121, 64185, 10137, 1505, 8102, 2342, 3915, 37787, 8586, 1893, 16834, 11087, 2360, 1556, 11377]}\n",
      "cpp_top 300\n",
      "python_top 300\n"
     ]
    }
   ],
   "source": [
    "# Convert each domain's representative token strings into tokenizer ids and\n",
    "# save both forms under `output_dir`.\n",
    "output_dir = \"token_sets\"\n",
    "os.makedirs(output_dir, exist_ok=True)\n",
    "tokens_path = os.path.join(output_dir, \"tokens.json\")\n",
    "ids_path = os.path.join(output_dir, \"ids.json\")\n",
    "\n",
    "with open(tokens_path, 'w') as f:\n",
    "    json.dump(representative_tokens_by_domain, f)\n",
    "\n",
    "# Reload from disk so the ids are derived from exactly what was saved.\n",
    "with open(tokens_path, \"r\") as f:\n",
    "    data = json.load(f)\n",
    "\n",
    "result = {}\n",
    "for dataset, token_list in data.items():\n",
    "    token_id_list = []\n",
    "    for element in token_list:\n",
    "        # Encode without special tokens so position 0 is the token itself,\n",
    "        # whether or not this tokenizer prepends a BOS token by default.\n",
    "        # NOTE(review): assumes each element is a single-token string; if it\n",
    "        # splits into several ids only the first one is kept -- confirm.\n",
    "        encoded = tokenizer.encode(element, add_special_tokens=False)\n",
    "        if not encoded:\n",
    "            raise ValueError(f\"Token {element!r} in '{dataset}' encodes to no ids\")\n",
    "        token_id_list.append(encoded[0])\n",
    "\n",
    "    result[dataset] = token_id_list\n",
    "\n",
    "print(result)\n",
    "# Loop names avoid `id` / `list` so the builtins stay usable in later cells.\n",
    "for dataset, token_list in data.items():\n",
    "    print(dataset, len(token_list))\n",
    "\n",
    "with open(ids_path, \"w\") as f:\n",
    "    json.dump(result, f)"
   ]
  }
 ],
 "metadata": {
  "kaggle": {
   "accelerator": "gpu",
   "dataSources": [
    {
     "databundleVersionId": 13806307,
     "datasetId": 8311723,
     "sourceId": 13121174,
     "sourceType": "datasetVersion"
    }
   ],
   "dockerImageVersionId": 31089,
   "isGpuEnabled": true,
   "isInternetEnabled": true,
   "language": "python",
   "sourceType": "notebook"
  },
  "kernelspec": {
   "display_name": "causal",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 19282.508481,
   "end_time": "2025-09-21T04:02:43.611991",
   "environment_variables": {},
   "exception": null,
   "input_path": "__notebook__.ipynb",
   "output_path": "__notebook__.ipynb",
   "parameters": {},
   "start_time": "2025-09-20T22:41:21.103510",
   "version": "2.6.0"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {
     "01232c9b927546c4811883c380dd08c0": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_e3c8c0763247478b854303466e934c6b",
       "placeholder": "​",
       "style": "IPY_MODEL_f73d02e9622f4f8a945995f26ae10121",
       "tabbable": null,
       "tooltip": null,
       "value": "special_tokens_map.json: 100%"
      }
     },
     "05482d47f8694866b421df0f50ce8d72": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "078287aeba524eb880e8ae42c7a2fb3f": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "07a2af6dc8884ddd8c5c3ea748144558": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "0b3b6207109d4600b880572c8a50e3f8": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "0d33f329944a4c12a46c38fe88ca59a8": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_4d84d925ef24471896f552e888459cc2",
        "IPY_MODEL_aa0ede7ddcbd44cc90aa6d22c5b1764a",
        "IPY_MODEL_ffb6cdd492d24effb2f7b20ad5e6d1e7"
       ],
       "layout": "IPY_MODEL_2a48d4cfca1e4bf7bb6b0c9ed76e7575",
       "tabbable": null,
       "tooltip": null
      }
     },
     "14f94398ec1c4fb4850368d1395d841e": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_ebdcc0b2aef54e15b756e8c9a6edb88a",
       "max": 301,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_7a9649f57a22416d87d4dba798b920d5",
       "tabbable": null,
       "tooltip": null,
       "value": 301
      }
     },
     "1900e2a29cfc4a1db6fd0813e2400070": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_7ffe71f5527d4151b21594f1eaa9d691",
        "IPY_MODEL_1a88f86824854382a31d0cbce08d82f0",
        "IPY_MODEL_f126e9833a6c4dd1b8690127bc472d6a"
       ],
       "layout": "IPY_MODEL_078287aeba524eb880e8ae42c7a2fb3f",
       "tabbable": null,
       "tooltip": null
      }
     },
     "19adb09f04e344dbb8b9940b0e320b81": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_36398b64cc6545a79a73622708bfba20",
        "IPY_MODEL_7ff1fe8e8cc2420782cc137d57033060",
        "IPY_MODEL_31caf404808b47cd97a59b6e925d61ee"
       ],
       "layout": "IPY_MODEL_b1121de606cb41c791d58b02a3217028",
       "tabbable": null,
       "tooltip": null
      }
     },
     "1a88f86824854382a31d0cbce08d82f0": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_26b7212d1ca24ae2a98415155699dff0",
       "max": 2,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_5d54726957ff42c0b46dc68f9746869e",
       "tabbable": null,
       "tooltip": null,
       "value": 2
      }
     },
     "1ae05115dfeb43ecb2af67d5a6d811e9": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "1d5bedaa7eea4367be93f010183f69e1": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "1e7f496b37754e99a22e70aa167ce9e2": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_6ed7c6b8bd02429ba770d923b582828f",
        "IPY_MODEL_b443968aeae64090aa7d789333ebea00",
        "IPY_MODEL_7e495a3fa2874270967b3b448acec4a3"
       ],
       "layout": "IPY_MODEL_8f73401d11d546ccac80dd7846213425",
       "tabbable": null,
       "tooltip": null
      }
     },
     "213b98140baa46289f5aefca7906446c": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_91fc37721a6c49dc8ef5dc94f95ea38d",
       "max": 4965799096,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_f5bdefd281c64bada3b4a0f96d24c7ad",
       "tabbable": null,
       "tooltip": null,
       "value": 4965799096
      }
     },
     "26b7212d1ca24ae2a98415155699dff0": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "2a48d4cfca1e4bf7bb6b0c9ed76e7575": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "2bb1eb31c6624fd181b7597829a0c5a8": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "2e60af8cbbf946ca935f4e455f2e3370": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "31caf404808b47cd97a59b6e925d61ee": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_39016139e53143d4981aac6cdb606dc9",
       "placeholder": "​",
       "style": "IPY_MODEL_4847721ecde24c5b96b1c82d3a95887b",
       "tabbable": null,
       "tooltip": null,
       "value": " 1.46G/1.46G [00:39&lt;00:00, 19.2MB/s]"
      }
     },
     "31d477d2c1e3441e803125abf3f30079": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "32498aba8bd948769d6adf849c7a4698": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "347d3c6837cf40dd80463eb92eacbdac": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_9085173b32304b36a1ce011c96e26fbe",
       "placeholder": "​",
       "style": "IPY_MODEL_9368b1d60b9f40e09de02593a6e36d5c",
       "tabbable": null,
       "tooltip": null,
       "value": "generation_config.json: 100%"
      }
     },
     "3536c197ed3b465f8527791380c572c0": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "36398b64cc6545a79a73622708bfba20": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_0b3b6207109d4600b880572c8a50e3f8",
       "placeholder": "​",
       "style": "IPY_MODEL_e98242c5922c4265a200c4b5df99e0de",
       "tabbable": null,
       "tooltip": null,
       "value": "model-00002-of-00002.safetensors: 100%"
      }
     },
     "39016139e53143d4981aac6cdb606dc9": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "390d38120ad2409d9311359f985662cb": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "3c3586553f6647c89be8e8714e0401af": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_7f96cb48efc1484e9d8edb3a1c544af4",
        "IPY_MODEL_62d82d79658f45ab80d78934692a64a5",
        "IPY_MODEL_a9e9e70cb1744406a6bd0e4812b7eab0"
       ],
       "layout": "IPY_MODEL_390d38120ad2409d9311359f985662cb",
       "tabbable": null,
       "tooltip": null
      }
     },
     "3d8883cb49ef44088c79ef6f305ebc67": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "4237211dd2ae48228a5284018cbc244f": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "441f88e43e7f473ba58a30b3c6ebb447": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "4847721ecde24c5b96b1c82d3a95887b": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "4a1aa51251504cd58d78113661ea597a": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "4c3a4fc8688c486fb2ec2767342952da": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "4d84d925ef24471896f552e888459cc2": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_e02d5aab894241b9abfd866ff358c009",
       "placeholder": "​",
       "style": "IPY_MODEL_8be44ff82a2f4f88a1c4fb2e4663e82f",
       "tabbable": null,
       "tooltip": null,
       "value": "model.safetensors.index.json: 100%"
      }
     },
     "4fe1005c53034bfea49e590f98f1f9c9": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "537a36d7497d47a8b5618f744c2f0b34": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_d31e45f6dba84b61bcda0d75a4c2941c",
       "placeholder": "​",
       "style": "IPY_MODEL_88655d345301482b8c06bbe3f2d87824",
       "tabbable": null,
       "tooltip": null,
       "value": " 185/185 [00:00&lt;00:00, 13.8kB/s]"
      }
     },
     "53c9c9a1085c4305acd27740c2350f24": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "54fa65dc26d744a3959fc1f5ba0a2831": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "5610015596ea469a86c55125c374a03a": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "56b79dcd502146db9d399150e702f53a": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_d0ce65b523a14ea08031248bd780a412",
       "max": 9085657,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_ae43992e59b6480d902503fbe539e202",
       "tabbable": null,
       "tooltip": null,
       "value": 9085657
      }
     },
     "5af90f5d75c7437bad5ca655c8d843ec": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_bf1a1048628c46938c6b4598a33ff8a0",
       "placeholder": "​",
       "style": "IPY_MODEL_badb591736864279a682edb4d864cf62",
       "tabbable": null,
       "tooltip": null,
       "value": "tokenizer.json: 100%"
      }
     },
     "5d54726957ff42c0b46dc68f9746869e": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "5e78387eb0a74dddb740b8bb248ab531": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "62d82d79658f45ab80d78934692a64a5": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_5e78387eb0a74dddb740b8bb248ab531",
       "max": 2,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_c4e03696d5764103a91d2d31e21b0ebe",
       "tabbable": null,
       "tooltip": null,
       "value": 2
      }
     },
     "6bf2f02df1b64a7780175a37df070d32": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "6ed7c6b8bd02429ba770d923b582828f": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_07a2af6dc8884ddd8c5c3ea748144558",
       "placeholder": "​",
       "style": "IPY_MODEL_4fe1005c53034bfea49e590f98f1f9c9",
       "tabbable": null,
       "tooltip": null,
       "value": "config.json: 100%"
      }
     },
     "717407be0e8044a7bb973f18725e63a1": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "72949f5e33a94aee9c8d36cf8d735aeb": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_1ae05115dfeb43ecb2af67d5a6d811e9",
       "max": 185,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_e89627fd4955478eb885eec78598aedf",
       "tabbable": null,
       "tooltip": null,
       "value": 185
      }
     },
     "7569d43e16854ecea44cf81d144086c8": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "77fe5289d6df4879a662a85407c7e10b": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_6bf2f02df1b64a7780175a37df070d32",
       "max": 50500,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_54fa65dc26d744a3959fc1f5ba0a2831",
       "tabbable": null,
       "tooltip": null,
       "value": 50500
      }
     },
     "78a09fe203194f8a810d64decba69198": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "7a9649f57a22416d87d4dba798b920d5": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "7ac77519021c41deacb2aa8bfe396d91": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_2e60af8cbbf946ca935f4e455f2e3370",
       "placeholder": "​",
       "style": "IPY_MODEL_e0dce68c310c41099f07ef5d16bf5848",
       "tabbable": null,
       "tooltip": null,
       "value": " 4.97G/4.97G [00:41&lt;00:00, 221MB/s]"
      }
     },
     "7e495a3fa2874270967b3b448acec4a3": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_9d099d94ea0b4f3eb90459d0bb4c6154",
       "placeholder": "​",
       "style": "IPY_MODEL_828802f6200442fab5e186a1b5714eb6",
       "tabbable": null,
       "tooltip": null,
       "value": " 844/844 [00:00&lt;00:00, 105kB/s]"
      }
     },
     "7f8db1a6d1a34e28949518c6a734fad7": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_347d3c6837cf40dd80463eb92eacbdac",
        "IPY_MODEL_72949f5e33a94aee9c8d36cf8d735aeb",
        "IPY_MODEL_537a36d7497d47a8b5618f744c2f0b34"
       ],
       "layout": "IPY_MODEL_f0e9a33aa6244f079ff6deac55f43388",
       "tabbable": null,
       "tooltip": null
      }
     },
     "7f96cb48efc1484e9d8edb3a1c544af4": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_3536c197ed3b465f8527791380c572c0",
       "placeholder": "​",
       "style": "IPY_MODEL_adfcb71f97354b559abe404ec610962a",
       "tabbable": null,
       "tooltip": null,
       "value": "Fetching 2 files: 100%"
      }
     },
     "7ff1fe8e8cc2420782cc137d57033060": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_c51e71a225464336abcc170c13fa10a5",
       "max": 1459729952,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_e9c748ec5fab4062b4137f168011339e",
       "tabbable": null,
       "tooltip": null,
       "value": 1459729952
      }
     },
     "7ffe71f5527d4151b21594f1eaa9d691": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_d64217bea403414ea9ad229ee6e7d2c8",
       "placeholder": "​",
       "style": "IPY_MODEL_92c79f85b8664163b6be2c0b52c11e58",
       "tabbable": null,
       "tooltip": null,
       "value": "Loading checkpoint shards: 100%"
      }
     },
     "810e9a74e0f343ca9741c33160ed9207": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "828802f6200442fab5e186a1b5714eb6": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "8461da3c664b47fb92b18116a5b690c7": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_01232c9b927546c4811883c380dd08c0",
        "IPY_MODEL_14f94398ec1c4fb4850368d1395d841e",
        "IPY_MODEL_e44f926d65ed473bafe5c8be1496baa1"
       ],
       "layout": "IPY_MODEL_862148b9c0a4400e98da81004700673c",
       "tabbable": null,
       "tooltip": null
      }
     },
     "862148b9c0a4400e98da81004700673c": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "87df91cc2d3b42259501c4676715080c": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "88655d345301482b8c06bbe3f2d87824": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "8be44ff82a2f4f88a1c4fb2e4663e82f": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "8f73401d11d546ccac80dd7846213425": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "9085173b32304b36a1ce011c96e26fbe": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "91fc37721a6c49dc8ef5dc94f95ea38d": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "92c79f85b8664163b6be2c0b52c11e58": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "92f2252202b845fbaebfafce3c43b328": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "9368b1d60b9f40e09de02593a6e36d5c": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "9d099d94ea0b4f3eb90459d0bb4c6154": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "a327c102bd2d487290a0e610c97fb4a3": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_dc9067c01285494da3f77ed08b677dbe",
        "IPY_MODEL_213b98140baa46289f5aefca7906446c",
        "IPY_MODEL_7ac77519021c41deacb2aa8bfe396d91"
       ],
       "layout": "IPY_MODEL_bf3bc3d7251143528bd8c7e4d47e033b",
       "tabbable": null,
       "tooltip": null
      }
     },
     "a47250f913444e80b668ff56055bd46e": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "a713a71e0dd7471c96ec7bbf0bf66daf": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_5610015596ea469a86c55125c374a03a",
       "placeholder": "​",
       "style": "IPY_MODEL_ed2d5f93449f411a96a68bd6bebd50db",
       "tabbable": null,
       "tooltip": null,
       "value": "tokenizer_config.json: 100%"
      }
     },
     "a9e9e70cb1744406a6bd0e4812b7eab0": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_92f2252202b845fbaebfafce3c43b328",
       "placeholder": "​",
       "style": "IPY_MODEL_78a09fe203194f8a810d64decba69198",
       "tabbable": null,
       "tooltip": null,
       "value": " 2/2 [00:41&lt;00:00, 41.82s/it]"
      }
     },
     "aa0ede7ddcbd44cc90aa6d22c5b1764a": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_cf1b95bf38b54e89ac32326e94af5361",
       "max": 20919,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_1d5bedaa7eea4367be93f010183f69e1",
       "tabbable": null,
       "tooltip": null,
       "value": 20919
      }
     },
     "adfcb71f97354b559abe404ec610962a": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "ae43992e59b6480d902503fbe539e202": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "b1121de606cb41c791d58b02a3217028": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "b443968aeae64090aa7d789333ebea00": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_31d477d2c1e3441e803125abf3f30079",
       "max": 844,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_4a1aa51251504cd58d78113661ea597a",
       "tabbable": null,
       "tooltip": null,
       "value": 844
      }
     },
     "badb591736864279a682edb4d864cf62": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "bf1a1048628c46938c6b4598a33ff8a0": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "bf3bc3d7251143528bd8c7e4d47e033b": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "c09ef532e4f54e13ae56bb5155bbaf98": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_7569d43e16854ecea44cf81d144086c8",
       "placeholder": "​",
       "style": "IPY_MODEL_3d8883cb49ef44088c79ef6f305ebc67",
       "tabbable": null,
       "tooltip": null,
       "value": " 50.5k/50.5k [00:00&lt;00:00, 5.79MB/s]"
      }
     },
     "c4e03696d5764103a91d2d31e21b0ebe": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "c51e71a225464336abcc170c13fa10a5": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "cf1b95bf38b54e89ac32326e94af5361": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "d0ce65b523a14ea08031248bd780a412": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "d31e45f6dba84b61bcda0d75a4c2941c": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "d64217bea403414ea9ad229ee6e7d2c8": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "dc9067c01285494da3f77ed08b677dbe": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_87df91cc2d3b42259501c4676715080c",
       "placeholder": "​",
       "style": "IPY_MODEL_53c9c9a1085c4305acd27740c2350f24",
       "tabbable": null,
       "tooltip": null,
       "value": "model-00001-of-00002.safetensors: 100%"
      }
     },
     "e02d5aab894241b9abfd866ff358c009": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "e0833c488e8f4658b5646d13f8e00193": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_5af90f5d75c7437bad5ca655c8d843ec",
        "IPY_MODEL_56b79dcd502146db9d399150e702f53a",
        "IPY_MODEL_f15b6d26988b4ec8a9a4575e239b9139"
       ],
       "layout": "IPY_MODEL_a47250f913444e80b668ff56055bd46e",
       "tabbable": null,
       "tooltip": null
      }
     },
     "e0dce68c310c41099f07ef5d16bf5848": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "e3c8c0763247478b854303466e934c6b": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "e44f926d65ed473bafe5c8be1496baa1": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_717407be0e8044a7bb973f18725e63a1",
       "placeholder": "​",
       "style": "IPY_MODEL_441f88e43e7f473ba58a30b3c6ebb447",
       "tabbable": null,
       "tooltip": null,
       "value": " 301/301 [00:00&lt;00:00, 38.7kB/s]"
      }
     },
     "e89627fd4955478eb885eec78598aedf": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "e98242c5922c4265a200c4b5df99e0de": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "e9c748ec5fab4062b4137f168011339e": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "ebdcc0b2aef54e15b756e8c9a6edb88a": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "ed2d5f93449f411a96a68bd6bebd50db": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "ed5d8298bca447a0b17317b62e92dd86": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_a713a71e0dd7471c96ec7bbf0bf66daf",
        "IPY_MODEL_77fe5289d6df4879a662a85407c7e10b",
        "IPY_MODEL_c09ef532e4f54e13ae56bb5155bbaf98"
       ],
       "layout": "IPY_MODEL_f6540ec6c8c64b9db05f69129e995ebf",
       "tabbable": null,
       "tooltip": null
      }
     },
     "f0e9a33aa6244f079ff6deac55f43388": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "f126e9833a6c4dd1b8690127bc472d6a": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_4237211dd2ae48228a5284018cbc244f",
       "placeholder": "​",
       "style": "IPY_MODEL_05482d47f8694866b421df0f50ce8d72",
       "tabbable": null,
       "tooltip": null,
       "value": " 2/2 [00:05&lt;00:00,  2.71s/it]"
      }
     },
     "f15b6d26988b4ec8a9a4575e239b9139": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_32498aba8bd948769d6adf849c7a4698",
       "placeholder": "​",
       "style": "IPY_MODEL_2bb1eb31c6624fd181b7597829a0c5a8",
       "tabbable": null,
       "tooltip": null,
       "value": " 9.09M/9.09M [00:00&lt;00:00, 26.3MB/s]"
      }
     },
     "f5bdefd281c64bada3b4a0f96d24c7ad": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "f6540ec6c8c64b9db05f69129e995ebf": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "f73d02e9622f4f8a945995f26ae10121": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "ffb6cdd492d24effb2f7b20ad5e6d1e7": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_4c3a4fc8688c486fb2ec2767342952da",
       "placeholder": "​",
       "style": "IPY_MODEL_810e9a74e0f343ca9741c33160ed9207",
       "tabbable": null,
       "tooltip": null,
       "value": " 20.9k/20.9k [00:00&lt;00:00, 2.69MB/s]"
      }
     }
    },
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
