{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "08061c02",
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import defaultdict\n",
    "import torch\n",
    "from torch.utils.data import DataLoader\n",
    "from tqdm import tqdm\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import os\n",
    "from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score, silhouette_score\n",
    "from collections import Counter\n",
    "import numpy as np\n",
    "\n",
    "import sys\n",
    "igloo_path = os.path.abspath(os.path.join(os.getcwd(), \"..\", \"..\"))\n",
    "print(f\"Adding {igloo_path} to sys.path\")\n",
    "sys.path.append(igloo_path)\n",
    "from model.vqvae import VQVAE\n",
    "from dataset import LoopSequenceDataset\n",
    "from dataset import Alphabet, proteinseq_toks\n",
    "from evals.metrics import eval_clusters, dihedral_distance_pairwise, eval_clusters_length_independent#"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "648002b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-loop annotation table; the assigned_cluster* columns are read below.\n",
    "loop_df = pd.read_parquet(\"preprocessed_data/sabdab_2025-05-06-paired_loops_with_sequence_id.parquet\")\n",
    "\n",
    "# Build the \"<sabdab_id>_<loop_type>\" key with vectorized string concatenation\n",
    "# rather than a row-wise apply(axis=1), which runs a Python lambda once per row.\n",
    "loop_df['loop_id'] = loop_df['sabdab_id'].astype(str) + \"_\" + loop_df['loop_type'].astype(str)\n",
    "\n",
    "# Index by loop_id once and derive every loop_id -> cluster lookup from that view.\n",
    "# NOTE(review): the D=0.1 / D=0.61 suffixes look like clustering distance cutoffs -- confirm.\n",
    "_clusters_by_loop_id = loop_df.set_index('loop_id')\n",
    "loop_to_canonical = _clusters_by_loop_id['assigned_cluster'].to_dict()\n",
    "loop_to_canonical_strict = _clusters_by_loop_id['assigned_cluster_D=0.1'].to_dict()\n",
    "loop_to_canonical_ssc_comparison = _clusters_by_loop_id['assigned_cluster_D=0.61'].to_dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "42cda11b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Codebook size: 8192\n",
      "Number of parameters: 1932703\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "VQVAE(\n",
       "  (encoder): LoopTransformer(\n",
       "    (embed_tokens): Embedding(25, 128, padding_idx=3)\n",
       "    (dihedral_projection): Linear(in_features=6, out_features=128, bias=True)\n",
       "    (layers): ModuleList(\n",
       "      (0-3): 4 x TransformerLayer(\n",
       "        (self_attn): MultiheadAttention(\n",
       "          (k_proj): Linear(in_features=128, out_features=128, bias=True)\n",
       "          (v_proj): Linear(in_features=128, out_features=128, bias=True)\n",
       "          (q_proj): Linear(in_features=128, out_features=128, bias=True)\n",
       "          (out_proj): Linear(in_features=128, out_features=128, bias=True)\n",
       "          (rot_emb): RotaryEmbedding()\n",
       "        )\n",
       "        (self_attn_layer_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)\n",
       "        (fc1): Linear(in_features=128, out_features=512, bias=True)\n",
       "        (fc2): Linear(in_features=512, out_features=128, bias=True)\n",
       "        (final_layer_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)\n",
       "      )\n",
       "    )\n",
       "    (emb_layer_norm_after): LayerNorm((128,), eps=1e-05, elementwise_affine=True)\n",
       "    (lm_head): RobertaLMHead(\n",
       "      (dense): Linear(in_features=128, out_features=128, bias=True)\n",
       "      (layer_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)\n",
       "    )\n",
       "    (dihedral_decoder): Sequential(\n",
       "      (0): Linear(in_features=128, out_features=128, bias=True)\n",
       "      (1): ReLU()\n",
       "      (2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)\n",
       "      (3): Linear(in_features=128, out_features=6, bias=True)\n",
       "    )\n",
       "    (context_encoder): Linear(in_features=408, out_features=128, bias=True)\n",
       "  )\n",
       "  (quantizer): VectorQuantize(\n",
       "    (project_in): Identity()\n",
       "    (project_out): Identity()\n",
       "    (_codebook): EuclideanCodebook()\n",
       "  )\n",
       ")"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pretrained weights and matching config, addressed relative to this notebook's directory.\n",
    "model_config = \"../../checkpoints/igloo_config.json\"\n",
    "model_ckpt = \"../../checkpoints/igloo_weights.pt\"\n",
    "\n",
    "# NOTE(review): strict=False is forwarded to the loader; presumably it tolerates\n",
    "# missing/unexpected keys in the checkpoint -- confirm against VQVAE.load_from_config_and_weights.\n",
    "model = VQVAE.load_from_config_and_weights(model_config, model_ckpt, strict=False)\n",
    "\n",
    "# Only parameters with requires_grad=True are counted.\n",
    "num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
    "print(\"Codebook size:\", model.codebook_size)\n",
    "print(\"Number of parameters:\", num_trainable_params)\n",
    "\n",
    "# Bare last expression: switches the model to eval mode and displays its module tree.\n",
    "model.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ccb2a53b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset size: test=7442, train=108167, val=7090\n",
      "Number of unique sequences: test=2041, train=17820, val=1899\n",
      "Test, val overlap: 0 sequences\n",
      "Test, train overlap: 0 sequences\n",
      "Train, val overlap: 0 sequences\n"
     ]
    }
   ],
   "source": [
    "# Notebook-level switches: USE_H3 selects which data files are read,\n",
    "# USE_CONTEXT decides whether the context parquet is handed to the datasets.\n",
    "USE_CONTEXT = False\n",
    "USE_H3 = True\n",
    "\n",
    "dataset_path_suffix = \".jsonl\" if USE_H3 else \"_no_H3.jsonl\"\n",
    "context_path = \"preprocessed_data/sabdab_2025-05-06-paired_chains_lobster_24M_representations.parquet\"\n",
    "\n",
    "\n",
    "def _load_split(split):\n",
    "    \"\"\"Return (dataset, dataloader) for one split name: 'test', 'train' or 'val'.\n",
    "\n",
    "    The dataloader is unshuffled, uses batch_size=32 and the dataset's own collate_fn.\n",
    "    \"\"\"\n",
    "    split_dataset = LoopSequenceDataset(\n",
    "        f\"data/{split}_loop_len_all_seed_42{dataset_path_suffix}\",\n",
    "        max_length=36,\n",
    "        context_path=context_path if USE_CONTEXT else None,\n",
    "    )\n",
    "    split_dataloader = DataLoader(split_dataset, batch_size=32, shuffle=False, collate_fn=split_dataset.collate_fn)\n",
    "    return split_dataset, split_dataloader\n",
    "\n",
    "\n",
    "def _unique_loop_sequences(split_dataset):\n",
    "    \"\"\"Return the set of distinct 'loop_sequence' values among a dataset's records.\"\"\"\n",
    "    return {record['loop_sequence'] for record in split_dataset.data}\n",
    "\n",
    "\n",
    "# Same construction order as before: test, then train, then val.\n",
    "test_dataset, test_dataloader = _load_split(\"test\")\n",
    "train_dataset, train_dataloader = _load_split(\"train\")\n",
    "val_dataset, val_dataloader = _load_split(\"val\")\n",
    "print(f\"Dataset size: test={len(test_dataset)}, train={len(train_dataset)}, val={len(val_dataset)}\")\n",
    "\n",
    "alphabet = Alphabet(standard_toks=proteinseq_toks)\n",
    "\n",
    "# Leakage check: count loop sequences that appear in more than one split.\n",
    "test_sequences = _unique_loop_sequences(test_dataset)\n",
    "train_sequences = _unique_loop_sequences(train_dataset)\n",
    "val_sequences = _unique_loop_sequences(val_dataset)\n",
    "print(f\"Number of unique sequences: test={len(test_sequences)}, train={len(train_sequences)}, val={len(val_sequences)}\")\n",
    "print(f\"Test, val overlap: {len(test_sequences & val_sequences)} sequences\")\n",
    "print(f\"Test, train overlap: {len(test_sequences & train_sequences)} sequences\")\n",
    "print(f\"Train, val overlap: {len(train_sequences & val_sequences)} sequences\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "73072643",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "All dataset size: 122699\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 3835/3835 [03:46<00:00, 16.92it/s]\n"
     ]
    }
   ],
   "source": [
    "# Pool the three splits (test, then train, then val) into one dataset so that a\n",
    "# single unshuffled pass yields quantized indices in that concatenated order.\n",
    "all_dataset = LoopSequenceDataset(data_path=None, max_length=36)\n",
    "all_dataset.data = test_dataset.data + train_dataset.data + val_dataset.data\n",
    "all_dataloader = DataLoader(all_dataset, batch_size=32, shuffle=False, collate_fn=all_dataset.collate_fn)\n",
    "print(f\"All dataset size: {len(all_dataset)}\")\n",
    "\n",
    "# Generic aliases for the dataset/dataloader being evaluated.\n",
    "dataset, dataloader = all_dataset, all_dataloader\n",
    "# NOTE(review): the effect of the `inference` flag lives in LoopSequenceDataset -- confirm there.\n",
    "dataset.inference = True\n",
    "\n",
    "\n",
    "def _move_tensors_to_device(batch, device):\n",
    "    \"\"\"Move every tensor value of `batch` except the 'id' entry onto `device`, in place.\"\"\"\n",
    "    for key in batch:\n",
    "        entry = batch[key]\n",
    "        if key != 'id' and isinstance(entry, torch.Tensor):\n",
    "            batch[key] = entry.to(device)\n",
    "\n",
    "\n",
    "loss_fn = torch.nn.MSELoss()  # defined here but not used in this cell\n",
    "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
    "model = model.to(device)\n",
    "model.eval()\n",
    "\n",
    "all_quantized_indices = []\n",
    "with torch.no_grad():\n",
    "    for batch in tqdm(dataloader, total=len(dataloader)):\n",
    "        _move_tensors_to_device(batch, device)\n",
    "        output = model(batch, val=True)\n",
    "        # Keep results on the CPU so they do not accumulate on the accelerator.\n",
    "        all_quantized_indices.append(output.quantized_indices.detach().cpu())\n",
    "# Replace the per-batch list with one tensor stacked along the batch dimension.\n",
    "all_quantized_indices = torch.cat(all_quantized_indices, dim=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "6a62656b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 3835/3835 [05:14<00:00, 12.18it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Masked AA Recovery: 0.6994\n",
      "Masked AA Recovery for loop type L1: 0.7929\n",
      "Masked AA Recovery for loop type L2: 0.7083\n",
      "Masked AA Recovery for loop type L3: 0.6529\n",
      "Masked AA Recovery for loop type L4: 0.885\n",
      "Masked AA Recovery for loop type H1: 0.7592\n",
      "Masked AA Recovery for loop type H2: 0.6515\n",
      "Masked AA Recovery for loop type H3: 0.5573\n",
      "Masked AA Recovery for loop type H4: 0.7583\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHHCAYAAABDUnkqAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAARRpJREFUeJzt3Xl8TGf///H3RGQTiSXEFiIotSt37NwqrVZtXdTWhhR1U1u1VbtSe229SymKtqhouXVxV6sqpfatyrfKTak1RFViaYPk+v3RX6amSZiJiYnj9Xw85vEw17nOOZ9rMsm8nXOdMzZjjBEAAIBFeHm6AAAAAHci3AAAAEsh3AAAAEsh3AAAAEsh3AAAAEsh3AAAAEsh3AAAAEsh3AAAAEsh3AAAAEsh3ACZiIuLk81m08cff5yt+wkPD1eXLl2ydR/wnKNHj8pms2ny5MmeLgW4ZxBukCMtXLhQNptNNptN3333XbrlxhiFhYXJZrOpRYsWHqjQcy5cuCA/Pz/ZbDbt37//lv33798vm80mPz8/Xbhwwen9vPbaa/afgc1mU+7cuRUeHq6+ffu6tB3cGWm/Mzt27PB0KU658b11s0dcXJynS8VdyNvTBQA34+fnpyVLlqhBgwYO7d9++61OnDghX19fD1XmOR999JFsNpuKFCmixYsXa8yYMTftv2jRIhUpUkS//fabPv74Y3Xr1s2l/c2aNUuBgYG6fPmy1q5dq7feeku7du3KMHQCzvrggw8cnr///vtas2ZNuvb777//TpYFiyDcIEdr3ry5PvroI/373/+Wt/dfb9clS5aoZs2aOnfunAer84xFixapefPmKlWqlJYsWXLTcGOM0ZIlS9SxY0cdOXJEixcvdjncPPXUUwoJCZEk9ejRQ+3bt1dsbKy2bdumyMjI2xpLTnf9+nWlpqbKx8fH06VYzjPPPOPwfMuWLVqzZk26diArOC2FHK1Dhw769ddftWbNGnvb1atX9fHHH6tjx44ZrjN58mTVq1dPBQsWlL+/v2rWrJnhvJk1a9aoQYMGypcvnwIDA1W+fHkNGTLkpvUkJyerRYsWCg4O1qZNmyRJqampmj59uipVqiQ/Pz+FhoaqR48e+u233xzWNcZozJgxKlGihAICAtSkSRP93//9n0uvx7Fjx7Rhwwa1b99e7du315EjR+x1ZGTjxo06evSovf/69et14sQJl/b5dw0bNpQkHT582KF969ateuSRRxQcHKyAgAA1btxYGzduTLf+yZMn1bVrVxUrVky+vr4qXbq0evbsqatXr9r7/Pzzz2rbtq0KFCiggIAA1alTR6tWrbIvP3PmjLy9vTVq1Kh02z9w4IBsNptmzJhhb7tw4YL69++vsLAw+fr6qmzZspo4caJSU1PtfW6cGzN9+nSVKVNGvr6+2rZtm/LkyaN+/fql29eJEyeUK1cujR8/3qnXbtq0aSpVqpT8/f3VuHFj7du3z75swYIFstls2r17d7r1xo0bp1y5cunkyZNO7edmdu/erUcffVRBQUEKDAxU06ZNtWXLlnT9bvUzkP6alxYbG6shQ4aoSJEiypMnj1q1aqXjx4/fVp2dO3dWSEiIrl27lm7Zww8/rPLly9uf22w29e7dW4sXL1b58uXl5+enmjVrav369enWPXnypJ577jmFhobK19dXlSpV0vz582+rVuRABsiBFixYYCSZ7du3m3r16plnn33WvmzlypXGy8vLnDx50pQqVco89thjDuuWKFHC9OrVy8yYMcNMnTrVREZGGknm888/t/fZt2+f8fHxMbVq1TJvvvmmmT17tnn55ZdNo0aN7H3WrVtnJJmPPvrIGGPMlStXzEMPPWTy589vtm3bZu/XrVs34+3tbbp3725mz55tXn31VZMnTx7zj3/8w1y9etXeb9iwYUaSad68uZkxY4Z57rnnTLFixUxISIjp3LmzU6/LhAkTTGBgoLly5YoxxpgyZcqY
Xr16Zdr/X//6lylTpoy9/sDAQDNp0iSn9jVy5EgjySQkJDi0v/zyy0aS+eKLL+xta9euNT4+PqZu3bpmypQpZtq0aaZq1arGx8fHbN261d7v5MmTplixYiYgIMD079/fzJ492wwfPtzcf//95rfffjPGGBMfH29CQ0NN3rx5zdChQ83UqVNNtWrVjJeXl1mxYoV9Ww8++KCpWLFiurpHjRplcuXKZeLj440xxly+fNlUrVrVFCxY0AwZMsTMnj3bREdHG5vNZvr162df78iRI0aSqVixoomIiDATJkww06ZNM7/88ovp1KmTCQ0NNdevX3fY16RJk4zNZjO//PJLpq9j2narVKliwsPDzcSJE82oUaNMgQIFTKFChex1JiUlGX9/f/PSSy+l20bFihXNgw8+mOk+jHH8ncnMvn37TJ48eUzRokXN66+/biZMmGBKly5tfH19zZYtW+z9nP0ZpP2OVKlSxVStWtVMnTrVDBo0yPj5+Zn77rvP/j51xgsvvGBu/Ehas2aNkWQ+++wzh36nT582uXLlMqNHj7a3STKVK1c2ISEhZvTo0WbixImmVKlSxt/f3+zdu9dhXCVKlDBhYWFm9OjRZtasWaZVq1ZGkpk2bZrTtSLnI9wgR7rxD/WMGTNM3rx57X8o27Zta5o0aWKMMRmGm7//Qb169aqpXLmyw4fDtGnTMvzgvtGN4ebixYumcePGJiQkxOzevdveZ8OGDUaSWbx4scO6q1evdmg/e/as8fHxMY899phJTU219xsyZIiR5HS4qVKliunUqZPD+iEhIebatWvp+l69etUULFjQDB061N7WsWNHU61aNaf2lRZuDhw4YBISEszRo0fN/Pnzjb+/vylUqJC5fPmyMcaY1NRUU65cOdOsWTOHsV25csWULl3aPPTQQ/a26Oho4+XlleEHcNq6/fv3N5LMhg0b7MsuXrxoSpcubcLDw01KSooxxph33nnHSHL48DImfRB4/fXXTZ48eczBgwcd+g0aNMjkypXLHDt2zBjzVwgJCgoyZ8+edej75Zdfpgt0xhhTtWpV07hx45u+jmnb9ff3NydOnLC3b9261UgyL774or2tQ4cOplixYvYxGmPMrl27jCSzYMGCm+7HmXDTpk0b4+PjYw4fPmxvO3XqlMmbN69DsHf2Z5D2O1K8eHGTlJRk77ts2TIjybz55ps3rflGfw83KSkppkSJEqZdu3YO/aZOnWpsNpv5+eef7W2SjCSzY8cOe9svv/xi/Pz8zOOPP25v69q1qylatKg5d+6cwzbbt29vgoODXQpjyNkIN8iRbvxDffbsWePt7W2WLVtm/9/t3LlzjTEZh5sbnT9/3iQkJJiePXuafPnypdv+vHnzHD5IbpT2h3vevHmmbt26JjQ01Ozbt8+hT9++fU1wcLA5e/asSUhIcHgEBgaabt26GWOMWbJkiZFkVq9e7bD+2bNnnQ43e/bsSXcEau/evena0nzyySdGkkPNn332Wbq2zKSFm78/qlSp4vAhkvbh+95776V7Dbp162Z8fX1NSkqKSUlJMUFBQaZ169Y33e99991nIiMj07WPHz/eIcwkJCQYb29vM2zYsHSvxzvvvGNvq1q1qnnkkUfS1fb1118bSWbRokXGmL9CSExMTLp9p6SkmGLFiplnnnkm3b7S3ouZSdtuhw4d0i2rXbu2KV++vP35F198YSSZr7/+2t720ksvGX9/f4fwkJFbhZvr16+bgIAA8/TTT6db1qNHD+Pl5WUSExONMc7/DNJ+RwYPHuzQLzU11RQtWtQ0a9bspjXf6O/hxhhjXn311XRjr1mzpqlfv75DP0mmbt266bbZrl07ExAQYK5fv25SU1NNvnz5zPPPP5/uvZD22n333XdO14ucjTk3yPEKFSqkqKgoLVmyRCtWrFBKSoqeeuqpTPt//vnnqlOnjvz8/FSgQAEVKlRIs2bNUmJior1Pu3btVL9+fXXr1k2hoaFq3769li1b5jAH
I03//v21fft2ff3116pUqZLDsv/9739KTExU4cKFVahQIYfHpUuXdPbsWUnSL7/8IkkqV65curHlz5/fqddh0aJFypMnjyIiInTo0CEdOnRIfn5+Cg8P1+LFizPsX7p0afn6+tr7lylTRgEBARn2z8zy5cu1Zs0aLVmyRHXq1NHZs2fl7+/v8BpIf86R+PtrMG/ePCUnJysxMVEJCQlKSkpS5cqVb7q/X375xWE+RZq0q2bSXsuQkBA1bdpUy5Yts/eJjY2Vt7e3nnjiCYf6Vq9ena62qKgoSbL/jNKULl063b69vLzUqVMnrVy5UleuXJEkLV68WH5+fmrbtu1Nx5Pm7z97Sbrvvvt09OhR+/OHHnpIRYsWtf98UlNT9eGHH6p169bKmzevU/vJTEJCgq5cuZLpa5uammqfJ+PszyDN38dms9lUtmxZh7FlRXR0tH7//Xf95z//kfTnfKqdO3fq2WefTdc3s9f3ypUrSkhIUEJCgi5cuKA5c+akey/ExMRISv9ewN2Lq6VwV+jYsaO6d++u+Ph4Pfroo8qXL1+G/TZs2KBWrVqpUaNGevvtt1W0aFHlzp1bCxYs0JIlS+z9/P39tX79eq1bt06rVq3S6tWrFRsbqwcffFBfffWVcuXKZe/bunVrLV26VBMmTND7778vL6+//k+QmpqqwoULZxoWChUq5JbxG2P04Ycf6vLly6pYsWK65WfPntWlS5cUGBgoSUpKStJnn32mP/74I8M/+kuWLNHYsWNls9luue9GjRrZr5Zq2bKlqlSpok6dOmnnzp3y8vKyB8I33nhD1atXz3AbgYGBOn/+vLPDdVr79u0VExOj77//XtWrV9eyZcvUtGlTe73Snz+jhx56SAMHDsxwG/fdd5/D8xuD242io6P1xhtvaOXKlerQoYOWLFlin1zuLrly5VLHjh01d+5cvf3229q4caNOnTp1z15BVLFiRdWsWVOLFi1SdHS0Fi1aJB8fHz399NMubyvtffrMM8+oc+fOGfapWrXqbdWLnINwg7vC448/rh49emjLli2KjY3NtN/y5cvl5+enL7/80uEeOAsWLEjX18vLS02bNlXTpk01depUjRs3TkOHDtW6devs/6uXpDZt2ujhhx9Wly5dlDdvXs2aNcu+rEyZMvr6669Vv379TD8UJalUqVKS/jyKEBERYW9PSEhId1VVRtLu6zN69Oh09/347bff9Pzzz2vlypX2D8EVK1bojz/+0KxZsxw+6KU///c7bNgwbdy4Md39g24lMDBQI0eOVExMjJYtW6b27durTJkykqSgoCCH1+3vChUqpKCgIIcrhDJSqlQpHThwIF37Tz/9ZF+epk2bNurRo4f9PXHw4EENHjzYYb0yZcro0qVLN63NGZUrV1aNGjW0ePFilShRQseOHdNbb73l9PppR7hudPDgQYWHhzu0RUdHa8qUKfrss8/0xRdfqFChQmrWrNlt1S79+foHBARk+tp6eXkpLCxMkms/Ayn92IwxOnTokFvCQnR0tAYMGKDTp09ryZIleuyxxzI82pnZ6xsQEGD/T0bevHmVkpJy2+8F3AU8fV4MyEhG8wcWLlxoXnvtNYdJf3+fczNgwAATEBBgn+xqzJ9zHgICAhzO5//666/p9rlq1SqH+St/v1rqrbfeMpLMwIED7evExcVlOOfAGGOuXbtmvwLo7NmzJnfu3FmeUNy1a1eTJ08e8/vvv2e4vFy5cuaRRx6xP2/atKmJiIjIsO8ff/xhAgMDzb/+9a+b7jOzq6WuXr1qSpQoYapXr26M+XM+SpkyZUy5cuXMxYsX023nxsm5rkwo3rRpk33ZpUuXTEREhMNk1jQtW7Y0ERER5tVXXzU+Pj721zzNa6+9luF8J2OM+e233+yTsdPmxrzxxhuZviZTp0413t7e5vHHHzcFCxZ0uBouM7eaUNy/f/9061StWtU8/PDDJigoyPTp0+eW+zDG+QnFvr6+5siRI/a2+Ph4ExQU
lOGE4lv9DG41oXj69OlO1W5MxnNujDH2OXdt27Y1kszy5cvT9dH/nw+2c+dOe9uxY8eMn5+fadOmjb2tS5cuxsfHJ90k9LT9wDoIN8iRnPlDbUz6cLN27VojyTRs2NDMmjXLjBo1yhQuXNhUrVrV4Q9nv379TI0aNcywYcPM3LlzzdixY03x4sVNiRIlzIULF4wx6cONMcaMHTvWSDJjx461t/Xo0cNIMo8++qiZNm2amTFjhunXr58pVqyYw7qDBw92uBS8a9euTl0K/scff5h8+fI5/JH+u5deesl4e3ubM2fOmJMnTxovL68MPzTTPPnkk7f8cM4s3BhjzBtvvOFw9dC6deuMn5+fKVmypBk5cqSZM2eOGTlypGnUqJFp0aKFfb0TJ06YIkWK2C8Ff+edd8xrr71mKlWqlO5S8ODgYDN8+HAzbdo0U716dWOz2RwuQ06zaNEiI8nkzZvXtGzZMt3yy5cvmwceeMB4e3ubbt26mVmzZpnJkyebzp07mzx58tjH50y4iY+PN97e3kaS6dmzZ6b9bpTRpeCjR482BQoUMAULFjSnTp1Kt87kyZPtH9g3Xkp/M2m/Mz179jSvv/56ukdSUpL9UvDixYubsWPHmokTJ5qIiIhMLwW/1c/g75eCT5s2zX4peNmyZR3+k3ErmYUbY4xp0aKFkWTy5ctn/vjjj3TLlcml4H5+fmbPnj0O4ypVqpQJCAgw/fr1M++8844ZP368adu2rcmfP7/TtSLnI9wgR8pquDHGmHfffdeUK1fO+Pr6mgoVKpgFCxbYP6jTrF271rRu3doUK1bM+Pj4mGLFipkOHTo4XC6cUbgxxpiBAwcaSWbGjBn2tjlz5piaNWsaf39/kzdvXlOlShUzcOBAhw+ulJQUM2rUKFO0aFHj7+9v/vnPf5p9+/aZUqVK3TTcLF++3Egy7777bqZ90o4gvfnmm2bKlClGklm7dm2m/RcuXGgkmU8++STTPjcLN4mJiSY4ONjhMujdu3ebJ554whQsWND4+vqaUqVKmaeffjpdHb/88ouJjo42hQoVMr6+viYiIsK88MILJjk52d7n8OHD5qmnnjL58uUzfn5+JjIyMsMrwoz56/4wuuHKp7+7ePGiGTx4sClbtqzx8fExISEhpl69emby5Mn2gOdMuDHGmObNm6c7qnEzN253ypQpJiwszPj6+pqGDRs6fPDeKO1eLvfdd59T+zDmr9+ZzB7Hjx83xvx5dVuzZs1MYGCgCQgIME2aNMlwLM78DNJ+Rz788EMzePBgU7hwYePv728ee+yxm977JyM3CzdpR4Kef/75DJdLMi+88IJZtGiR/Xe/Ro0aZt26den6njlzxrzwwgsmLCzM5M6d2xQpUsQ0bdrUzJkzx6V6kbPZjDHmts9tAcA94vHHH9fevXt16NChbNvHuXPnVLRoUY0YMULDhw/Ptv3crri4ODVp0kQfffTRTa9gvF2ffPKJ2rRpo/Xr19vvkH0jm82mF154weGu1Li3cSk4ADjp9OnTWrVqVYaXIrvTwoULlZKSku37uVvMnTtXERERLk+Ax72Lq6UA4BaOHDmijRs3at68ecqdO7d69OiRLfv55ptv9OOPP2rs2LFq06ZNuiup7jVLly7VDz/8oFWrVunNN9906tYFgES4AYBb+vbbbxUTE6OSJUvqvffeU5EiRbJlP6NHj9amTZtUv359ly4zt6oOHTooMDBQXbt2Va9evTxdDu4izLkBAACWwpwbAABgKYQbAABgKffcnJvU1FSdOnVKefPmZXIaAAB3CWOMLl68qGLFijl8x19G7rlwc+rUKfv3pwAAgLvL8ePHVaJEiZv2uefCTd68eSX9+eIEBQV5uBoAAOCMpKQkhYWF2T/Hb+aeCzdpp6KCgoIINwAA3GWcmVLChGIAAGAphBsAAGAphBsAAGAphBsAAGAphBsAAGAphBsAAGAphBsAAGAphBsAAGAphBsAAGAphBsAAGAphBsAAGAp
hBsAAGAphBsAAGAphBsAAGAphBsAAGAp3p4uAIBnhQ9a5ekSsuTohMc8XQKAHIojNwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFIINwAAwFI8Hm5mzpyp8PBw+fn5qXbt2tq2bdtN+0+fPl3ly5eXv7+/wsLC9OKLL+qPP/64Q9UCAICczqPhJjY2VgMGDNDIkSO1a9cuVatWTc2aNdPZs2cz7L9kyRINGjRII0eO1P79+/Xuu+8qNjZWQ4YMucOVAwCAnMqj4Wbq1Knq3r27YmJiVLFiRc2ePVsBAQGaP39+hv03bdqk+vXrq2PHjgoPD9fDDz+sDh063PJoDwAAuHd4LNxcvXpVO3fuVFRU1F/FeHkpKipKmzdvznCdevXqaefOnfYw8/PPP+u///2vmjdvfkdqBgAAOZ+3p3Z87tw5paSkKDQ01KE9NDRUP/30U4brdOzYUefOnVODBg1kjNH169f1r3/966anpZKTk5WcnGx/npSU5J4BAACAHMnjE4pdERcXp3Hjxuntt9/Wrl27tGLFCq1atUqvv/56puuMHz9ewcHB9kdYWNgdrBgAANxpHjtyExISoly5cunMmTMO7WfOnFGRIkUyXGf48OF69tln1a1bN0lSlSpVdPnyZT3//PMaOnSovLzSZ7XBgwdrwIAB9udJSUkEHAAALMxj4cbHx0c1a9bU2rVr1aZNG0lSamqq1q5dq969e2e4zpUrV9IFmFy5ckmSjDEZruPr6ytfX1/3FX4L4YNW3bF9udPRCY95ugQAANzCY+FGkgYMGKDOnTurVq1aioyM1PTp03X58mXFxMRIkqKjo1W8eHGNHz9ektSyZUtNnTpVNWrUUO3atXXo0CENHz5cLVu2tIccAABwb/NouGnXrp0SEhI0YsQIxcfHq3r16lq9erV9kvGxY8ccjtQMGzZMNptNw4YN08mTJ1WoUCG1bNlSY8eO9dQQAABADmMzmZ3PsaikpCQFBwcrMTFRQUFBbt8+p6Vwt+E9C+Bu4Mrn9111tRQAAMCtEG4AAIClEG4AAIClEG4AAIClEG4AAIClEG4AAIClEG4AAIClEG4AAIClEG4AAIClEG4AAIClePS7pQDgTuFrJnC3uVvfs5Ln37ccuQEAAJZCuAEAAJZCuAEAAJZCuAEAAJZCuAEAAJZCuAEAAJZCuAEAAJZCuAEAAJZCuAEAAJZCuAEAAJZCuAEAAJZCuAEAAJZCuAEAAJZCuAEAAJZCuAEAAJZCuAEAAJZCuAEAAJZCuAEAAJZCuAEAAJbi7ekCcHcKH7TK0yVkydEJj3m6BABANuPIDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBRvTxcA5FThg1Z5uoQsOzrhMU+XAAAeQ7gBAAu5W0M5gRzuxGkpAABgKYQbAABgKS6Hm86dO2v9+vXZUQsAAMBtczncJCYmKioqSuXKldO4ceN08uTJ7KgLAAAgS1wONytXrtTJkyfVs2dPxcbGKjw8XI8++qg+/vhjXbt2LTtqBAAAcFqW5twUKlRIAwYM0J49e7R161aVLVtWzz77rIoVK6YXX3xR//vf/9xdJwAAgFNua0Lx6dOntWbNGq1Zs0a5cuVS8+bNtXfvXlWsWFHTpk1zV40AAABOczncXLt2TcuXL1eLFi1UqlQpffTRR+rfv79OnTql9957T19/
/bWWLVum0aNHZ0e9AAAAN+XyTfyKFi2q1NRUdejQQdu2bVP16tXT9WnSpIny5cvnhvIAAABc43K4mTZtmtq2bSs/P79M++TLl09Hjhy5rcIAAACywqXTUteuXVNMTIwOHTqUXfUAAADcFpfCTe7cuVWyZEmlpKRkVz0AAAC3xeUJxUOHDtWQIUN0/vz57KgHAADgtrg852bGjBk6dOiQihUrplKlSilPnjwOy3ft2uW24gAAAFzlcrhp06ZNNpQBAADgHi6Hm5EjR7q1gJkzZ+qNN95QfHy8qlWrprfeekuRkZGZ9r9w4YKGDh2qFStW6Pz58ypVqpSmT5+u5s2bu7UuAABwd8rSHYovXLigefPmafDgwfa5N7t27XL5SzRjY2M1YMAAjRw5Urt27VK1atXUrFkznT17NsP+V69e1UMPPaSjR4/q448/1oEDBzR37lwVL148K8MAAAAW5PKRmx9++EFRUVEKDg7W0aNH1b17dxUoUEArVqzQsWPH9P777zu9ralTp6p79+6KiYmRJM2ePVurVq3S/PnzNWjQoHT958+fr/Pnz2vTpk3KnTu3JCk8PNzVIQAAAAtz+cjNgAED1KVLF/3vf/9zuJFf8+bNtX79eqe3c/XqVe3cuVNRUVF/FePlpaioKG3evDnDdT799FPVrVtXL7zwgkJDQ1W5cmWNGzfuppemJycnKykpyeEBAACsy+Vws337dvXo0SNde/HixRUfH+/0ds6dO6eUlBSFhoY6tIeGhma6nZ9//lkff/yxUlJS9N///lfDhw/XlClTNGbMmEz3M378eAUHB9sfYWFhTtcIAADuPi6HG19f3wyPfhw8eFCFChVyS1GZSU1NVeHChTVnzhzVrFlT7dq109ChQzV79uxM1xk8eLASExPtj+PHj2drjQAAwLNcDjetWrXS6NGjde3aNUmSzWbTsWPH9Oqrr+rJJ590ejshISHKlSuXzpw549B+5swZFSlSJMN1ihYtqvvuu0+5cuWyt91///2Kj4/X1atXM1zH19dXQUFBDg8AAGBdLoebKVOm6NKlSypcuLB+//13NW7cWGXLllXevHk1duxYp7fj4+OjmjVrau3atfa21NRUrV27VnXr1s1wnfr16+vQoUNKTU21tx08eFBFixaVj4+Pq0MBAAAW5PLVUsHBwVqzZo2+++47/fDDD7p06ZIeeOABh4nBzhowYIA6d+6sWrVqKTIyUtOnT9fly5ftV09FR0erePHiGj9+vCSpZ8+emjFjhvr166c+ffrof//7n8aNG6e+ffu6vG8AAGBNLoeb48ePKywsTA0aNFCDBg1ua+ft2rVTQkKCRowYofj4eFWvXl2rV6+2TzI+duyYvLz+OrgUFhamL7/8Ui+++KKqVq2q4sWLq1+/fnr11Vdvqw4AAGAdLoeb8PBwNWjQQM8884yeeuop5c+f/7YK6N27t3r37p3hsri4uHRtdevW1ZYtW25rnwAAwLpcnnOzY8cORUZGavTo0SpatKjatGmjjz/+WMnJydlRHwAAgEtcDjc1atTQG2+8oWPHjumLL75QoUKF9Pzzzys0NFTPPfdcdtQIAADgtCx9t5T05yXgTZo00dy5c/X111+rdOnSeu+999xZGwAAgMuyHG5OnDihSZMmqXr16oqMjFRgYKBmzpzpztoAAABc5vKE4nfeeUdLlizRxo0bVaFCBXXq1EmffPKJSpUqlR31AQAAuMTlcDNmzBh16NBB//73v1WtWrXsqAkAACDLXA43x44dk81my45aAAAAbpvL4cZms+nChQt69913tX//fklSxYoV1bVrVwUHB7u9QAAAAFdk6T43ZcqU0bRp03T+/HmdP39e06ZNU5kyZbRr167sqBEAAMBpLh+5efHFF9WqVSvNnTtX3t5/rn79+nV169ZN/fv31/r1691eJAAAgLNcDjc7duxwCDaS5O3trYEDB6pWrVpuLQ4AgIyED1rl6RKy5OiExzxdwj3B5dNSQUFB
OnbsWLr248ePK2/evG4pCgAAIKtcDjft2rVT165dFRsbq+PHj+v48eNaunSpunXrpg4dOmRHjQAAAE5z+bTU5MmTZbPZFB0drevXr0uScufOrZ49e2rChAluLxAAAMAVLocbHx8fvfnmmxo/frwOHz4sSSpTpowCAgLcXhwAAICrXA43iYmJSklJUYECBVSlShV7+/nz5+Xt7a2goCC3FggAAOAKl+fctG/fXkuXLk3XvmzZMrVv394tRQEAAGSVy+Fm69atatKkSbr2f/7zn9q6datbigIAAMgql8NNcnKyfSLxja5du6bff//dLUUBAABklcvhJjIyUnPmzEnXPnv2bNWsWdMtRQEAAGSVyxOKx4wZo6ioKO3Zs0dNmzaVJK1du1bbt2/XV1995fYCAQAAXOHykZv69etr8+bNKlGihJYtW6bPPvtMZcuW1Q8//KCGDRtmR40AAABOc/nIjSRVr15dS5YscXctAAAAt83lIzeSdPjwYQ0bNkwdO3bU2bNnJUlffPGF/u///s+txQEAALjK5XDz7bffqkqVKtq6dauWL1+uS5cuSZL27NmjkSNHur1AAAAAV7gcbgYNGqQxY8ZozZo18vHxsbc/+OCD2rJli1uLAwAAcJXL4Wbv3r16/PHH07UXLlxY586dc0tRAAAAWeVyuMmXL59Onz6drn337t0qXry4W4oCAADIqix9t9Srr76q+Ph42Ww2paamauPGjXr55ZcVHR2dHTUCAAA4zeVwM27cOFWoUEFhYWG6dOmSKlasqEaNGqlevXoaOnRodtQIAADgNJfvc+Pj46O5c+dqxIgR2rt3ry5duqQaNWqoXLly2VEfAACAS7J0Ez9JCgsLU1hYmP35ihUr9Nprr+mHH35wS2EAAABZ4dJpqXfeeUdPPfWUOnbsqK1bt0qSvvnmG9WoUUPPPvus6tevny1FAgAAOMvpcDNhwgT16dNHR48e1aeffqoHH3xQ48aNU6dOndSuXTudOHFCs2bNys5aAQAAbsnp01ILFizQ3Llz1blzZ23YsEGNGzfWpk2bdOjQIeXJkyc7awQAAHCa00dujh07pgcffFCS1LBhQ+XOnVujRo0i2AAAgBzF6XCTnJwsPz8/+3MfHx8VKFAgW4oCAADIKpeulho+fLgCAgIkSVevXtWYMWMUHBzs0Gfq1Knuqw4AAMBFToebRo0a6cCBA/bn9erV088//+zQx2azua8yAACALHA63MTFxWVjGQAAAO7h8tcvAAAA5GSEGwAAYCmEGwAAYCmEGwAAYCluDTf79u1z5+YAAABcdtvh5uLFi5ozZ44iIyNVrVo1d9QEAACQZVkON+vXr1fnzp1VtGhRTZ48WQ8++KC2bNniztoAAABc5tIdiuPj47Vw4UK9++67SkpK0tNPP63k5GStXLlSFStWzK4aAQAAnOb0kZuWLVuqfPny+uGHHzR9+nSdOnVKb731VnbWBgAA4DKnj9x88cUX6tu3r3r27Kly5cplZ00AAABZ5vSRm++++04XL15UzZo1Vbt2bc2YMUPnzp3LztoAAABc5nS4qVOnjubOnavTp0+rR48eWrp0qYoVK6bU1FStWbNGFy9ezM46AQAAnOLy1VJ58uTRc889p++++0579+7VSy+9pAkTJqhw4cJq1apVdtQIAADgtNu6z0358uU1adIknThxQh9++KG7agIAAMgyly4Fz0yuXLnUqlUreXu7ZXMAAABZdttp5NChQ5o/f74WLlyohIQEXbt2zR11AQAAZEmWTkv9/vvvev/999WoUSOVL19emzZt0ogRI3TixAl31wcAAOASl47cbN++XfPmzdPSpUtVpkwZderUSZs2bdLbb7/NHYoBAECO4HS4qVq1qpKSktSxY0dt2rRJlSpVkiQNGjQo24oDAABwldOnpQ4cOKBGjRqpSZMmHKUBAAA5ltPh5ueff1b58uXVs2dPlShRQi+//LJ2794tm82WnfUBAAC4xOlwU7x4cQ0d
OlSHDh3SBx98oPj4eNWvX1/Xr1/XwoULdfDgweysEwAAwClZulrqwQcf1KJFi3T69GnNmDFD33zzjSpUqKCqVau6uz4AAACX3NYdioODg9WrVy/t2LFDu3bt0j//+U83lQUAAJA1txVublS9enX9+9//dtfmAAAAssRt4QYAACAnINwAAABLIdwAAABLyRHhZubMmQoPD5efn59q166tbdu2ObXe0qVLZbPZ1KZNm+wtEAAA3DWc+voFVyYK9+3b16UCYmNjNWDAAM2ePVu1a9fW9OnT1axZMx04cECFCxfOdL2jR4/q5ZdfVsOGDV3aHwAAsDanws20adMcnickJOjKlSvKly+fJOnChQsKCAhQ4cKFXQ43U6dOVffu3RUTEyNJmj17tlatWqX58+dn+r1VKSkp6tSpk0aNGqUNGzbowoULLu0TAABYl1OnpY4cOWJ/jB07VtWrV9f+/ft1/vx5nT9/Xvv379cDDzyg119/3aWdX716VTt37lRUVNRfBXl5KSoqSps3b850vdGjR6tw4cLq2rWrS/sDAADW5/S3gqcZPny4Pv74Y5UvX97eVr58eU2bNk1PPfWUOnXq5PS2zp07p5SUFIWGhjq0h4aG6qeffspwne+++07vvvuuvv/+e6f2kZycrOTkZPvzpKQkp+sDAAB3H5cnFJ8+fVrXr19P156SkqIzZ864pajMXLx4Uc8++6zmzp2rkJAQp9YZP368goOD7Y+wsLBsrREAAHiWy+GmadOm6tGjh3bt2mVv27lzp3r27OlweskZISEhypUrV7pQdObMGRUpUiRd/8OHD+vo0aNq2bKlvL295e3trffff1+ffvqpvL29dfjw4XTrDB48WImJifbH8ePHXaoRAADcXVwON/Pnz1eRIkVUq1Yt+fr6ytfXV5GRkQoNDdW8efNc2paPj49q1qyptWvX2ttSU1O1du1a1a1bN13/ChUqaO/evfr+++/tj1atWqlJkyb6/vvvMzwq4+vrq6CgIIcHAACwLpfn3BQqVEj//e9/dfDgQfu8mAoVKui+++7LUgEDBgxQ586dVatWLUVGRmr69Om6fPmy/eqp6OhoFS9eXOPHj5efn58qV67ssH7aFVt/bwcAAPcml8NNmvDwcBljVKZMGXl7Z3kzateunRISEjRixAjFx8erevXqWr16tX2S8bFjx+TllSPuNQgAAO4CLqeSK1euqE+fPnrvvfckSQcPHlRERIT69Omj4sWLZ3pvmpvp3bu3evfuneGyuLi4m667cOFCl/cHAACsy+VDIoMHD9aePXsUFxcnPz8/e3tUVJRiY2PdWhwAAICrXD5ys3LlSsXGxqpOnTqy2Wz29kqVKmV4tRIAAMCd5PKRm4SEhAy/8+ny5csOYQcAAMATXA43tWrV0qpVq+zP0wLNvHnzMrx8GwAA4E5y+bTUuHHj9Oijj+rHH3/U9evX9eabb+rHH3/Upk2b9O2332ZHjQAAAE5z+chNgwYN9P333+v69euqUqWKvvrqKxUuXFibN29WzZo1s6NGAAAAp7l85Gbfvn2qXLmy5s6dm27ZypUr1aZNG3fUBQAAkCUuH7lp1qyZjhw5kq59+fLlLn0jOAAAQHZwOdx069ZNUVFRio+Pt7fFxsYqOjqaG+oBAACPc/m01KhRo3T+/HlFRUVp/fr1Wr16tbp166YPPvhATz75ZHbUCAAA4LQsfSnUW2+9pU6dOqlOnTo6efKkPvzwQ7Vu3drdtQEAALjMqXDz6aefpmt74okntGHDBnXo0EE2m83ep1WrVu6tEAAAwAVOhZubXQE1f/58zZ8/X9KfN/RLSUlxS2EAAABZ4VS4SU1Nze46AAAA3MLlq6UycuHCBXdsBgAA4La5HG4mTpyo2NhY+/O2bduqQIECKl68uPbs2ePW4gAAAFzlcriZPXu2wsLCJElr1qzR119/rdWrV+vRRx/VK6+84vYCAQAAXOHypeDx8fH2cPP555/r6aef1sMPP6zw
8HDVrl3b7QUCAAC4wuUjN/nz59fx48clSatXr1ZUVJQkyRjDlVIAAMDjXD5y88QTT6hjx44qV66cfv31Vz366KOSpN27d6ts2bJuLxAAAMAVLoebadOmKTw8XMePH9ekSZMUGBgoSTp9+rR69erl9gIBAABc4XK4yZ07t15++eV07S+++KJbCgIAALgdWfpuKUn68ccfdezYMV29etWhna9fAAAAnuRyuPn555/1+OOPa+/evbLZbDLGSPrzqxckMakYAAB4lMtXS/Xr10+lS5fW2bNnFRAQoP/7v//T+vXrVatWLcXFxWVDiQAAAM5z+cjN5s2b9c033ygkJEReXl7y8vJSgwYNNH78ePXt21e7d+/OjjoBAACc4vKRm5SUFOXNm1eSFBISolOnTkmSSpUqpQMHDri3OgAAABe5fOSmcuXK2rNnj0qXLq3atWtr0qRJ8vHx0Zw5cxQREZEdNQIAADjN5XAzbNgwXb58WZI0evRotWjRQg0bNlTBggUdvlATAADAE1wON82aNbP/u2zZsvrpp590/vx55c+f337FFAAAgKdk+T43NypQoIA7NgMAAHDbnA43zz33nFP95s+fn+ViAAAAbpfT4WbhwoUqVaqUatSoYb9xHwAAQE7jdLjp2bOnPvzwQx05ckQxMTF65plnOB0FAAByHKfvczNz5kydPn1aAwcO1GeffaawsDA9/fTT+vLLLzmSAwAAcgyXbuLn6+urDh06aM2aNfrxxx9VqVIl9erVS+Hh4bp06VJ21QgAAOA0l+9QbF/Ry8v+xZl8WSYAAMgpXAo3ycnJ+vDDD/XQQw/pvvvu0969ezVjxgwdO3ZMgYGB2VUjAACA05yeUNyrVy8tXbpUYWFheu655/Thhx8qJCQkO2sDAABwmdPhZvbs2SpZsqQiIiL07bff6ttvv82w34oVK9xWHAAAgKucDjfR0dF8vQIAAMjxXLqJHwAAQE6X5aulAAAAciLCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsJQcEW5mzpyp8PBw+fn5qXbt2tq2bVumfefOnauGDRsqf/78yp8/v6Kiom7aHwAA3Fs8Hm5iY2M1YMAAjRw5Urt27VK1atXUrFkznT17NsP+cXFx6tChg9atW6fNmzcrLCxMDz/8sE6ePHmHKwcAADmRx8PN1KlT1b17d8XExKhixYqaPXu2AgICNH/+/Az7L168WL169VL16tVVoUIFzZs3T6mpqVq7du0drhwAAOREHg03V69e1c6dOxUVFWVv8/LyUlRUlDZv3uzUNq5cuaJr166pQIECGS5PTk5WUlKSwwMAAFiXR8PNuXPnlJKSotDQUIf20NBQxcfHO7WNV199VcWKFXMISDcaP368goOD7Y+wsLDbrhsAAORcHj8tdTsmTJigpUuX6j//+Y/8/Pwy7DN48GAlJibaH8ePH7/DVQIAgDvJ25M7DwkJUa5cuXTmzBmH9jNnzqhIkSI3XXfy5MmaMGGCvv76a1WtWjXTfr6+vvL19XVLvQAAIOfz6JEbHx8f1axZ02EycNrk4Lp162a63qRJk/T6669r9erVqlWr1p0oFQAA3CU8euRGkgYMGKDOnTurVq1aioyM1PTp03X58mXFxMRIkqKjo1W8eHGNHz9ekjRx4kSNGDFCS5YsUXh4uH1uTmBgoAIDAz02DgAAkDN4PNy0a9dOCQkJGjFihOLj41W9enWtXr3aPsn42LFj8vL66wDTrFmzdPXqVT311FMO2xk5cqRee+21O1k6AADIgTwebiSpd+/e6t27d4bL4uLiHJ4fPXo0+wsCAAB3rbv6aikAAIC/
I9wAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLIdwAAABLyRHhZubMmQoPD5efn59q166tbdu23bT/Rx99pAoVKsjPz09VqlTRf//73ztUKQAAyOk8Hm5iY2M1YMAAjRw5Urt27VK1atXUrFkznT17NsP+mzZtUocOHdS1a1ft3r1bbdq0UZs2bbRv3747XDkAAMiJPB5upk6dqu7duysmJkYVK1bU7NmzFRAQoPnz52fY/80339QjjzyiV155Rffff79ef/11PfDAA5oxY8YdrhwAAOREHg03V69e1c6dOxUVFWVv8/LyUlRUlDZv3pzhOps3b3boL0nNmjXLtD8AALi3eHty5+fOnVNKSopCQ0Md2kNDQ/XTTz9luE58fHyG/ePj4zPsn5ycrOTkZPvzxMRESVJSUtLtlJ6p1OQr2bLd7Obq63EvjPNuHaN0b4yT92zGGGfOdi/8bkrZ8xmbtk1jzC37ejTc3Anjx4/XqFGj0rWHhYV5oJqcK3i6pyu4MxinddwLY5QYp9Uwztt38eJFBQcH37SPR8NNSEiIcuXKpTNnzji0nzlzRkWKFMlwnSJFirjUf/DgwRowYID9eWpqqs6fP6+CBQvKZrPd5gjunKSkJIWFhen48eMKCgrydDnZhnFax70wRolxWg3jzLmMMbp48aKKFSt2y74eDTc+Pj6qWbOm1q5dqzZt2kj6M3ysXbtWvXv3znCdunXrau3aterfv7+9bc2aNapbt26G/X19feXr6+vQli9fPneU7xFBQUF3zRvxdjBO67gXxigxTqthnDnTrY7YpPH4aakBAwaoc+fOqlWrliIjIzV9+nRdvnxZMTExkqTo6GgVL15c48ePlyT169dPjRs31pQpU/TYY49p6dKl2rFjh+bMmePJYQAAgBzC4+GmXbt2SkhI0IgRIxQfH6/q1atr9erV9knDx44dk5fXXxd11atXT0uWLNGwYcM0ZMgQlStXTitXrlTlypU9NQQAAJCDeDzcSFLv3r0zPQ0VFxeXrq1t27Zq27ZtNleVs/j6+mrkyJHpTrFZDeO0jnthjBLjtBrGaQ0248w1VQAAAHcJj9+hGAAAwJ0INwAAwFIINwAAwFIINwAAwFIINzlMly5d7Dc0/Ls5c+bon//8p4KCgmSz2XThwoU7Wps7ZTbO8+fPq0+fPipfvrz8/f1VsmRJ9e3b1/6dYHebm/08e/TooTJlysjf31+FChVS69atM/1OtZzsZmNMY4zRo48+KpvNppUrV96RutwpszHGxcXZfxf/+OMPdenSRVWqVJG3t/ctX5OcyJlxxsXFqXXr1ipatKjy5Mmj6tWra/HixXe+2NvgzDgPHDigJk2aKDQ0VH5+foqIiNCwYcN07dq1O19wFjgzxhsdOnRIefPmvatvcnsjws1d5MqVK3rkkUc0ZMgQT5eSbU6dOqVTp05p8uTJ2rdvnxYuXKjVq1era9euni7N7WrWrKkFCxZo//79+vLLL2WM0cMPP6yUlBRP
l+Z206dPv6u+7iQrUlJS5O/vr759+yoqKsrT5WSbTZs2qWrVqlq+fLl++OEHxcTEKDo6Wp9//rmnS3Or3LlzKzo6Wl999ZUOHDig6dOna+7cuRo5cqSnS3O7a9euqUOHDmrYsKGnS3GbHHGfGzgn7SsnMrr3j1VUrlxZy5cvtz8vU6aMxo4dq2eeeUbXr1+Xt7d13rLPP/+8/d/h4eEaM2aMqlWrpqNHj6pMmTIerMy9vv/+e02ZMkU7duxQ0aJFPV1OtsmTJ49mzZolSdq4ceNdfWT1Zv7+n6t+/frpq6++0ooVK9SiRQsPVeV+ERERioiIsD8vVaqU4uLitGHDBg9WlT2GDRumChUqqGnTptq0aZOny3ELjtwgx0tMTFRQUJClgs3fXb58WQsWLFDp0qUt9Y31V65cUceOHTVz5sxMv9wWd7/ExEQVKFDA02Vkq0OHDmn16tVq3Lixp0txq2+++UYfffSRZs6c6elS3Mq6nxawhHPnzun11193OMphJW+//bYGDhyoy5cvq3z58lqzZo18fHw8XZbbvPjii6pXr55at27t6VJu2+eff67AwECHNiueQnR1nMuWLdP27dv1zjvvZHdpbuXsOOvVq6ddu3YpOTlZzz//vEaPHn2nSrxttxrjr7/+qi5dumjRokV31ZdnOoNwgxwrKSlJjz32mCpWrKjXXnvN0+Vki06dOumhhx7S6dOnNXnyZD399NPauHGj/Pz8PF3abfv000/1zTffaPfu3Z4uxS2aNGliP+2UZuvWrXrmmWc8VFH2cGWc69atU0xMjObOnatKlSrdqRLdwtlxxsbG6uLFi9qzZ49eeeUVTZ48WQMHDryTpWbZrcbYvXt3dezYUY0aNfJEedmKcIMc6eLFi3rkkUeUN29e/ec//1Hu3Lk9XVK2CA4OVnBwsMqVK6c6deoof/78+s9//qMOHTp4urTb9s033+jw4cPprr548skn1bBhw7tu7liePHlUtmxZh7YTJ054qJrs4+w4v/32W7Vs2VLTpk1TdHT0nSrPbZwdZ9pp4ooVKyolJUXPP/+8XnrpJeXKleuO1Hk7bjXGb775Rp9++qkmT54s6c+rGlNTU+Xt7a05c+boueeeu6P1uhPhBjlOUlKSmjVrJl9fX3366aeWOIrhDGOMjDFKTk72dCluMWjQIHXr1s2hrUqVKpo2bZpatmzpoargDnFxcWrRooUmTpxo2VPGGUlNTdW1a9eUmpp6V4SbW9m8ebPDaapPPvlEEydO1KZNm1S8eHEPVnb7CDc5UGJior7//nuHtoIFCyp37tyKj4/XoUOHJEl79+5V3rx5VbJkybtyMl9G48yfP7/atWunK1euaNGiRUpKSlJSUpIkqVChQnflH5SMxpmYmKhNmzbp4YcfVqFChXTixAlNmDBB/v7+at68uWcKvQ2ZvWcrV66crm/JkiVVunTpO1TZnfXjjz/q6tWrOn/+vC5evGh/TapXr+7Rutxp3bp1atGihfr166cnn3xS8fHxkiQfH5+78u9QZhYvXqzcuXOrSpUq8vX11Y4dOzR48GC1a9fOMkeS77//fofnO3bskJeXV4a/t3cbwk0OFBcXpxo1aji0de3aVSVKlNCoUaPsbWnnSRcsWKAuXbrcyRLdIqNxlilTRocPH5akdIdTjxw5ovDw8DtVnttkNM6YmBjFx8dr+vTp+u233xQaGqpGjRpp06ZNKly4sIcqzbrM3rPz5s3zUEWe0bx5c/3yyy/252mviTHGUyW53XvvvacrV65o/PjxGj9+vL29cePGd92pxpvx9vbWxIkTdfDgQRljVKpUKfXu3Vsvvviip0uDE2zGSr91AADgnsd9bgAAgKUQbgAAgKUQbgAAgKUQbgAAgKUQbgAAgKUQbgAAgKUQbgAAgKUQbgAAgKUQbgC4RZcuXdSmTRtPl2EXFxcnm81204eV7qgL4C98/QIAS6pXr55Onz5tf96vXz8lJSVpwYIF9jYrfRcSgL9w
5AbAHfHtt98qMjJSvr6+Klq0qAYNGqTr16/blycnJ6tv374qXLiw/Pz81KBBA23fvt2+PO1IzKpVq1S1alX5+fmpTp062rdvX4b78/HxUZEiRewPf39/+fr6qkiRIjp48KDCwsJ0/vx5h3X69++vhg0bSpIWLlyofPnyaeXKlSpXrpz8/PzUrFkzHT9+3GGdTz75RA888ID8/PwUERGhUaNGOYwLwJ1HuAGQ7U6ePKnmzZvrH//4h/bs2aNZs2bp3Xff1ZgxY+x9Bg4cqOXLl+u9997Trl27VLZsWTVr1ixdAHnllVc0ZcoUbd++XYUKFVLLli117do1l+pp1KiRIiIi9MEHH9jbrl27psWLF+u5556zt125ckVjx47V+++/r40bN+rChQtq3769ffmGDRsUHR2tfv366ccff9Q777yjhQsXauzYsa6+RADcyQCAG3Tu3Nm0bt06w2VDhgwx5cuXN6mpqfa2mTNnmsDAQJOSkmIuXbpkcufObRYvXmxffvXqVVOsWDEzadIkY4wx69atM5LM0qVL7X1+/fVX4+/vb2JjY12ub+LEieb++++3P1++fLkJDAw0ly5dMsYYs2DBAiPJbNmyxd5n//79RpLZunWrMcaYpk2bmnHjxjns54MPPjBFixa9ZT0Asg9HbgBku/3796tu3bqy2Wz2tvr16+vSpUs6ceKEDh8+rGvXrql+/fr25blz51ZkZKT279/vsK26deva/12gQAGVL18+XR9ndOnSRYcOHdKWLVsk/Xka6umnn1aePHnsfby9vfWPf/zD/rxChQrKly+ffX979uzR6NGjFRgYaH90795dp0+f1pUrV1yuCYB7MKEYwD2pcOHCatmypRYsWKDSpUvriy++cPnqqUuXLmnUqFF64okn0i3z8/NzU6UAXEW4AZDt7r//fi1fvlzGGPvRm40bNypv3rwqUaKEChYsKB8fH23cuFGlSpWS9OccmO3bt6t///4O29qyZYtKliwpSfrtt9908OBB3X///Vmqq1u3burQoYNKlCihMmXKOBw5kqTr169rx44dioyMlCQdOHBAFy5csO/vgQce0IEDB1S2bNks7R9A9iDcAHCbxMREff/99w5tBQsWVK9evTR9+nT16dNHvXv31oEDBzRy5EgNGDBAXl5eypMnj3r27KlXXnlFBQoUUMmSJTVp0iRduXJFXbt2ddje6NGjVbBgQYWGhmro0KEKCQnJ8v11mjVrpqCgII0ZM0ajR49Otzx37tzq06eP/v3vf8vb21u9e/dWnTp17GFnxIgRatGihUqWLKmnnnpKXl5e2rNnj/bt2+cwWRrAHebpST8ArKFz585GUrpH165djTHGxMXFmX/84x/Gx8fHFClSxLz66qvm2rVr9vV///1306dPHxMSEmJ8fX1N/fr1zbZt2+zL0yYUf/bZZ6ZSpUrGx8fHREZGmj179jhdX0YTnocPH25y5cplTp065dC+YMECExwcbJYvX24iIiKMr6+viYqKMr/88otDv9WrV5t69eoZf39/ExQUZCIjI82cOXOcfdkAZAObMcZ4MlwBgDPi4uLUpEkT/fbbb8qXL5/bttu1a1clJCTo008/dWhfuHCh+vfvrwsXLrhtXwDuDE5LAbgnJSYmau/evVqyZEm6YAPg7ka4AXBPat26tbZt26Z//etfeuihhzxdDgA34rQUAACwFG7iBwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALOX/AQtHB+THDcQ1AAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 3835/3835 [05:09<00:00, 12.40it/s]\n"
     ]
    }
   ],
   "source": [
    "# Ablation evaluation: run the model twice, each time hiding one input modality.\n",
    "#   pass 1 - sequence tokens replaced by the mask token; masked amino-acid recovery\n",
    "#            is reported overall and separately per loop type\n",
    "#   pass 2 - angles_mask set on the same positions; only the quantized indices are kept\n",
    "# Be careful about masked amino acids: only positions selected by the token mask below\n",
    "# are scored, everything else needs to be discarded.\n",
    "model.to('cpu')\n",
    "\n",
    "# NOTE(review): loss_fn is not used anywhere in this cell; a single assignment is kept\n",
    "# only in case another cell reads the name.\n",
    "loss_fn = torch.nn.MSELoss()\n",
    "\n",
    "# Token ids listed in alphabet.standard_toks; loop-invariant, so resolved once up front.\n",
    "standard_tok_idxs = [dataset.alphabet.get_idx(tok) for tok in dataset.alphabet.standard_toks]\n",
    "\n",
    "\n",
    "def standard_token_mask(sequence):\n",
    "    \"\"\"Boolean mask with the shape of sequence: True where the token id is in standard_tok_idxs.\"\"\"\n",
    "    mask = torch.zeros_like(sequence, dtype=torch.bool)\n",
    "    for tok_idx in standard_tok_idxs:\n",
    "        mask |= (sequence == tok_idx)\n",
    "    return mask\n",
    "\n",
    "\n",
    "# --- pass 1: repeat but with no sequence information ---\n",
    "gt_aa_loop_types = defaultdict(list)\n",
    "pred_aa_loop_types = defaultdict(list)\n",
    "gt_aa_list = []\n",
    "pred_aa_list = []\n",
    "all_quantized_indices_dihedral_only = []\n",
    "with torch.no_grad():\n",
    "    for batch in tqdm(dataloader, total=len(dataloader)):\n",
    "        # mask out sequence information\n",
    "        batch_mask = standard_token_mask(batch['sequence'])\n",
    "        batch['sequence'][batch_mask] = dataset.alphabet.mask_idx\n",
    "        output = model(batch, val=True)\n",
    "\n",
    "        # convert once so the overall and the per-loop lists hold the same numpy data\n",
    "        true_aa = output.true_aa.numpy()\n",
    "        pred_aa = output.pred_aa.argmax(dim=-1).numpy()\n",
    "\n",
    "        # get masked aa recovery by loop type\n",
    "        # NOTE(review): assumes true_aa / pred_aa are flattened over the batch in row order\n",
    "        # with one entry per masked position, so each loop owns a contiguous slice - confirm\n",
    "        curr_idx = 0\n",
    "        for i in range(batch_mask.shape[0]):\n",
    "            loop_type = batch['id'][i].split('_')[1]\n",
    "            num_masked = batch_mask[i].sum().item()\n",
    "            gt_aa_loop_types[loop_type].append(true_aa[curr_idx:curr_idx + num_masked])\n",
    "            pred_aa_loop_types[loop_type].append(pred_aa[curr_idx:curr_idx + num_masked])\n",
    "            curr_idx += num_masked\n",
    "\n",
    "        gt_aa_list.append(true_aa)\n",
    "        pred_aa_list.append(pred_aa)\n",
    "        all_quantized_indices_dihedral_only.append(output.quantized_indices)\n",
    "all_quantized_indices_dihedral_only = torch.cat(all_quantized_indices_dihedral_only, dim=0)\n",
    "\n",
    "# overall recovery; this name is NOT reused below, so it keeps the overall value\n",
    "masked_aa_recovery = np.mean(np.concatenate(pred_aa_list) == np.concatenate(gt_aa_list))\n",
    "print(f\"Masked AA Recovery: {masked_aa_recovery:.4g}\")\n",
    "\n",
    "x = ['L1', 'L2', 'L3', 'L4', 'H1', 'H2', 'H3', 'H4']\n",
    "y = []\n",
    "for loop_type in x:\n",
    "    if loop_type not in gt_aa_loop_types:\n",
    "        # absent loop types still get a bar, with height 0\n",
    "        y.append(0.0)\n",
    "        print(f\"Loop type {loop_type} not found in dataset, skipping.\")\n",
    "        continue\n",
    "    # the per-batch lists are replaced in place by one flat array per loop type\n",
    "    gt_aa_loop_types[loop_type] = np.concatenate(gt_aa_loop_types[loop_type])\n",
    "    pred_aa_loop_types[loop_type] = np.concatenate(pred_aa_loop_types[loop_type])\n",
    "    loop_type_recovery = np.mean(pred_aa_loop_types[loop_type] == gt_aa_loop_types[loop_type])\n",
    "    y.append(loop_type_recovery)\n",
    "    print(f\"Masked AA Recovery for loop type {loop_type}: {loop_type_recovery:.4g}\")\n",
    "\n",
    "plt.bar(x, y)\n",
    "plt.xlabel(\"Loop Type\")\n",
    "plt.ylabel(\"Masked AA Recovery\")\n",
    "plt.title(\"Masked AA Recovery by Loop Type\")\n",
    "plt.show()\n",
    "\n",
    "# --- pass 2: repeat but with no dihedral information ---\n",
    "all_quantized_indices_sequence_only = []\n",
    "with torch.no_grad():\n",
    "    for batch in tqdm(dataloader, total=len(dataloader)):\n",
    "        # mask out dihedral information\n",
    "        batch_mask = standard_token_mask(batch['sequence'])\n",
    "        batch['angles_mask'] = batch_mask # True = masked out\n",
    "        output = model(batch, val=True)\n",
    "        all_quantized_indices_sequence_only.append(output.quantized_indices)\n",
    "all_quantized_indices_sequence_only = torch.cat(all_quantized_indices_sequence_only, dim=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "e2f87924",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Codebook size: 8192 num used: 1305\n"
     ]
    }
   ],
   "source": [
    "# Codebook utilisation: how many distinct code indices occur in all_quantized_indices\n",
    "# (built in an earlier cell) compared with the configured codebook size.\n",
    "codes_flat = all_quantized_indices.numpy()\n",
    "unique, counts = np.unique(codes_flat, return_counts=True)\n",
    "num_used = len(unique)\n",
    "print(\"Codebook size:\", model.codebook_size, \"num used:\", num_used)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "c1d3ab9b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of canonical clusters: 180\n",
      "Number of VQVAE clusters: 1305\n"
     ]
    }
   ],
   "source": [
    "# Assemble one row per loop: canonical-cluster labels from the three lookup tables\n",
    "# next to the quantized indices from the three evaluation passes.\n",
    "loop_ids = [x['loop_id'] for x in dataset.data]\n",
    "ground_truth = [loop_to_canonical[loop_id] for loop_id in loop_ids]\n",
    "ground_truth_strict = [loop_to_canonical_strict[loop_id] for loop_id in loop_ids]\n",
    "ground_truth_ssc_comparison = [loop_to_canonical_ssc_comparison[loop_id] for loop_id in loop_ids]\n",
    "\n",
    "# Number the clusters in order of first appearance. Enumerating a set() yields an\n",
    "# arbitrary order (for string labels it can change between kernel restarts because\n",
    "# string hashing is randomised per process), so the ids were not reproducible.\n",
    "clusters_to_indices = {}\n",
    "for i, cluster in enumerate(dict.fromkeys(ground_truth)):\n",
    "    clusters_to_indices[cluster] = i\n",
    "\n",
    "# All columns must have one entry per item of dataset.data; pandas raises on a length mismatch.\n",
    "results_df = pd.DataFrame({\n",
    "    'loop_id': loop_ids,\n",
    "    'loop_type': [loop_id.split('_')[1] for loop_id in loop_ids],\n",
    "    'loop_length': [len(x['loop_sequence']) for x in dataset.data],\n",
    "    'canonical_cluster': ground_truth,\n",
    "    'canonical_cluster_strict': ground_truth_strict,\n",
    "    'canonical_cluster_ssc_comparison': ground_truth_ssc_comparison,\n",
    "    'quantized_index': all_quantized_indices.tolist(),\n",
    "    'quantized_index_dihedral_only': all_quantized_indices_dihedral_only.tolist(),\n",
    "    'quantized_index_sequence_only': all_quantized_indices_sequence_only.tolist(),\n",
    "})\n",
    "\n",
    "print(\"Number of canonical clusters:\", len(clusters_to_indices))\n",
    "print(\"Number of VQVAE clusters:\", np.unique(all_quantized_indices.numpy()).size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bf95af58",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "H1 top 1 proportion: index=2253 0.1034, top 20 proportion: 0.8361\n",
      "H2 top 1 proportion: index=4513 0.0412, top 20 proportion: 0.3843\n",
      "H3 top 1 proportion: index=2416 0.0212, top 20 proportion: 0.1460\n",
      "H4 top 1 proportion: index=3921 0.1004, top 20 proportion: 0.9297\n",
      "L1 top 1 proportion: index=762 0.1460, top 20 proportion: 0.6636\n",
      "L2 top 1 proportion: index=881 0.3153, top 20 proportion: 0.9174\n",
      "L3 top 1 proportion: index=3580 0.1721, top 20 proportion: 0.6375\n",
      "L4 top 1 proportion: index=6849 0.1362, top 20 proportion: 0.8529\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAB8QAAAEiCAYAAACP92jDAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAATX9JREFUeJzt3XmYlfV5P/73gDIDAooKyBQMWkWISEg0kBjrvicx5lv4mTYG8YtBIxo3opEGrY1xCag1ilLzpYiotYaY1JpFrQuN0UBtJca4ViUQwYCyKYFRmfn9YThhHGY5OOuZ18vrXNfM59zPee6Z45uZOfd5nqespqamJgAAAAAAAABQYrq0dQMAAAAAAAAA0BIMxAEAAAAAAAAoSQbiAAAAAAAAAJQkA3EAAAAAAAAASpKBOAAAAAAAAAAlyUAcAAAAAAAAgJJkIA4AAAAAAABASTIQBwAAAAAAAKAkGYgDAAAAAAAAUJIMxAEAAAAAAAAoSQbiNLtHH300ZWVlKSsry9///d83aZvx48cXtlm8eHGd+5cuXZof/vCH+eY3v5nDDz88O+64Y9H7AIrT3Fmurq7Of/7nf2bKlCk59NBDs9tuu6Vbt27p3bt3hg8fnjPPPDNPP/10838h0Mk1d5ZXrFiROXPm5KyzzsqBBx6YPfbYI7169Up5eXkGDBiQY445JjfffHPWr1/f/F8MdGIt8Tt2fU466aTCdsVuCzTdtuR6s/feey9PPfVU/umf/imnnXZaRowYke22205uoQ18mCyvXbs2d9xxR0499dR87GMfy4477pjtt98+ffv2zWGHHZZrrrkma9asaZG+gT/7MDl+8sknc8011+RLX/pSRowYkQEDBqS8vDy9evXKPvvsk1NOOSWPPPJIyzQO1PJhslyf5cuXp0+fPoXHPfTQQ5vlcWl927V1A9CY3/3udxk8eHBbtwF8SIMHD87SpUvrrL/77rv57W9/m9/+9rf5p3/6p0yePDlXXXVVysrK2qBLoDH33ntvvvrVr271vtdffz2vv/56HnjggVx99dX54Q9/mP3337+VOwQ+jPvuuy933313W7cBNOI73/mON4dDB/ezn/0sX/ziF1NVVVXnvjfeeCOPPvpoHn300UyfPj133nlnDjvssDboEmjMueeem1/+8pd11t955528+OKLefHFF3Pbbbdl7Nixue2221JRUdEGXQLb6uyzz/bmtBJhIE67V1NTU/i4rKwsf/mXf5nKysr853/+Zxt2BRRr2bJlSZK99torf/3Xf53PfOYzqayszIYNG/LII4/kuuuuy+rVq/Pd7343Xbt2zRVXXNHGHQNbU1ZWlr333juHHnpoPv7xj+cv/uIvMmDAgGzcuDG/+93vcvvtt+f+++/P7373uxx11FF55plnUllZ2dZtA03w9ttvZ9KkSUmSfv36ZcWKFW3cEVCfLf9OrqioyMiRI7Ny5cq8/PLLbdgVUIw333wzVVVV6dKlS4466qgce+yx+djHPpaddtopv//973PHHXfkX//1X/P666/nc5/7XH75y19m5MiRbd028AHl5eU55JBDcuCBB2bYsGEZMGBAdt5556xcuTK//vWvM3PmzLz66qv5wQ9+kC5duuSuu+5q65aBJvr3f//3/PCHP/T3cYkwEKfd69WrVy6//PKMGjUqBxxwQPr06ZNHH33UO2Ohgxk1alQuvfTSHH300XWO/j7ooIPyt3/7t/n0pz+dlStXZtq0aTnttNOy5557tlG3QH1OOeWUTJgwYav3/dVf/VVOPvnk/OM//mPOO++8rF69OtOnT8+1117byl0C2+Jb3/pWlixZkiOOOCIDBw7MnDlz2roloB6f/vSnM3PmzIwaNSr77bdftttuu4wfP95AHDqQ7bffPqeffnqmTJmS3XffvdZ9H//4x/P5z38+n/nMZ/L1r389f/zjH3P++efn4YcfbqNugfrcf//92W67rY9ZjjnmmJx9
9tk5/PDD86tf/Sr/+q//milTpmTEiBGt3CVQrC3fMD59+vSMGzeujTviw3INcdq9XXbZJX/3d3+Xo446Kn369GnrdoBt9Pjjj+eYY46p91Tof/mXf5lLLrkkyfvXRPzxj3/cit0BTVXfH/pbOuuss9KzZ88kyS9+8YuWbgloBk8++WRuuOGGlJeX56abbmrrdoBGHHPMMTn99NPz8Y9/vEk/m4H256STTsrMmTPrDMO3dPbZZ+eAAw5IksyfPz9vvPFGa7UHNFFjP4e7d++ec845p/C5v5GhY5gyZUqWLl2aww47LF/5ylfauh2agYE4AO3Glmd+cHQLdFzbbbdd4bpoGzdubONugMa89957+epXv5rq6up885vfzJAhQ9q6JQDgTw499NAkSXV1dV599dW2bQbYJr169Sp87G9kaP8WLlyYGTNmpFu3brn55pvbuh2aiYE4AO1GVVVV4eOuXbu2YSfAh/HQQw8Vjl4ZOnRoG3cDNObaa6/NokWLsvfee+fiiy9u63YAgC34Oxk6vi2vG+5vZGjftnzD+EUXXZR99tmnrVuimTivFgDtxvz58wsfDxs2rA07AYr11ltvZenSpbn77rtrXTN8y1PDAe3Pq6++mssuuyxJctNNN6W8vLyNOwIAtrT57+Ttt98+e+21Vxt3AzRFdXV1Vq5cmd/+9re54YYbCpcFHDp0aI455pi2bQ5o0PTp0/P0009nr732ypQpU9q6HZqRgTgtasWKFXnmmWcarVuzZk3LNwNss9bI8h//+Mf84z/+Y5KkvLw8X/jCF7b5sYCta+4s//3f/31hkPZBXbt2zfXXX5+DDjqomBaBJmjOLJ9xxhn54x//mL/5m7/JkUce2QzdAQDN5Sc/+UmefvrpJMkxxxyT3r17t3FHQEMGDx6c3/3ud1u9b88998w999zT6DXHgbbz8ssv5x/+4R+SJDNmzChcDpDS4F9fWtTNN9/sGgtQAlojyxdddFGWLFmSJJk0aVIqKytbdH/QGbXWz+UjjjgiN954o1PBQQtprizffvvteeCBB7Ljjjvmuuuua4bOAIDmsmrVqkyaNCnJ+2823fwCPdCxbLfddvn7v//7fP3rX691LXGg/TnjjDOyYcOGnHTSSTn66KPbuh2amYE4AG3ujjvuyI033pjk/VOlX3755W3cEdAUZ555ZsaMGZMkWb9+fZ577rncdttteeihh3LSSSfllltuyejRo9u4S2Br3nzzzZx//vlJkiuuuCL9+/dv444AgM02bdqUL3/5y4UjTb/1rW/l4x//eBt3BTTmgQceyDvvvJPq6uq8+eab+eUvf5mbb745//AP/5AXXnghN910U3r27NnWbQJbcdttt+U//uM/0rt3b28YL1Fd2roBStull16ampqaRm+nnHJKW7cKNKAls/zoo49mwoQJSZKdd945P/zhD9O9e/fm/hKANH+W+/Xrl+HDh2f48OEZPXp0xo8fn4cffjiXX355nn766Rx66KF54IEHWvirgs6nObJ8wQUXZOXKlRk1alTOOOOMVuweAGjMmWeemZ///OdJks997nOZOnVqG3cENMWQIUMyfPjwjBgxIocddli+9a1v5ZlnnsnHPvaxzJ07N5/5zGfy9ttvt3WbwAe88cYbueCCC5Ik3/nOdzJgwIA27oiWYCAOQJt58sknc8IJJ6Sqqio9e/bMT3/60wwbNqyt2wI+pL/7u7/LqFGjsnHjxnz1q1/Ne++919YtAVt4+OGHM2fOnHTt2jUzZ85Mly7+LASA9uLiiy/OLbfckiT5q7/6q9x9993p2rVrG3cFbKs+ffpkzpw5SZKnn346V1xxRRt3BHzQ+eefnzfeeCMHHHBAzjzzzLZuhxbilOkAtInf/va3OfbYY/PWW2+lvLw8P/7xj51aGUrIF77whSxcuDBLlizJwoULc+CBB7Z1S8CfXH311UmSAw44IC+88EJeeOGFOjWvvvpq4eN///d/T9++fZMkX/rSl1qn
SQDohK6++upcddVVSZJPfOITue+++5xBDUrAsGHDsvfee+ell17KvHnzDMWhHVm2bFnmzp2bJDn88MNz9913N1i/YsWK3HXXXUmSPfbYw+vZHYiBOACt7uWXX85RRx2VN998M9ttt13+9V//NUcccURbtwU0o83DsyT53e9+ZyAO7UhVVVWSZMGCBfmbv/mbRuu//vWvFz42EAeAlnHTTTflm9/8ZpL3h2f3339/evfu3cZdAc2lb9++eemll/K73/2urVsBtvDOO+8UPv7ud7/baP1zzz1X+Dv6lFNOMRDvQJwbD4BW9fvf/z5HHnlkli9fni5dumTOnDn5whe+0NZtAc3stddeK3zcs2fPNuwEAADat7lz5+ass85Kkuy55575j//4j+y6665t3BXQnDb/jezvY4C2YSAOQKtZsWJFjjzyyCxevDhJMnPmzPzt3/5t2zYFNLvq6ur88Ic/LHy+3377tWE3wAc9+uijqampafB2yimnFOpfffXVwjoA0LzuueeenHrqqampqcnAgQPz0EMPpbKysq3bAprRf/3XfxWODPf3MbQvgwcPbvTv4y3/Fj7kkEMKa7feemvbNU7RDMQBaBVr1qzJMcccU7hO6XXXXZevfvWrbdwVUKzvf//72bRpU733V1dX54ILLsgzzzyTJPmrv/qrDB48uJW6AwCAjuOBBx7I3/zN32TTpk3p169f/uM//sPvztCBLFy4MP/zP//TYM1rr71W682m48aNa+m2ANgK1xCnQ/j5z3+e119/vfD5888/X/h40aJFtd6J07Nnz4wZM6Y12wMaUVVVlc9+9rNZtGhRkuTLX/5yjjzyyMLAbGt22GGH7LHHHq3UIdBUEydOzGWXXZYxY8bkU5/6VD7ykY+kR48eWb16dZ566qnceuutefrpp5MkvXv3zowZM9q4YwBo/z74d219Dj/88Oy+++55++23M2/evFr3/e///m/h43nz5tU63fLIkSMzcuTI5moXqEcxWV62bFm++MUv5p133sn222+f6667Lu+++26DfycPHDgwO+20U/M1DNRRTI6fffbZnHrqqTnwwAPz+c9/PiNHjkzfvn2TvD8If+SRRzJ79uysXbs2SXLkkUdm/PjxLdg9sFmxv19T+gzE6RCuuuqqzJ8/f6v3/du//Vv+7d/+rfD5Rz7yEQNxaGeWL1+exx9/vPD5HXfckTvuuKPBbQ455JA8+uijLdwZsC1ee+21XH/99bn++uvrrRk2bFhuv/12p4MDgCb44N+19fnRj36U3XffPW+88UZOPfXUeuu+8Y1v1Pr80ksvNRCHVlBMlhctWpQ//vGPSZJ33303X/7ylxvdbvbs2YZp0MKKyfFmjz/+eK3XvbZm/PjxmTFjRrp0cdJeaA3F/n5N6TMQBwCgyf77v/87P/vZz/KrX/0qr776av7whz9kzZo16dGjRyorK/OJT3wiX/ziF/OFL3wh22+/fVu3CwAAAC3ipJNOSp8+ffLwww/nf/7nf7Js2bL84Q9/yLvvvpsdd9wxe+21Vz7zmc/kK1/5SkaMGNHW7QJ0amU1W14NHgAAAAAAAABKhPNzAAAAAAAAAFCSih6IV1VV5aKLLkplZWW6d++e0aNH58EHH2x0ux/96Ec55phjUllZmfLy8gwcODBjxozJM888U6d28ODBKSsrq3M744wzim0XAAAAAAAAgE6q6GuIjx8/PvPmzcu5556bvffeO7feemuOP/74PPLIIznooIPq3e43v/lN+vTpk3POOSe77rprXn/99fzzP/9zRo0alSeeeCIf+9jHatWPHDkyF1xwQa21IUOGFNsuAAAAAAAAAJ1UUdcQX7hwYUaPHp1p06Zl8uTJSZKNGzdm+PDh6devXx5//PGidv6HP/whAwcOzIQJEzJz5szC+uDBgzN8+PDcd999RT0eAAAAAAAAAGxW1CnT582bl65du2bixImFtYqKikyYMCFPPPFEli5dWtTO+/Xrlx49emTN
mjVbvf+dd97J+vXri3pMAAAAAAAAAEiKHIg/9dRTGTJkSHr37l1rfdSoUUmSRYsWNfoYa9asycqVK/Ob3/wmp512WtatW5cjjjiiTt3DDz+cHj16pGfPnhk8eHCuv/76YloFAAAAAAAAoJMr6hriy5cvz4ABA+qsb15btmxZo4/xqU99Ki+88EKSpGfPnvnWt76VCRMm1KoZMWJEDjrooOyzzz558803c+utt+bcc8/NsmXLcvXVVzf4+FVVVamqqip8XlNTk3feeSe77rprysrKGu0PaB9kGTo+OYbSIMtQGmQZSoMsQ2mQZSgNsgwdR1ED8Q0bNqS8vLzOekVFReH+xsyePTvr1q3LK6+8ktmzZ2fDhg3ZtGlTunT588Hq9957b61tTj311Bx33HG59tprc/bZZ2fgwIH1Pv6VV16Zyy67rM76iy++mF69ejXaH7Btdtttt2Z9PFmGttGcWZZjaDuyDKVBlqE0yDKUBlmGjs9r2FAatiXLZTU1NTVNLR4+fHj69++fhx56qNb6s88+m3333TczZ87M6aef3uSdr169OsOGDcvJJ5+c6dOnN1h7//3359hjj83cuXNz8skn11v3wXfkrFu3LoMGDcrq1avrnOodaD5bvqmlOcgytI3mzLIcQ9uRZSgNsgylQZahNMgydHxew4bSsC1ZLuoI8QEDBuS1116rs758+fIkSWVlZVE779OnTw4//PDccccdjQ7EBw0alCRZtWpVg3Xl5eVbPYq9S5cuzf6PHdByZBk6PjmG0iDLUBpkGUqDLENpkGUoDbIMHUdRiRw5cmRefPHFrFu3rtb6ggULCvcXa8OGDVm7dm2jda+88kqSpG/fvkXvAwAAAAAAAIDOp6iB+JgxY7Jp06bccssthbWqqqrMnj07o0ePLhzFvWTJkjz//PO1tl2xYkWdx1u8eHEeeuihHHDAAYW1VatWZdOmTbXq3n333Vx11VXp1q1bDjvssGJaBgAAAAAAAKCTKuqU6aNHj87YsWNz8cUXZ8WKFdlrr70yZ86cLF68OLNmzSrUjRs3LvPnz8+Wlyffb7/9csQRR2TkyJHp06dPXnrppcyaNasw7N7s3nvvzeWXX54xY8Zkjz32yKpVq3LnnXfmmWeeyRVXXLFNF0oHAAAAAAAAoPMpaiCeJLfddlumTp2auXPnZvXq1RkxYkTuu+++HHzwwQ1u97WvfS0/+clP8vOf/zxvvfVW+vXrl6OPPjpTpkzJfvvtV6jbb7/98tGPfjS33357Vq5cmW7dumXkyJG5++67M3bs2OK/QgAAAAAAAAA6pbKaLQ/jLkHr1q3LjjvumLVr16Z3795t3Q6wjWQZOj45htIgy1AaZBlKgyxDaZBlKA2yDO1XUdcQBwAAAAAAAICOwkAcAAAAAAAAgJJkIA4AAAAAAABASTIQBwAAAAAAAKAkGYgDAAAAAAAAUJIMxAEAAAAAAAAoSQbiAAAAAAAAAJQkA3EAAAAAAAAASpKBOAAAAAAAAAAlyUAcAAAAAAAAgJJkIA4AAAAAAABASTIQBwAAAAAAAKAkGYgDAAAAAAAAUJIMxAEAAAAAAAAoSdu1dQMAAAAAwLb73gUvpKJbzwZrJs8Y1krdAABA++IIcQAAAAAAAABKkoE4AAAAAAAAACWp6IF4VVVVLrroolRWVqZ79+4ZPXp0HnzwwUa3+9GPfpRjjjkmlZWVKS8vz8CBAzNmzJg888wzW62/995784lPfCIVFRXZfffdc+mll+a9994rtl0AAAAAAAAAOqmiB+Ljx4/Ptddemy9/+cu5/vrr07Vr1xx//PF57LHHGtzuN7/5Tfr06ZNzzjknN910U772ta/lqaeeyqhRo/LrX/+6Vu3PfvaznHjiidlpp51yww035MQTT8zll1+es88+u9h2AQAAAAAAAOiktiumeOHChbnrrrsybdq0TJ48OUkybty4DB8+PBdeeGEef/zxere95JJL6qyddtppGThwYG6++ebM
nDmzsD558uSMGDEiDzzwQLbb7v0We/funSuuuCLnnHNOhg4dWkzbAAAAAAAAAHRCRR0hPm/evHTt2jUTJ04srFVUVGTChAl54oknsnTp0qJ23q9fv/To0SNr1qwprD377LN59tlnM3HixMIwPEnOPPPM1NTUZN68eUXtAwAAAAAAAIDOqaiB+FNPPZUhQ4akd+/etdZHjRqVJFm0aFGjj7FmzZqsXLkyv/nNb3Laaadl3bp1OeKII2rtI0kOOOCAWttVVlZm4MCBhfsBAAAAAAAAoCFFnTJ9+fLlGTBgQJ31zWvLli1r9DE+9alP5YUXXkiS9OzZM9/61rcyYcKEWvvY8jE/uJ/G9lFVVZWqqqrC5+vWrUuSVFdXp7q6utH+gG3TpUtR769plCxD22jOLMsxtB1ZhtIgy1AaWiPLKat5/9YAWYcPx89l6Pi8hg2lYVuyXNRAfMOGDSkvL6+zXlFRUbi/MbNnz866devyyiuvZPbs2dmwYUM2bdpUaH7zY9S3n8Iv+vW48sorc9lll9VZX7lyZTZu3Nhof8C22W233Zr18WQZ2kZzZlmOoe3IMpQGWYbS0BpZ7rHL+nQvL2tw2xUrVjRbH9AZ+bkMHZ/XsKE0bEuWy2pqahp+++gWhg8fnv79++ehhx6qtf7ss89m3333zcyZM3P66ac3eeerV6/OsGHDcvLJJ2f69OlJkunTp+cb3/hGlixZkkGDBtWqHzVqVLp27Zonnnii3sfc2jtyBg0alNWrV9c51TvQfFrj3XWyDC2vpd/xLsfQOmQZSoMsQ2lojSx/+6sLUtGtZ4Pbnv+9oc3WB3RGfi5Dx+c1bCgNLX6E+IABA/Laa6/VWd98mvPKysqidt6nT58cfvjhueOOOwoD8c2nSl++fHmdgfjy5csL1yuvT3l5+VaPLu/SpUuz/2MHtBxZho5PjqE0yDKUBlmG0lBfllNT9v6tAbIO7Yefy1AaZBk6jqISOXLkyLz44ot1Tlu+YMGCwv3F2rBhQ9auXVtrH0ny5JNP1qpbtmxZfv/732/TPgAAAAAAAADofIoaiI8ZMyabNm3KLbfcUlirqqrK7NmzM3r06MIR3UuWLMnzzz9fa9utXado8eLFeeihh3LAAQcU1vbdd98MHTo0t9xySzZt2lRYv/nmm1NWVpYxY8YU0zIAAAAAAAAAnVRRp0wfPXp0xo4dm4svvjgrVqzIXnvtlTlz5mTx4sWZNWtWoW7cuHGZP39+trw8+X777ZcjjjgiI0eOTJ8+ffLSSy9l1qxZeffdd3PVVVfV2s+0adNywgkn5Oijj86XvvSlPPPMM7nxxhtz2mmnZdiwYR/ySwYAAAAAAACgMyhqIJ4kt912W6ZOnZq5c+dm9erVGTFiRO67774cfPDBDW73ta99LT/5yU/y85//PG+99Vb69euXo48+OlOmTMl+++1Xq/Zzn/tc7rnnnlx22WU5++yz07dv30yZMiWXXHJJse0CAAAAAAAA0EmV1Wx5GHcJWrduXXbcccesXbs2vXv3but2gG0ky9DxyTGUBlmG0iDLUBo2Z/nbpy1MRbeeDdZOnuGsi9Be+bkMpUGWof0q6hriAAAAAAAAANBRGIgDAAAAAAAAUJIMxAEAAAAAAAAoSQbiAAAAAAAAAJQkA3EAAAAAAAAASpKBOAAAAAAAAAAlyUAcAAAAAAAAgJJkIA4AAAAAAABASTIQBwAAAAAAAKAkGYgDAAAAAAAAUJIMxAEAAAAAAAAoSQbiAAAAAAAAAJQkA3EAAAAAAAAASpKBOAAAAAAAAAAlyUAcAAAAAAAAgJJkIA4AAAAAAABASTIQBwAAAAAAAKAkFT0Qr6qqykUXXZTKysp07949o0ePzoMPPtjodvfcc09OOumk7LnnnunRo0f22WefXHDBBVmzZk2d2sGDB6esrKzO7Ywzzii2XQAAAAAAAAA6qe2K3WD8+PGZN29ezj333Oy999659dZbc/zxx+eRRx7JQQcdVO92
EydOTGVlZU4++eTsvvvu+c1vfpMbb7wxP/3pT/M///M/6d69e636kSNH5oILLqi1NmTIkGLbBQAAAAAAAKCTKmogvnDhwtx1112ZNm1aJk+enCQZN25chg8fngsvvDCPP/54vdvOmzcvhx56aK21/fffP6ecckruuOOOnHbaabXu+4u/+IucfPLJxbQHAAAAAAAAAAVFnTJ93rx56dq1ayZOnFhYq6ioyIQJE/LEE09k6dKl9W77wWF4knzxi19Mkjz33HNb3eadd97J+vXri2kRAAAAAAAAAJIUORB/6qmnMmTIkPTu3bvW+qhRo5IkixYtKmrnr7/+epJk1113rXPfww8/nB49eqRnz54ZPHhwrr/++qIeGwAAAAAAAIDOrahTpi9fvjwDBgyos755bdmyZUXt/Oqrr07Xrl0zZsyYWusjRozIQQcdlH322Sdvvvlmbr311px77rlZtmxZrr766gYfs6qqKlVVVYXP161blySprq5OdXV1Uf0BTdelS1Hvr2mULEPbaM4syzG0HVmG0iDLUBpaI8spq3n/1oDNWb/26883up/zvzd025uEEuXnMnR8XsOG0rAtWS5qIL5hw4aUl5fXWa+oqCjc31R33nlnZs2alQsvvDB77713rfvuvffeWp+feuqpOe6443Lttdfm7LPPzsCBA+t93CuvvDKXXXZZnfWVK1dm48aNTe4PKM5uu+3WrI8ny9A2mjPLcgxtR5ahNMgylIbWyHKPXdane3lZg9uuWLEiSbLDro1fnnBzLfBnfi5Dx+c1bCgN25LlspqamobfPrqF4cOHp3///nnooYdqrT/77LPZd999M3PmzJx++umNPs4vfvGLHH300TnkkENy3333ZbvtGp/L33///Tn22GMzd+7cnHzyyfXWbe0dOYMGDcrq1avrnOodaD6t8e46WYaW19LveJdjaB2yDKVBlqE0tEaWv/3VBano1rPBbTcf9e0Icdg2fi5Dx+c1bCgNLX6E+IABA/Laa6/VWV++fHmSpLKystHH+PWvf50TTjghw4cPz7x585o0DE+SQYMGJUlWrVrVYF15eflWj2Lv0qVLs/9jB7QcWYaOT46hNMgylAZZhtJQX5ZTU/b+rQGFrDdSV6sWaBF+LkNpkGXoOIpK5MiRI/Piiy/++fpEf7JgwYLC/Q15+eWXc+yxx6Zfv3756U9/mp49G37n6pZeeeWVJEnfvn2LaRkAAAAAAACATqqogfiYMWOyadOm3HLLLYW1qqqqzJ49O6NHjy4cxb1kyZI8/3zt0y+9/vrrOfroo9OlS5fcf//99Q62V61alU2bNtVae/fdd3PVVVelW7duOeyww4ppGQAAAAAAAIBOqqhTpo8ePTpjx47NxRdfnBUrVmSvvfbKnDlzsnjx4syaNatQN27cuMyfPz9bXp782GOPzSuvvJILL7wwjz32WB577LHCff37989RRx2VJLn33ntz+eWXZ8yYMdljjz2yatWq3HnnnXnmmWdyxRVXbNOF0gEAAAAAAADofIoaiCfJbbfdlqlTp2bu3LlZvXp1RowYkfvuuy8HH3xwg9v9+te/TpJ897vfrXPfIYccUhiI77fffvnoRz+a22+/PStXrky3bt0ycuTI3H333Rk7dmyx7QIAAAAAAADQSRU9EK+oqMi0adMybdq0emseffTROmtbHi3ekP333z/33ntvsW0BAAAAAAAAQC1FXUMcAAAAAAAAADoKA3EAAAAAAAAASpKBOAAAAAAAAAAlyUAcAAAAAAAAgJJkIA4AAAAAAABASTIQBwAAAAAAAKAkGYgDAAAAAAAAUJIMxAEAAAAAAAAoSQbiAAAAAAAAAJQkA3EAAAAAAAAASpKBOAAAAAAAAAAlyUAcAAAAAAAAgJJkIA4AAAAAAABASTIQBwAAAAAAAKAkGYgDAAAAAAAAUJIMxAEAAAAAAAAoSQbiAAAAAAAAAJSkogfiVVVVueiii1JZWZnu3btn9OjRefDBBxvd
7p577slJJ52UPffcMz169Mg+++yTCy64IGvWrNlq/b333ptPfOITqaioyO67755LL7007733XrHtAgAAAAAAANBJFT0QHz9+fK699tp8+ctfzvXXX5+uXbvm+OOPz2OPPdbgdhMnTsxzzz2Xk08+Od/73vdy7LHH5sYbb8ynP/3pbNiwoVbtz372s5x44onZaaedcsMNN+TEE0/M5ZdfnrPPPrvYdgEAAAAAAADopLYrpnjhwoW56667Mm3atEyePDlJMm7cuAwfPjwXXnhhHn/88Xq3nTdvXg499NBaa/vvv39OOeWU3HHHHTnttNMK65MnT86IESPywAMPZLvt3m+xd+/eueKKK3LOOedk6NChxbQNAAAAAAAAQCdU1BHi8+bNS9euXTNx4sTCWkVFRSZMmJAnnngiS5curXfbDw7Dk+SLX/xikuS5554rrD377LN59tlnM3HixMIwPEnOPPPM1NTUZN68ecW0DAAAAAAAAEAnVdRA/KmnnsqQIUPSu3fvWuujRo1KkixatKionb/++utJkl133bXWPpLkgAMOqFVbWVmZgQMHFu4HAAAAAAAAgIYUdcr05cuXZ8CAAXXWN68tW7asqJ1fffXV6dq1a8aMGVNrH1s+5gf309g+qqqqUlVVVfh83bp1SZLq6upUV1cX1R/QdF26FPX+mkbJMrSN5syyHEPbkWUoDbIMpaE1spyymvdvDShkvZG6WrVAgZ/L0PF5DRtKw7ZkuaiB+IYNG1JeXl5nvaKionB/U915552ZNWtWLrzwwuy999619pGk3v0UftGvx5VXXpnLLruszvrKlSuzcePGJvcHFGe33XZr1seTZWgbzZllOYa2I8tQGmQZSkNrZLnHLuvTvbyswW1XrFiRJNlh1/WN7mdzLfBnfi5Dx+c1bCgN25LlspqamsbfFvonw4cPT//+/fPQQw/VWn/22Wez7777ZubMmTn99NMbfZxf/OIXOfroo3PIIYfkvvvuq3Wt8OnTp+cb3/hGlixZkkGDBtXabtSoUenatWueeOKJeh97a+/IGTRoUFavXl3nVO9A82mNd9fJMrS8ln7HuxxD65BlKA2yDKWhNbL87a8uSEW3ng1ue/73hiZJrv36843uZ3Mt8Gd+LkPH5zVsKA0tfoT4gAED8tprr9VZ33ya88rKykYf49e//nVOOOGEDB8+PPPmzas1DN+8j82P+cGB+PLlywvXK69PeXn5Vo8u79KlS7P/Ywe0HFmGjk+OoTTIMpQGWYbSUF+WU1P2/q0Bhaw3UlerFmgRfi5DaZBl6DiKSuTIkSPz4osv1jlt+YIFCwr3N+Tll1/Osccem379+uWnP/1pevas+87VzY/x5JNP1lpftmxZfv/73ze6DwAAAAAAAABIihyIjxkzJps2bcott9xSWKuqqsrs2bMzevTowhHdS5YsyfPP1z790uuvv56jjz46Xbp0yf3335++fftudR/77rtvhg4dmltuuSWbNm0qrN98880pKyvLmDFjimkZAAAAAAAAgE6qqFOmjx49OmPHjs3FF1+cFStWZK+99sqcOXOyePHizJo1q1A3bty4zJ8/P1tenvzYY4/NK6+8kgsvvDCPPfZYHnvsscJ9/fv3z1FHHVX4fNq0aTnhhBNy9NFH50tf+lKeeeaZ3HjjjTnttNMybNiwD/P1AgAAAAAAANBJFDUQT5LbbrstU6dOzdy5c7N69eqMGDEi9913Xw4++OAGt/v1r3+dJPnud79b575DDjmk1kD8c5/7XO65555cdtllOfvss9O3b99MmTIll1xySbHtAgAAAAAAANBJFT0Qr6ioyLRp0zJt2rR6ax599NE6a1seLd4UJ554Yk488cQiuwMAAAAAAACA9xV1DXEAAAAAAAAA6CgMxAEAAAAAAAAoSQbiAAAAAAAAAJQkA3EAAAAAAAAASpKBOAAAAAAAAAAlyUAcAAAAAAAAgJJkIA4AAAAAAABASTIQBwAAAAAAAKAkGYgD
AAAAAAAAUJK2a+sGAAAAAACgs3vw0v9Nj/KeDdYcd9WQVuoGAEqHI8QBAAAAAAAAKEkG4gAAAAAAAACUJANxAAAAAAAAAEqSgTgAAAAAAAAAJclAHAAAAAAAAICSZCAOAAAAAAAAQEkyEAcAAAAAAACgJBU9EK+qqspFF12UysrKdO/ePaNHj86DDz7Y6HYvvPBCzjvvvBx44IGpqKhIWVlZFi9evNXawYMHp6ysrM7tjDPOKLZdAAAAAAAAADqp7YrdYPz48Zk3b17OPffc7L333rn11ltz/PHH55FHHslBBx1U73ZPPPFEvve97+WjH/1ohg0blkWLFjW4n5EjR+aCCy6otTZkyJBi2wUAAAAAtsH0Sc81WjN5xrBW6AQAALZdUQPxhQsX5q677sq0adMyefLkJMm4ceMyfPjwXHjhhXn88cfr3faEE07ImjVr0qtXr0yfPr3Rgfhf/MVf5OSTTy6mPQAAAAAAAAAoKOqU6fPmzUvXrl0zceLEwlpFRUUmTJiQJ554IkuXLq1325133jm9evUqqrl33nkn69evL2obAAAAAAAAAEiKHIg/9dRTGTJkSHr37l1rfdSoUUnS6FHfxXj44YfTo0eP9OzZM4MHD87111/fbI8NAAAAAAAAQOkr6pTpy5cvz4ABA+qsb15btmxZszQ1YsSIHHTQQdlnn33y5ptv5tZbb825556bZcuW5eqrr25w26qqqlRVVRU+X7duXZKkuro61dXVzdIfUFeXLkW9v6ZRsgxtozmzLMfQdmQZSoMsQ2lojSynrOb9WwMKWW+kbptrocS1RpZr/vRfQ2QOtp3XsKE0bEuWixqIb9iwIeXl5XXWKyoqCvc3h3vvvbfW56eeemqOO+64XHvttTn77LMzcODAere98sorc9lll9VZX7lyZTZu3Ngs/QF17bbbbs36eLIMbaM5syzH0HZkGUqDLENpaI0s99hlfbqXlzW47YoVK5IkO+za+OUJt6UWSl1rZLlmp7dTU9HwtjIH285r2FAatiXLZTU1NY2/1fNPhg8fnv79++ehhx6qtf7ss89m3333zcyZM3P66ac3+jjTp0/PN77xjbz66qsZPHhwk/Z9//3359hjj83cuXNz8skn11u3tXfkDBo0KKtXr65zqneg+bTGu+tkGVpeS7/jXY6hdcgylAZZhtLQGln+9lcXpKJbzwa3Pf97Q5Mk1379+Ub3sy21UOpaI8s/OPfJ9ChvOMvHXrF3s/UBnY3XsKE0tPgR4gMGDMhrr71WZ3358uVJksrKyqIbaKpBgwYlSVatWtVgXXl5+VaPYu/SpUuz/2MHtBxZho5PjqE0yDKUhvqy/I2rXkq3Rl54n3HZsJZqCyhSfVlOTdn7twYUfm43UrfNtUCT1Zflsj/91xCZg/bD38vQcRSVyJEjR+bFF1/88/WJ/mTBggWF+1vKK6+8kiTp27dvi+0DAAAAAAAAgNJR1EB8zJgx2bRpU2655ZbCWlVVVWbPnp3Ro0cXjuJesmRJnn++8VMqbc2qVauyadOmWmvvvvturrrqqnTr1i2HHXbYNj0uAAAAAAAAAJ1LUadMHz16dMaOHZuLL744K1asyF577ZU5c+Zk8eLFmTVrVqFu3LhxmT9/fra8PPnatWtzww03JEl++ctfJkluvPHG7LTTTtlpp51y1llnJUnuvffeXH755RkzZkz22GOPrFq1KnfeeWeeeeaZXHHFFdt0oXQAAAAAAAAAOp+iBuJJctttt2Xq1KmZO3duVq9enREjRuS+++7LwQcf3OB2q1evztSpU2utXXPNNUmSj3zkI4WB+H777ZePfvSjuf3227Ny5cp069YtI0eOzN13352xY8cW2y4AAAAAAAAAnVTRA/GKiopMmzYt06ZNq7fm0UcfrbM2ePDgWkeM12f//ffPvffeW2xbAAAAAAAAAFBL0QNxAAAAAAAAoK5nLnghPbv1bLBmxIxhrdQNkCRd2roBAAAAAAAAAGgJBuIA
AAAAAAAAlCSnTAcAAAAAAIBW9vSk5xqtcXp1+PAcIQ4AAAAAAABASTIQBwAAAAAAAKAkGYgDAAAAAAAAUJJcQxwAAABo1KRLG7++4YzLXN8QAACA9sUR4gAAAAAAAACUJANxAAAAAAAAAEqSgTgAAAAAAAAAJclAHAAAAAAAAICSZCAOAAAAAAAAQEkyEAcAAAAAAACgJBmIAwAAAAAAAFCSDMQBAAAAAAAAKEkG4gAAAAAAAACUpKIH4lVVVbnoootSWVmZ7t27Z/To0XnwwQcb3e6FF17IeeedlwMPPDAVFRUpKyvL4sWL662/995784lPfCIVFRXZfffdc+mll+a9994rtl0AAAAAAAAAOqntit1g/PjxmTdvXs4999zsvffeufXWW3P88cfnkUceyUEHHVTvdk888US+973v5aMf/WiGDRuWRYsW1Vv7s5/9LCeeeGIOPfTQ3HDDDfnNb36Tyy+/PCtWrMjNN99cbMsAAAAAAADQYT096blGa0bMGNYKnUDHU9RAfOHChbnrrrsybdq0TJ48OUkybty4DB8+PBdeeGEef/zxerc94YQTsmbNmvTq1SvTp09vcCA+efLkjBgxIg888EC22+79Fnv37p0rrrgi55xzToYOHVpM2wAAAAAAAAB0QkUNxOfNm5euXbtm4sSJhbWKiopMmDAhU6ZMydKlSzNo0KCtbrvzzjs3aR/PPvtsnn322cyYMaMwDE+SM888M9/5zncyb968fOtb3yqm7STJBS/clW49u9d7/4xhXyn6MQEAAAAAAABov4q6hvhTTz2VIUOGpHfv3rXWR40alSQNHvVdzD6S5IADDqi1XllZmYEDBxbuBwAAAAAAAICGFHWE+PLlyzNgwIA665vXli1b9qEbWr58ea3H/OB+GttHVVVVqqqqCp+vW7cuSVJW8/6tPtXV1dvQLbBZly5Fvb+mUfVlubq6Wl6hBTVnluUY2k5rZHnync+nW/eeDW77va+41BF8GK2R5bLUpCwN/LGcP/+93FjdlrXAn7VGlht94Stb5LORum2uhRLXGlmu+dN/DdmcuZ9PeanR/Rx7xd5F10Ipa63XsGvKalLTxJ/LjdVtay2Usm3JclED8Q0bNqS8vLzOekVFReH+D2vzY9S3n8Iv+vW48sorc9lll9VZ32V915SX1f/lrlixovDxzKWPNLiPMwYd1uD90Bnttttuzfp49WV55cqV2bhxY7PuC/iz5syyHEPbaY0s79J1fcq3K2tw282/Y898aGmj+znjiK1fegk6s1bJcu/1Ka9oWpZ33XF9o/vZ8m9r4H2tkeUeu6xP9/KmZXmHXZue5WJqodS1RpZrdno7NRUNb7s5czV93m50P9tSC6WstV7D3rjL+nRt4s/lDUX8rC2mFkrZtmS5qIF49+7da73bZbPNL2p3717/NbqL2UeSevfT2D4uvvjinH/++YXP161bl0GDBuXNHTal2w7v1btdv379Ch+/sar+ug/WAi2jviz37du3zmUbgPZJjqE01Pv79aYd0u29HRrcdvPvzW+8t6rR/fgdG1pWvVlet0O6VTUxy2tlGdpafVn+45s7pLpb07K8/o2mZ7mYWqDp6sty2ZqeKStv+CxMmzNXtnpto/vZllqg6erLcsWbO6R7E38u/6GIn7XF1AK1FTUQHzBgQF577bU665tPc15ZWfmhG9p8qvTly5dn0KDaR4ksX768cL3y+pSXl2/16PKasvdv9dny8PqG6j5YO+m5uQ3Wzhj2lYYfDNiq+rLcpUuXZj+1DdAy5BhKQ72/X//pRMsN2Zz1xuq2rJ0057lGa2ecMqzRGqC21s4y0DLqy3KjL3xli3w29sLXttYCTVZflsv+9F9DNmeusbptrQWart4s15SlrIk/lxur29ZaoLaikjFy5Mi8+OKLdU5bvmDBgsL9H9bmx3jyySdrrS9btiy///3vm2UfAAAAAAAAAJS+ogbiY8aMyaZNm3LLLbcU1qqqqjJ79uyM
Hj26cET3kiVL8vzzz29TQ/vuu2+GDh2aW265JZs2bSqs33zzzSkrK8uYMWO26XEBAAAAAAAA6FyKOmX66NGjM3bs2Fx88cVZsWJF9tprr8yZMyeLFy/OrFmzCnXjxo3L/PnzU1NTU1hbu3ZtbrjhhiTJL3/5yyTJjTfemJ122ik77bRTzjrrrELttGnTcsIJJ+Too4/Ol770pTzzzDO58cYbc9ppp2XYMKdHBAAAAAAAAKBxRQ3Ek+S2227L1KlTM3fu3KxevTojRozIfffdl4MPPrjB7VavXp2pU6fWWrvmmmuSJB/5yEdqDcQ/97nP5Z577slll12Ws88+O3379s2UKVNyySWXFNsuAAAAAAAAdBpPT3qu0ZoRMxyASudR9EC8oqIi06ZNy7Rp0+qtefTRR+usDR48uNYR44058cQTc+KJJxbbHgAAAAAAAAAk2YaBOAAAQHswaU7j73ifcYp3vAMAAAB0ZgbiAABAyTM8BwAAAOicurR1AwAAAAAAAADQEgzEAQAAAAAAAChJBuIAAAAAAAAAlCTXEAcAANiC640DAAAAlA5HiAMAAAAAAABQkhwhDgAAAAAAAJ3Q05MaPkvaiBnOkEbH5whxAAAAAAAAAEqSI8RbyaTn5jZaM2PYV1qhEwAAoLm43jgAAABA+2YgDgAA0AoMzwEAAABan1OmAwAAAAAAAFCSHCHeDjm9OgAAAAAAAMCH5whxAAAAAAAAAEqSI8Q7OEeTAwAA0N5MuvS5RmtmXDasFToBAACgs3OEOAAAAAAAAAAlqegjxKuqqnLJJZdk7ty5Wb16dUaMGJHLL788Rx11VKPbvvbaaznvvPPywAMPpLq6Oocddliuu+667LnnnrXqysrKtrr9lVdemW9+85vFtgwAANChTJrThKNrT3F0LQAAAEBjih6Ijx8/PvPmzcu5556bvffeO7feemuOP/74PPLIIznooIPq3e7tt9/OYYcdlrVr12bKlCnZfvvtc9111+WQQw7JokWLsssuu9SqP+qoozJu3Lhaax//+MeLbZctOL06AACUnsaG5wbnAAAAQGdW1EB84cKFueuuuzJt2rRMnjw5STJu3LgMHz48F154YR5//PF6t73pppvy0ksvZeHChfnkJz+ZJDnuuOMyfPjwXHPNNbniiitq1Q8ZMiQnn3xysV8PAAAAAAAAACQpciA+b968dO3aNRMnTiysVVRUZMKECZkyZUqWLl2aQYMG1bvtJz/5ycIwPEmGDh2aI444InfffXedgXiSbNiwIWVlZamoqCimTQAAAACgFU2f1PjlPibPcNYSAABaX5diip966qkMGTIkvXv3rrU+atSoJMmiRYu2ul11dXWefvrpHHDAAXXuGzVqVF5++eW89dZbtdZvvfXW7LDDDunevXs++tGP5s477yymVQAAAAAAAAA6uaKOEF++fHkGDBhQZ33z2rJly7a63apVq1JVVdXotvvss0+S5MADD8z/9//9f9ljjz2ybNmyzJgxI1/+8pezdu3afO1rX2uwx6qqqlRVVRU+X7duXZKkrOb9W32qq6sLHzdUt621jdW1t9qvP39Ho7XfG/rlxh+QTqNLl6LeX9Oo+rJcXV1dK4NA82rOLMsxtJ3WyHJZalKWhn/BLPwe2kid2pat9W9ux9VZsgylrjWy3OgLX9kic014gajFa6EDao0s1/zpv4ZszlFjddtaC6WstV7DrimrSU0Tfy43Vtceav37QHuzLVkuaiC+YcOGlJeX11nffErzDRs21LtdkiZv+8tf/rJWzf/9v/83+++/f6ZMmZLx48ene/fu9fZ45ZVX5rLLLquzvsv6rikvq//LXbFiReHjXdc3/G3ZltrG6jpyLSTJbrvt1qyPV1+WV65cmY0bNzbrvoA/a84syzG0ndbI8i5d16d8u7IGty38brnd+kb3o7blav3e3nG1SpZ7r095RROzvGMR/18WUQulrjWy3GOX9ele3rQs77Br0/PZUrXQEbVG
lmt2ejs1jVw9dHOOavq83eh+tqX2v+e81mjt/qf8RaM10B611mvYG3dZn65N/Lm8oYifn21V6+c37c22ZLmogXj37t1rvdtls80vatc3qN68vi3bJkm3bt1y1lln5Ywzzsh///d/56CDDqq39uKLL875559f+HzdunUZNGhQ3txhU7rt8F692/Xr16/w8Rur6q/b1trG6jpyLbSE+rLct2/fOpdtANonOYbSUO/v15t2SLf3dmhw28Lvlu+tanQ/aluu1u/tJA1ked0O6VbVxCyvLeL/yyJqgaarL8t/fHOHVHdrWpbXv9H0fLZULXR29WW5bE3PlJX3bHDbzTkqW7220f20dC10dvVlueLNHdK9iT+X/1DEz8+2qpV5SkFRA/EBAwbktdfqvkNs+fLlSZLKysqtbrfzzjunvLy8UFfMtpsNGjQoyfunX29IeXn5Vo9Eryl7/1afLQ+vb6huW2sbq+vItdAS6styly5d/L8HHYQcQ2mo9/frP51ouSGF3y0bqVPbsrX+zSVp31kGmq6+LDf6wle2yFwTXvRp8Vro5OrLctmf/mvI5hw1VtcatdDZ1ZvlmrKUNfHncmN17aFW5ikFRf1fPHLkyLz44ot/vj7RnyxYsKBw/1Z30qVL9ttvvzz55JN17luwYEH23HPP9OrVq8F9v/LKK0mSvn37FtMyAAAAAAAAAJ1UUUeIjxkzJtOnT88tt9ySyZMnJ3n/NOizZ8/O6NGjC0dxL1myJH/84x8zdOjQWtt+85vfzJNPPpkDDjggSfLCCy/k4YcfLjxW8v51RT849H7rrbfyj//4j9l1112z//77b9tXCgAA0MlNmvNcg/fPOGVYK3UCAAAA0DqKGoiPHj06Y8eOzcUXX5wVK1Zkr732ypw5c7J48eLMmjWrUDdu3LjMnz8/NTU1hbUzzzwz3//+9/PZz342kydPzvbbb59rr702/fv3zwUXXFComzFjRn784x/n85//fHbfffcsX748//zP/5wlS5Zk7ty56datWzN82QAAAAAAAACUuqIG4kly2223ZerUqZk7d25Wr16dESNG5L777svBBx/c4Ha9evXKo48+mvPOOy+XX355qqurc+ihh+a6666rdUT4Zz7zmTz++OP5f//v/+XNN9/MDjvskFGjRuWf//mfc/jhhxf/FQIl5YIX/ivdeu5Q7/0zhn2qFbsBAAAAAACgPSt6IF5RUZFp06Zl2rRp9dY8+uijW10fOHBgfvCDHzT4+EcddVSOOuqoYtsCAAAAAAAAgFq6tHUDAAAAAAAAANASij5CHKCjmPTcrxq83+nVAQAAAACgaZ6e9FyD94+YMayVOoHiGIjzoU16bm6jNTOGfaUVOgEAAAAAAAD4MwNxAAAAoM1MurTho0ySZMZljjSBUjK9kaPLkmSyI8wAAGgmBuIAAAAAAEBRfvbNFxutOe6qIa3QCQA0zEAcAAAA6BAcTQ4AAECxDMRpVa43Tns06blfNVozY9inWqETAACguRieAwAAkBiI044ZngMAANAaDM8BAKB5PT2p4d+xR8zw+zWtx0CckmB4TmtxNDkAAHRuhucAANB8GhucJ38enhdTC1syEAcAAAAAAABKhkE7WzIQB2ghjiaH+k16rOFfMmcc5BdMAKDja+xo8i2PJC+mFgAAaBstNWg3lG9ZBuIA7YDhOQAAAACl6mfffLHB+4+7akgrdQJAZ2QgTqfT2PXGt7zWeDG10FqKGZ4btAMAQOlxNDkApczwHIDmZiAOQJKWG7Q3Z63hPQC0nklzGhm4nWLgBu1dY4Pz5M/D85aqBQAAmpfTthfPQByaiaPJoXUVMzxvaq0j6tufxq41nrjeOABAe2Z4DkBLcjQ5QNtobHje3gbnBuLQBgzPoePrCEfUb2stABSjsSPJE0eTA01jeA5AS2rq8Lyxui1rAWhcMcPzlhq0G4hDO9bY4Dz58/C8mFoAAADoyNrD6eAN8AEoZnhu0A7QdooeiFdVVeWS
Sy7J3Llzs3r16owYMSKXX355jjrqqEa3fe2113LeeeflgQceSHV1dQ477LBcd9112XPPPevUzpo1K9OnT8+rr76aQYMG5etf/3rOPvvsYtsFtsKgHQCAtuBocgCKNb0J17Kc/KcjhRqrndzOTt0JUJ+WGrQbygOdVdED8fHjx2fevHk599xzs/fee+fWW2/N8ccfn0ceeSQHHXRQvdu9/fbbOeyww7J27dpMmTIl22+/fa677roccsghWbRoUXbZZZdC7T/90z/ljDPOyF//9V/n/PPPzy9+8Yt8/etfzx//+MdcdNFF2/aVAgAlrZjrjbs2OUD7V8zw3KAdgGIVMzxvqVqA9qw9DOWLuUa868kDDSlqIL5w4cLcddddmTZtWiZPnpwkGTduXIYPH54LL7wwjz/+eL3b3nTTTXnppZeycOHCfPKTn0ySHHfccRk+fHiuueaaXHHFFUmSDRs25O/+7u/y2c9+NvPmzUuSfPWrX011dXW+/e1vZ+LEienTp882fbEAAMVqqUF7R6sFKBWG5wC0B8155Htr1AIAdGRFDcTnzZuXrl27ZuLEiYW1ioqKTJgwIVOmTMnSpUszaNCgerf95Cc/WRiGJ8nQoUNzxBFH5O677y4MxB955JG8+eabOfPMM2ttP2nSpNxxxx35yU9+kpNPPrmYtgEA+JAaG55vOThvidr28KaAjlYLfHgtdZS6oTwAHU17GMq3Va03BQBAx9elmOKnnnoqQ4YMSe/evWutjxo1KkmyaNGirW5XXV2dp59+OgcccECd+0aNGpWXX345b731VmEfSerU7r///unSpUvhfgAAAAAAAABoSFFHiC9fvjwDBgyos755bdmyZVvdbtWqVamqqmp023322SfLly9P165d069fv1p13bp1yy677FLvPjarqqpKVVVV4fO1a9cmSd59a0PKaurfbs2aNYWP331rQ4P72JbaxurUtp/alv5/oZjajvD92lzbpUuX9OrVK2VlZY3WN0X9WX47ZTX1h7n29/ntBvfx56+x4Tq17ad2W57fYmo7wvegpWubM8v15XjNmjV59+23Gu0lSaN1attPba3MtUBtR/getKfa1sjyuxveSlka+AU7W/S9oYivUW2z19bKXAvUdoTvQUetbZUsVxWR5aoivka1TaqtlbkWqO0I34POUNsaWd747ltp8IWv/Lnvje82/Wsstdotc9TWtR3h+6W2dl1rZHl91VupaeLP5fVF/HvVEWq3zFFb13aE75fa2nVNrW2t17Dffvet1DTx5/JbRfx71Va1W36fW6K2I3wP1BZf25L/32xTlmuKsOeee9Ycd9xxddZffvnlmiQ111133Va3W7JkSU2SmquvvrrOfbNmzapJUvPUU0/V1NTU1Pzf//t/a7p3777Vxxk0aFDNF77whQZ7vPTSS2uSuLm5tcFt7dq1DeazGLLs5tZ2t+bKshy7ubXtTZbd3ErjJstubqVxk2U3t9K4ybKbW8e/eQ3bza00bsVmuaympoFDLT9g+PDh6d+/fx566KFa688++2z23XffzJw5M6effnqd7d5444307ds3//AP/5CpU6fWuu+mm27KpEmT8vzzz2efffbJWWedlZkzZ+a9996r8zj9+vXLEUcckX/5l3+pt8cPviOnuro6q1atyi677FJ4p8C6desyaNCgLF26tM7p32k9noeOqaHnrSXfXSfL7ZfnoeNp7DlrqXe8by3HTemH1uF56Hhkma3xPHQ8sszWeB46HllmazwPHY8sszWeh47Ha9hsjeei42nOLBd1yvQBAwbktddeq7O+fPnyJEllZeVWt9t5551TXl5eqGto2wEDBmTTpk1ZsWJFrdOmv/POO3nzzTfr3cdm5eXlKS8vr7W20047bbW2d+/e/qdvBzwPHVNLP2+y3PF4Hjqe9pTj1uiHpvE8dDyyzNZ4HjoeWWZrPA8djyyzNZ6HjkeW2RrPQ8fTnrLs/5/2w3PR8TTHc9almOKRI0fm
xRdfzLp162qtL1iwoHD/VnfSpUv222+/PPnkk3XuW7BgQfbcc8/06tWr1mN8sPbJJ59MdXV1vfsAAAAAAAAAgC0VNRAfM2ZMNm3alFtuuaWwVlVVldmzZ2f06NEZNGhQkmTJkiV5/vnn62z7X//1X7UG3S+88EIefvjhjB07trB2+OGHZ+edd87NN99ca/ubb745PXr0yGc/+9liWgYAAAAAAACgkyrqlOmjR4/O2LFjc/HFF2fFihXZa6+9MmfOnCxevDizZs0q1I0bNy7z58/PlpcnP/PMM/P9738/n/3sZzN58uRsv/32ufbaa9O/f/9ccMEFhbru3bvn29/+diZNmpSxY8fmmGOOyS9+8Yvcfvvt+c53vpOdd975Q3/R5eXlufTSS+ucyoLW5XnomNrT89aeeunMPA8dT3t7ztpbP52V56HjaW/PWXvrp7PyPHQ87e05a2/9dFaeh46nvT1n7a2fzsrz0PG0t+esvfXTWXkeOp729Jy1p146O89Fx9Ocz1lZzZZT6ybYuHFjpk6dmttvvz2rV6/OiBEj8u1vfzvHHHNMoebQQw+tMxBPkt///vc577zz8sADD6S6ujqHHnporrvuuuy111519vP9738/11xzTV599dUMGjQoZ511Vs4555yiLpAOAAAAAAAAQOdV9EAcAAAAAAAAADqCoq4hDgAAAAAAAAAdhYE4AAAAAAAAACXJQBwAAAAAAACAktSpBuL/+Z//mc9//vOprKxMWVlZfvzjH7d1S51CY9/3mpqaXHLJJRkwYEC6d++eI488Mi+99FLbNEuS5Morr8wnP/nJ9OrVK/369cuJJ56YF154oVbNxo0bM2nSpOyyyy7p2bNn/vqv/zp/+MMfWqU/WW4bstzxyDJbI8sdjyyzNbLc8cgyWyPLHUt7z3Eiy21FljsWWaY+styxyDJbI8cdT2tluVMNxNevX5+PfexjmTFjRlu30qk09n3/7ne/m+9973uZOXNmFixYkB122CHHHHNMNm7c2Mqdstn8+fMzadKk/OpXv8qDDz6Yd999N0cffXTWr19fqDnvvPPy7//+7/nBD36Q+fPnZ9myZfk//+f/tEp/stw2ZLnjkWW2RpY7Hllma2S545FltkaWO5b2nuNEltuKLHcsskx9ZLljkWW2Ro47nlbLck0nlaTmRz/6UVu30el88PteXV1ds9tuu9VMmzatsLZmzZqa8vLymn/5l39pgw7ZmhUrVtQkqZk/f35NTc37z9H2229f84Mf/KBQ89xzz9UkqXniiSdatTdZbhuy3DHJMh8kyx2TLPNBstwxyTIfJMsdT3vOcU2NLLcVWe54ZJmtkeWOR5b5IDnumFoqy53qCHHan1dffTWvv/56jjzyyMLajjvumNGjR+eJJ55ow87Y0tq1a5MkO++8c5Lkv//7v/Puu+/Wet6GDh2a3Xff3fPWSclyxyDLNEaWOwZZpjGy3DHIMo2R5fZPjmkKWW7/ZJmmkOX2T5ZpjBx3DC2VZQNx2tTrr7+eJOnfv3+t9f79+xfuo21VV1fn3HPPzWc+85kMHz48yfvPW7du3bLTTjvVqvW8dV6y3P7JMk0hy+2fLNMUstz+yTJNIcvtmxzTVLLcvskyTSXL7Zss0xRy3P61ZJa3a85GgdIzadKkPPPMM3nsscfauhXgQ5BlKA2yDKVBlqHjk2MoDbIMpUGWoTS0ZJYdIU6b2m233ZIkf/jDH2qt/+EPfyjcR9s566yzct999+WRRx7JwIEDC+u77bZb3nnnnaxZs6ZWveet85Ll9k2WaSpZbt9kmaaS5fZNlmkqWW6/5JhiyHL7JcsUQ5bbL1mmqeS4fWvpLBuI06b22GOP7LbbbnnooYcKa+vWrcuCBQvy6U9/ug0769xqampy1lln5Uc/+lEefvjh7LHHHrXu33///bP99tvXet5eeOGFLFmyxPPWScly+yTLFEuW2ydZpliy3D7JMsWS5fZHjtkWstz+yDLbQpbbH1mm
WHLcPrVWljvVKdPffvvt/O///m/h81dffTWLFi3KzjvvnN13370NOyttjX3fzz333Fx++eXZe++9s8cee2Tq1KmprKzMiSee2HZNd3KTJk3KnXfemX/7t39Lr169Ctdh2HHHHdO9e/fsuOOOmTBhQs4///zsvPPO6d27d84+++x8+tOfzqc+9akW70+W24YsdzyyzNbIcscjy2yNLHc8sszWyHLH0t5znMhyW5HljkWWqY8sdyyyzNbIccfTalmu6UQeeeSRmiR1bqecckpbt1bSGvu+V1dX10ydOrWmf//+NeXl5TVHHHFEzQsvvNC2TXdyW3u+ktTMnj27ULNhw4aaM888s6ZPnz41PXr0qPniF79Ys3z58lbpT5bbhix3PLLM1shyxyPLbI0sdzyyzNbIcsfS3nNcUyPLbUWWOxZZpj6y3LHIMlsjxx1Pa2W57E87AwAAAAAAAICS4hriAAAAAAAAAJQkA3EAAAAAAAAASpKBOAAAAAAAAAAlyUAcAAAAAAAAgJJkIA4AAAAAAABASTIQBwAAAAAAAKAkGYgDAAAAAAAAUJIMxAEAAAAAAAAoSQbiAAAAAAAAAJQkA3EAAAAAAAAASpKBOAAAAAAAAAAlyUAcAAAAAAAAgJL0/wNr44XUv/ZAsgAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 2000x300 with 8 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "# For each loop type, plot the proportion of its loops assigned to each of its\n",
    "# TOP_K most used codebook entries, and collect example loops from the top entry.\n",
    "TOP_K = 20        # codebook entries (bars) shown per loop type\n",
    "N_EXAMPLES = 20   # maximum number of example loops sampled per loop type\n",
    "SAMPLE_SEED = 0   # fixed seed so loops_to_plot.txt is reproducible across runs\n",
    "\n",
    "loops_to_plot = []\n",
    "\n",
    "# Number of loops per (codebook entry, loop type) ...\n",
    "counts = (\n",
    "    results_df.groupby(['quantized_index', 'loop_type'])\n",
    "    .size()\n",
    "    .reset_index(name='count')\n",
    ")\n",
    "# ... normalised by the total number of loops of that loop type.\n",
    "totals = (\n",
    "    counts.groupby('loop_type')\n",
    "    .aggregate({'count': 'sum'})\n",
    "    .reset_index()\n",
    "    .rename(columns={'count': 'total_count'})\n",
    ")\n",
    "counts = counts.merge(totals, on='loop_type')\n",
    "counts['proportion'] = counts['count'] / counts['total_count']\n",
    "\n",
    "loop_types = ['H1', 'H2', 'H3', 'H4', 'L1', 'L2', 'L3', 'L4']\n",
    "colors = [\"#58E8A3\", \"#58E8D4\", \"#58CEE8\", \"#58A0E8\", \"#5873E8\", \"#8458E8\", \"#BA66EA\", \"#E258E8\"]\n",
    "loop_type_colors = dict(zip(loop_types, colors))\n",
    "\n",
    "fig, axs = plt.subplots(nrows=1, ncols=len(loop_types), figsize=(20, 3), sharey=True)\n",
    "for ax, loop_type in zip(axs, loop_types):\n",
    "    ax.set_title(loop_type, fontsize=20)\n",
    "\n",
    "    # Named top_counts (not loop_df) so the parquet DataFrame `loop_df` loaded at the\n",
    "    # top of the notebook is not silently overwritten by this loop.\n",
    "    top_counts = (\n",
    "        counts[counts['loop_type'] == loop_type]\n",
    "        .sort_values(by='proportion', ascending=False)\n",
    "        .head(TOP_K)\n",
    "        .copy()\n",
    "    )\n",
    "    if top_counts.empty:\n",
    "        # .iloc[0] below would raise IndexError for a loop type with no rows.\n",
    "        print(f\"{loop_type}: no loops of this type in results_df; panel left empty\")\n",
    "        continue\n",
    "\n",
    "    top_index = top_counts['quantized_index'].iloc[0]\n",
    "\n",
    "    # Example loops: rows whose 'quantized_index' and 'quantized_index_dihedral_only'\n",
    "    # both equal the top codebook entry of this loop type.\n",
    "    # NOTE(review): candidates are not restricted to `loop_type`, so loops of another\n",
    "    # type that share this codebook entry can be sampled -- confirm this is intended.\n",
    "    candidates = results_df[\n",
    "        (results_df['quantized_index'] == top_index)\n",
    "        & (results_df['quantized_index_dihedral_only'] == top_index)\n",
    "    ]\n",
    "    # DataFrame.sample(n) raises ValueError when n exceeds the number of rows, so cap it.\n",
    "    examples = candidates.sample(n=min(N_EXAMPLES, len(candidates)), random_state=SAMPLE_SEED)\n",
    "    loops_to_plot.extend(examples['loop_id'].tolist())\n",
    "\n",
    "    print(f\"{loop_type} top 1 proportion: index={top_index} {top_counts['proportion'].iloc[0]:.4f}, top {TOP_K} proportion: {top_counts['proportion'].sum():.4f}\")\n",
    "\n",
    "    top_counts['rank'] = range(1, len(top_counts) + 1)\n",
    "    sns.barplot(\n",
    "        data=top_counts,\n",
    "        x='rank',\n",
    "        y='proportion',\n",
    "        order=top_counts['rank'],  # Ensures descending bar order\n",
    "        ax=ax,\n",
    "        color=loop_type_colors[loop_type],\n",
    "    )\n",
    "    ax.set_ylim(0, 0.35)\n",
    "    sns.despine()\n",
    "    ax.set_ylabel(\"\")\n",
    "    ax.set_xlabel(\"\")\n",
    "    ax.grid(axis='y', alpha=0.3)\n",
    "    ax.tick_params(axis='y', labelsize=12)\n",
    "    ax.set_yticks(np.arange(0, 0.40, 0.05))\n",
    "    ax.set_xticks([0, 9, 19])  # bar positions of rank 1, 10 and 20\n",
    "plt.tight_layout()\n",
    "plt.savefig(\"quantized_indices_loop_type_proportions.svg\", format='svg')\n",
    "\n",
    "# One loop id per line.\n",
    "with open(\"loops_to_plot.txt\", \"w\") as f:\n",
    "    for loop_id in loops_to_plot:\n",
    "        f.write(f\"{loop_id}\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fea85b74",
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every loop type, keep the loops assigned to that type's most frequent\n",
    "# codebook entry and write their ids (one per line) to loops_for_sequence_logo.txt.\n",
    "loops = []\n",
    "for loop_type in results_df['loop_type'].unique():\n",
    "    rows_of_type = results_df.loc[results_df['loop_type'] == loop_type]\n",
    "    # mode() can return several values on ties; the first one is used.\n",
    "    most_common_index = rows_of_type['quantized_index'].mode().iloc[0]\n",
    "    # loop_df_filtered stays defined after the loop (rows of the last loop type processed).\n",
    "    loop_df_filtered = rows_of_type.loc[rows_of_type['quantized_index'] == most_common_index]\n",
    "    loops += loop_df_filtered['loop_id'].tolist()\n",
    "\n",
    "with open(\"paper_analyses/1_recovery_of_canonical_clusters/loops_for_sequence_logo.txt\", \"w\") as f:\n",
    "    f.writelines(f\"{loop_id}\\n\" for loop_id in loops)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "8c69c89e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loop_id</th>\n",
       "      <th>loop_type</th>\n",
       "      <th>loop_length</th>\n",
       "      <th>canonical_cluster</th>\n",
       "      <th>canonical_cluster_strict</th>\n",
       "      <th>canonical_cluster_ssc_comparison</th>\n",
       "      <th>quantized_index</th>\n",
       "      <th>quantized_index_dihedral_only</th>\n",
       "      <th>quantized_index_sequence_only</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2567</th>\n",
       "      <td>6027_L4</td>\n",
       "      <td>L4</td>\n",
       "      <td>6</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>6849</td>\n",
       "      <td>6849</td>\n",
       "      <td>7708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4210</th>\n",
       "      <td>9861_L4</td>\n",
       "      <td>L4</td>\n",
       "      <td>6</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>6849</td>\n",
       "      <td>6849</td>\n",
       "      <td>7708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5653</th>\n",
       "      <td>13406_L4</td>\n",
       "      <td>L4</td>\n",
       "      <td>6</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>6849</td>\n",
       "      <td>6849</td>\n",
       "      <td>7708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7449</th>\n",
       "      <td>0_L4</td>\n",
       "      <td>L4</td>\n",
       "      <td>6</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>6849</td>\n",
       "      <td>6849</td>\n",
       "      <td>1123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7497</th>\n",
       "      <td>9_L4</td>\n",
       "      <td>L4</td>\n",
       "      <td>6</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>6849</td>\n",
       "      <td>4271</td>\n",
       "      <td>7708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115273</th>\n",
       "      <td>18751_L4</td>\n",
       "      <td>L4</td>\n",
       "      <td>6</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>6849</td>\n",
       "      <td>6849</td>\n",
       "      <td>1123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115293</th>\n",
       "      <td>18754_L4</td>\n",
       "      <td>L4</td>\n",
       "      <td>6</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>6849</td>\n",
       "      <td>6849</td>\n",
       "      <td>1123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115389</th>\n",
       "      <td>18770_L4</td>\n",
       "      <td>L4</td>\n",
       "      <td>6</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>6849</td>\n",
       "      <td>6849</td>\n",
       "      <td>1123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118406</th>\n",
       "      <td>8052_L4</td>\n",
       "      <td>L4</td>\n",
       "      <td>6</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>6849</td>\n",
       "      <td>4271</td>\n",
       "      <td>7708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>119444</th>\n",
       "      <td>10689_L4</td>\n",
       "      <td>L4</td>\n",
       "      <td>6</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>L4-6-1</td>\n",
       "      <td>6849</td>\n",
       "      <td>4564</td>\n",
       "      <td>3112</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1682 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         loop_id loop_type  loop_length canonical_cluster  \\\n",
       "2567     6027_L4        L4            6            L4-6-1   \n",
       "4210     9861_L4        L4            6            L4-6-1   \n",
       "5653    13406_L4        L4            6            L4-6-1   \n",
       "7449        0_L4        L4            6            L4-6-1   \n",
       "7497        9_L4        L4            6            L4-6-1   \n",
       "...          ...       ...          ...               ...   \n",
       "115273  18751_L4        L4            6            L4-6-1   \n",
       "115293  18754_L4        L4            6            L4-6-1   \n",
       "115389  18770_L4        L4            6            L4-6-1   \n",
       "118406   8052_L4        L4            6            L4-6-1   \n",
       "119444  10689_L4        L4            6            L4-6-1   \n",
       "\n",
       "       canonical_cluster_strict canonical_cluster_ssc_comparison  \\\n",
       "2567                     L4-6-1                           L4-6-1   \n",
       "4210                     L4-6-1                           L4-6-1   \n",
       "5653                     L4-6-1                           L4-6-1   \n",
       "7449                     L4-6-1                           L4-6-1   \n",
       "7497                     L4-6-1                           L4-6-1   \n",
       "...                         ...                              ...   \n",
       "115273                   L4-6-1                           L4-6-1   \n",
       "115293                   L4-6-1                           L4-6-1   \n",
       "115389                   L4-6-1                           L4-6-1   \n",
       "118406                   L4-6-1                           L4-6-1   \n",
       "119444                   L4-6-1                           L4-6-1   \n",
       "\n",
       "        quantized_index  quantized_index_dihedral_only  \\\n",
       "2567               6849                           6849   \n",
       "4210               6849                           6849   \n",
       "5653               6849                           6849   \n",
       "7449               6849                           6849   \n",
       "7497               6849                           4271   \n",
       "...                 ...                            ...   \n",
       "115273             6849                           6849   \n",
       "115293             6849                           6849   \n",
       "115389             6849                           6849   \n",
       "118406             6849                           4271   \n",
       "119444             6849                           4564   \n",
       "\n",
       "        quantized_index_sequence_only  \n",
       "2567                             7708  \n",
       "4210                             7708  \n",
       "5653                             7708  \n",
       "7449                             1123  \n",
       "7497                             7708  \n",
       "...                               ...  \n",
       "115273                           1123  \n",
       "115293                           1123  \n",
       "115389                           1123  \n",
       "118406                           7708  \n",
       "119444                           3112  \n",
       "\n",
       "[1682 rows x 9 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the rows selected in the final iteration of the previous cell's loop:\n",
    "# loop_df_filtered keeps the value assigned for the last loop type processed there.\n",
    "loop_df_filtered"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "232a8366",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Weighted loop type purity: 0.9832\n",
      "Weighted loop length purity: 0.9652\n",
      "Number of clusters with multiple loop lengths: 475 out of 1305\n",
      "Clusters with multiple loop lengths that have H3: 392 out of 724\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>quantized_index</th>\n",
       "      <th>cluster_size</th>\n",
       "      <th>loop_type</th>\n",
       "      <th>loop_length</th>\n",
       "      <th>num_loop_lengths</th>\n",
       "      <th>has_H3</th>\n",
       "      <th>loop_type_purity</th>\n",
       "      <th>loop_length_purity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>343</th>\n",
       "      <td>2044</td>\n",
       "      <td>87</td>\n",
       "      <td>{'H3': 75, 'L3': 6, 'L2': 1, 'H2': 5}</td>\n",
       "      <td>{16: 12, 19: 2, 14: 15, 20: 1, 12: 16, 10: 6, ...</td>\n",
       "      <td>13</td>\n",
       "      <td>True</td>\n",
       "      <td>0.862069</td>\n",
       "      <td>0.183908</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>115</td>\n",
       "      <td>42</td>\n",
       "      <td>{'L3': 10, 'H2': 2, 'H3': 25, 'H1': 4, 'L2': 1}</td>\n",
       "      <td>{13: 6, 9: 8, 14: 9, 16: 2, 19: 1, 17: 1, 15: ...</td>\n",
       "      <td>12</td>\n",
       "      <td>True</td>\n",
       "      <td>0.595238</td>\n",
       "      <td>0.238095</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1247</th>\n",
       "      <td>7809</td>\n",
       "      <td>48</td>\n",
       "      <td>{'L3': 1, 'H3': 46, 'H1': 1}</td>\n",
       "      <td>{11: 1, 16: 11, 12: 6, 19: 8, 18: 3, 29: 1, 13...</td>\n",
       "      <td>11</td>\n",
       "      <td>True</td>\n",
       "      <td>0.958333</td>\n",
       "      <td>0.229167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>248</th>\n",
       "      <td>1436</td>\n",
       "      <td>96</td>\n",
       "      <td>{'H3': 93, 'L3': 3}</td>\n",
       "      <td>{15: 35, 19: 2, 14: 30, 16: 14, 20: 1, 18: 4, ...</td>\n",
       "      <td>10</td>\n",
       "      <td>True</td>\n",
       "      <td>0.968750</td>\n",
       "      <td>0.364583</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>937</th>\n",
       "      <td>5876</td>\n",
       "      <td>49</td>\n",
       "      <td>{'L3': 20, 'H2': 3, 'H1': 7, 'H3': 18, 'L1': 1}</td>\n",
       "      <td>{10: 12, 11: 4, 9: 15, 16: 2, 19: 2, 12: 5, 17...</td>\n",
       "      <td>10</td>\n",
       "      <td>True</td>\n",
       "      <td>0.408163</td>\n",
       "      <td>0.306122</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>364</th>\n",
       "      <td>2156</td>\n",
       "      <td>28</td>\n",
       "      <td>{'H3': 25, 'L2': 3}</td>\n",
       "      <td>{12: 9, 16: 5, 17: 5, 15: 1, 10: 2, 9: 1, 27: ...</td>\n",
       "      <td>10</td>\n",
       "      <td>True</td>\n",
       "      <td>0.892857</td>\n",
       "      <td>0.321429</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>509</th>\n",
       "      <td>3178</td>\n",
       "      <td>47</td>\n",
       "      <td>{'H3': 47}</td>\n",
       "      <td>{17: 1, 23: 25, 25: 3, 24: 9, 19: 2, 26: 3, 15...</td>\n",
       "      <td>10</td>\n",
       "      <td>True</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.531915</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>850</th>\n",
       "      <td>5363</td>\n",
       "      <td>37</td>\n",
       "      <td>{'H3': 19, 'H2': 12, 'L3': 5, 'L2': 1}</td>\n",
       "      <td>{13: 4, 10: 5, 16: 6, 14: 2, 9: 9, 20: 1, 15: ...</td>\n",
       "      <td>10</td>\n",
       "      <td>True</td>\n",
       "      <td>0.513514</td>\n",
       "      <td>0.243243</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>909</th>\n",
       "      <td>5686</td>\n",
       "      <td>39</td>\n",
       "      <td>{'H3': 39}</td>\n",
       "      <td>{24: 9, 23: 10, 26: 1, 20: 5, 19: 2, 25: 9, 22...</td>\n",
       "      <td>9</td>\n",
       "      <td>True</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.256410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1013</th>\n",
       "      <td>6264</td>\n",
       "      <td>57</td>\n",
       "      <td>{'H3': 46, 'H2': 7, 'L3': 3, 'L2': 1}</td>\n",
       "      <td>{10: 28, 13: 2, 16: 1, 9: 8, 12: 7, 11: 7, 15:...</td>\n",
       "      <td>9</td>\n",
       "      <td>True</td>\n",
       "      <td>0.807018</td>\n",
       "      <td>0.491228</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>895</th>\n",
       "      <td>5609</td>\n",
       "      <td>51</td>\n",
       "      <td>{'H3': 47, 'L3': 3, 'H2': 1}</td>\n",
       "      <td>{11: 6, 13: 9, 15: 4, 17: 2, 14: 22, 19: 1, 16...</td>\n",
       "      <td>9</td>\n",
       "      <td>True</td>\n",
       "      <td>0.921569</td>\n",
       "      <td>0.431373</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>600</th>\n",
       "      <td>3729</td>\n",
       "      <td>50</td>\n",
       "      <td>{'H1': 50}</td>\n",
       "      <td>{16: 21, 9: 2, 15: 3, 12: 3, 14: 6, 17: 1, 10:...</td>\n",
       "      <td>9</td>\n",
       "      <td>False</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.420000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>774</th>\n",
       "      <td>4888</td>\n",
       "      <td>27</td>\n",
       "      <td>{'H3': 26, 'H2': 1}</td>\n",
       "      <td>{14: 3, 13: 2, 16: 8, 17: 1, 11: 1, 24: 3, 12:...</td>\n",
       "      <td>9</td>\n",
       "      <td>True</td>\n",
       "      <td>0.962963</td>\n",
       "      <td>0.296296</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>763</th>\n",
       "      <td>4814</td>\n",
       "      <td>41</td>\n",
       "      <td>{'H3': 27, 'L3': 4, 'H2': 6, 'L1': 2, 'H1': 2}</td>\n",
       "      <td>{12: 21, 11: 2, 10: 9, 18: 1, 13: 2, 16: 2, 17...</td>\n",
       "      <td>9</td>\n",
       "      <td>True</td>\n",
       "      <td>0.658537</td>\n",
       "      <td>0.512195</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>905</th>\n",
       "      <td>5661</td>\n",
       "      <td>38</td>\n",
       "      <td>{'H3': 32, 'L2': 3, 'H2': 1, 'L3': 2}</td>\n",
       "      <td>{11: 11, 12: 8, 10: 5, 13: 5, 16: 2, 8: 3, 14:...</td>\n",
       "      <td>8</td>\n",
       "      <td>True</td>\n",
       "      <td>0.842105</td>\n",
       "      <td>0.289474</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>832</th>\n",
       "      <td>5242</td>\n",
       "      <td>27</td>\n",
       "      <td>{'H1': 27}</td>\n",
       "      <td>{10: 4, 19: 2, 16: 1, 14: 5, 12: 7, 15: 1, 11:...</td>\n",
       "      <td>8</td>\n",
       "      <td>False</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.259259</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>363</th>\n",
       "      <td>2155</td>\n",
       "      <td>37</td>\n",
       "      <td>{'H3': 37}</td>\n",
       "      <td>{15: 18, 14: 4, 13: 4, 12: 2, 20: 1, 16: 5, 25...</td>\n",
       "      <td>8</td>\n",
       "      <td>True</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.486486</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>442</th>\n",
       "      <td>2687</td>\n",
       "      <td>41</td>\n",
       "      <td>{'H3': 38, 'L3': 2, 'L2': 1}</td>\n",
       "      <td>{14: 8, 11: 13, 15: 8, 12: 2, 16: 5, 19: 2, 17...</td>\n",
       "      <td>8</td>\n",
       "      <td>True</td>\n",
       "      <td>0.926829</td>\n",
       "      <td>0.317073</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>463</th>\n",
       "      <td>2824</td>\n",
       "      <td>47</td>\n",
       "      <td>{'H3': 43, 'L3': 3, 'L2': 1}</td>\n",
       "      <td>{19: 1, 13: 7, 15: 19, 16: 3, 14: 14, 17: 1, 1...</td>\n",
       "      <td>8</td>\n",
       "      <td>True</td>\n",
       "      <td>0.914894</td>\n",
       "      <td>0.404255</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>608</th>\n",
       "      <td>3792</td>\n",
       "      <td>47</td>\n",
       "      <td>{'H3': 40, 'H2': 1, 'L3': 6}</td>\n",
       "      <td>{12: 16, 15: 3, 14: 13, 13: 3, 17: 1, 10: 5, 1...</td>\n",
       "      <td>8</td>\n",
       "      <td>True</td>\n",
       "      <td>0.851064</td>\n",
       "      <td>0.340426</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      quantized_index  cluster_size  \\\n",
       "343              2044            87   \n",
       "23                115            42   \n",
       "1247             7809            48   \n",
       "248              1436            96   \n",
       "937              5876            49   \n",
       "364              2156            28   \n",
       "509              3178            47   \n",
       "850              5363            37   \n",
       "909              5686            39   \n",
       "1013             6264            57   \n",
       "895              5609            51   \n",
       "600              3729            50   \n",
       "774              4888            27   \n",
       "763              4814            41   \n",
       "905              5661            38   \n",
       "832              5242            27   \n",
       "363              2155            37   \n",
       "442              2687            41   \n",
       "463              2824            47   \n",
       "608              3792            47   \n",
       "\n",
       "                                            loop_type  \\\n",
       "343             {'H3': 75, 'L3': 6, 'L2': 1, 'H2': 5}   \n",
       "23    {'L3': 10, 'H2': 2, 'H3': 25, 'H1': 4, 'L2': 1}   \n",
       "1247                     {'L3': 1, 'H3': 46, 'H1': 1}   \n",
       "248                               {'H3': 93, 'L3': 3}   \n",
       "937   {'L3': 20, 'H2': 3, 'H1': 7, 'H3': 18, 'L1': 1}   \n",
       "364                               {'H3': 25, 'L2': 3}   \n",
       "509                                        {'H3': 47}   \n",
       "850            {'H3': 19, 'H2': 12, 'L3': 5, 'L2': 1}   \n",
       "909                                        {'H3': 39}   \n",
       "1013            {'H3': 46, 'H2': 7, 'L3': 3, 'L2': 1}   \n",
       "895                      {'H3': 47, 'L3': 3, 'H2': 1}   \n",
       "600                                        {'H1': 50}   \n",
       "774                               {'H3': 26, 'H2': 1}   \n",
       "763    {'H3': 27, 'L3': 4, 'H2': 6, 'L1': 2, 'H1': 2}   \n",
       "905             {'H3': 32, 'L2': 3, 'H2': 1, 'L3': 2}   \n",
       "832                                        {'H1': 27}   \n",
       "363                                        {'H3': 37}   \n",
       "442                      {'H3': 38, 'L3': 2, 'L2': 1}   \n",
       "463                      {'H3': 43, 'L3': 3, 'L2': 1}   \n",
       "608                      {'H3': 40, 'H2': 1, 'L3': 6}   \n",
       "\n",
       "                                            loop_length  num_loop_lengths  \\\n",
       "343   {16: 12, 19: 2, 14: 15, 20: 1, 12: 16, 10: 6, ...                13   \n",
       "23    {13: 6, 9: 8, 14: 9, 16: 2, 19: 1, 17: 1, 15: ...                12   \n",
       "1247  {11: 1, 16: 11, 12: 6, 19: 8, 18: 3, 29: 1, 13...                11   \n",
       "248   {15: 35, 19: 2, 14: 30, 16: 14, 20: 1, 18: 4, ...                10   \n",
       "937   {10: 12, 11: 4, 9: 15, 16: 2, 19: 2, 12: 5, 17...                10   \n",
       "364   {12: 9, 16: 5, 17: 5, 15: 1, 10: 2, 9: 1, 27: ...                10   \n",
       "509   {17: 1, 23: 25, 25: 3, 24: 9, 19: 2, 26: 3, 15...                10   \n",
       "850   {13: 4, 10: 5, 16: 6, 14: 2, 9: 9, 20: 1, 15: ...                10   \n",
       "909   {24: 9, 23: 10, 26: 1, 20: 5, 19: 2, 25: 9, 22...                 9   \n",
       "1013  {10: 28, 13: 2, 16: 1, 9: 8, 12: 7, 11: 7, 15:...                 9   \n",
       "895   {11: 6, 13: 9, 15: 4, 17: 2, 14: 22, 19: 1, 16...                 9   \n",
       "600   {16: 21, 9: 2, 15: 3, 12: 3, 14: 6, 17: 1, 10:...                 9   \n",
       "774   {14: 3, 13: 2, 16: 8, 17: 1, 11: 1, 24: 3, 12:...                 9   \n",
       "763   {12: 21, 11: 2, 10: 9, 18: 1, 13: 2, 16: 2, 17...                 9   \n",
       "905   {11: 11, 12: 8, 10: 5, 13: 5, 16: 2, 8: 3, 14:...                 8   \n",
       "832   {10: 4, 19: 2, 16: 1, 14: 5, 12: 7, 15: 1, 11:...                 8   \n",
       "363   {15: 18, 14: 4, 13: 4, 12: 2, 20: 1, 16: 5, 25...                 8   \n",
       "442   {14: 8, 11: 13, 15: 8, 12: 2, 16: 5, 19: 2, 17...                 8   \n",
       "463   {19: 1, 13: 7, 15: 19, 16: 3, 14: 14, 17: 1, 1...                 8   \n",
       "608   {12: 16, 15: 3, 14: 13, 13: 3, 17: 1, 10: 5, 1...                 8   \n",
       "\n",
       "      has_H3  loop_type_purity  loop_length_purity  \n",
       "343     True          0.862069            0.183908  \n",
       "23      True          0.595238            0.238095  \n",
       "1247    True          0.958333            0.229167  \n",
       "248     True          0.968750            0.364583  \n",
       "937     True          0.408163            0.306122  \n",
       "364     True          0.892857            0.321429  \n",
       "509     True          1.000000            0.531915  \n",
       "850     True          0.513514            0.243243  \n",
       "909     True          1.000000            0.256410  \n",
       "1013    True          0.807018            0.491228  \n",
       "895     True          0.921569            0.431373  \n",
       "600    False          1.000000            0.420000  \n",
       "774     True          0.962963            0.296296  \n",
       "763     True          0.658537            0.512195  \n",
       "905     True          0.842105            0.289474  \n",
       "832    False          1.000000            0.259259  \n",
       "363     True          1.000000            0.486486  \n",
       "442     True          0.926829            0.317073  \n",
       "463     True          0.914894            0.404255  \n",
       "608     True          0.851064            0.340426  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def _majority_fraction(category_counts):\n",
    "    \"\"\"Return the fraction of a cluster's members that fall in its most common category.\"\"\"\n",
    "    return max(category_counts.values()) / sum(category_counts.values())\n",
    "\n",
    "\n",
    "def _size_weighted_mean(frame, column):\n",
    "    \"\"\"Return the mean of `column` over the rows of `frame`, weighted by `cluster_size`.\"\"\"\n",
    "    sizes = frame['cluster_size']\n",
    "    return (frame[column] * sizes).sum() / sizes.sum()\n",
    "\n",
    "\n",
    "# One row per codebook entry: its size and its loop-type / loop-length composition,\n",
    "# ordered so that entries mixing the most distinct loop lengths come first.\n",
    "lengths_per_cluster = (\n",
    "    results_df.groupby('quantized_index')\n",
    "    .agg(\n",
    "        cluster_size=('loop_length', 'count'),\n",
    "        loop_type=('loop_type', Counter),\n",
    "        loop_length=('loop_length', Counter),\n",
    "        num_loop_lengths=('loop_length', 'nunique'),\n",
    "    )\n",
    "    .reset_index()\n",
    "    .sort_values('num_loop_lengths', ascending=False)\n",
    ")\n",
    "lengths_per_cluster['has_H3'] = lengths_per_cluster['loop_type'].apply(lambda type_counts: 'H3' in type_counts)\n",
    "\n",
    "# Purity = share of the cluster taken by its most common loop type / loop length.\n",
    "lengths_per_cluster['loop_type_purity'] = lengths_per_cluster['loop_type'].apply(_majority_fraction)\n",
    "loop_type_purity = _size_weighted_mean(lengths_per_cluster, 'loop_type_purity')\n",
    "print(f\"Weighted loop type purity: {loop_type_purity:.4g}\")\n",
    "\n",
    "lengths_per_cluster['loop_length_purity'] = lengths_per_cluster['loop_length'].apply(_majority_fraction)\n",
    "loop_length_purity = _size_weighted_mean(lengths_per_cluster, 'loop_length_purity')\n",
    "print(f\"Weighted loop length purity: {loop_length_purity:.4g}\")\n",
    "\n",
    "mixes_lengths = lengths_per_cluster['num_loop_lengths'] > 1\n",
    "contains_H3 = lengths_per_cluster['has_H3']\n",
    "print(\"Number of clusters with multiple loop lengths:\", int(mixes_lengths.sum()), \"out of\", len(lengths_per_cluster))\n",
    "\n",
    "clusters_of_diff_length_with_H3 = int((mixes_lengths & contains_H3).sum())\n",
    "print(f\"Clusters with multiple loop lengths that have H3: {clusters_of_diff_length_with_H3} out of {int(contains_H3.sum())}\")\n",
    "\n",
    "lengths_per_cluster.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "54f1fad5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ground_truth_indices = []\n",
    "# test_indicies = []\n",
    "# dataset_indices = []\n",
    "# for i, (gt, test) in enumerate(zip(ground_truth, all_quantized_indices)):\n",
    "#     if gt.endswith(\"-*\"):\n",
    "#         continue\n",
    "#     ground_truth_indices.append(clusters_to_indices[gt])\n",
    "#     test_indicies.append(test.item())\n",
    "#     dataset_indices.append(i)\n",
    "\n",
    "# nmi = normalized_mutual_info_score(ground_truth_indices, test_indicies)\n",
    "# print(f\"NMI: {nmi:.3f}\")\n",
    "# ari = adjusted_rand_score(ground_truth_indices, test_indicies)\n",
    "# print(f\"ARI: {ari:.3f}\")\n",
    "\n",
    "# all_angles = np.array([dataset[i]['angles'] for i in range(len(dataset))])\n",
    "# all_tokens = np.array([dataset[i]['sequence'] for i in range(len(dataset))])\n",
    "# all_loop_coords = np.array([dataset[i]['loop_c_alpha_coords'] for i in range(len(dataset))])\n",
    "# all_stem_coords = np.array([dataset[i]['stem_c_alpha_coords'] for i in range(len(dataset))])\n",
    "\n",
    "# special_tokens_mask = (\n",
    "#     (all_tokens == alphabet.cls_idx) | (all_tokens == alphabet.eos_idx) | (all_tokens == alphabet.padding_idx)\n",
    "# )\n",
    "\n",
    "# all_angles_with_canonical = all_angles[dataset_indices]\n",
    "\n",
    "# print(\"\\nFor Kelow Clusters\")\n",
    "# correct, angle1, angle2, angle3 = eval_clusters_length_independent(\n",
    "#     all_angles[dataset_indices], all_loop_coords[dataset_indices], all_stem_coords[dataset_indices],\n",
    "#     np.array(ground_truth_indices), ~special_tokens_mask[dataset_indices])\n",
    "# print(f\"Proportion of pairs in a cluster that are within 0.47 radians of each other: {correct:4f}\")\n",
    "# print(f\"Angle variance of the clusters: {angle1:4f}, {angle2:4f}, {angle3:4f}\")\n",
    "\n",
    "# print(\"\\nFor VQVAE Clusters\")\n",
    "# correct, angle1, angle2, angle3 = eval_clusters_length_independent(\n",
    "#     all_angles[dataset_indices], all_loop_coords[dataset_indices], all_stem_coords[dataset_indices],\n",
    "#     np.array(test_indicies), ~special_tokens_mask[dataset_indices])\n",
    "# print(f\"Proportion of pairs in a cluster that are within 0.47 radians of each other: {correct:4f}\")\n",
    "# print(f\"Angle variance of the clusters: {angle1:4f}, {angle2:4f}, {angle3:4f}\")\n",
    "\n",
    "# print(\"\\nFor VQVAE Clusters (including noise)\")\n",
    "# correct, angle1, angle2, angle3 = eval_clusters_length_independent(\n",
    "#     all_angles, all_loop_coords, all_stem_coords,\n",
    "#     all_quantized_indices.numpy(), ~special_tokens_mask)\n",
    "# print(f\"Proportion of pairs in a cluster that are within 0.47 radians of each other: {correct:4f}\")\n",
    "# print(f\"Angle variance of the clusters: {angle1:4f}, {angle2:4f}, {angle3:4f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7417e54e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# this is quite intensive to run, can crash kernel\n",
    "# dataset_dihedral_distance = dihedral_distance_pairwise(all_angles, mask=~special_tokens_mask)\n",
    "# sil_score = silhouette_score(dataset_dihedral_distance, all_quantized_indices, metric='precomputed')\n",
    "# print(f\"Silhouette Score: {sil_score:.3f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "fa6856de",
   "metadata": {},
   "outputs": [],
   "source": [
    "# proportion_within_threshold_kelow, circular_variance_kelow = eval_clusters(np.array([dataset[i]['angles'] for i in dataset_indices]), np.array(ground_truth_indices), return_mean=False)\n",
    "# proportion_within_threshold, circular_variance = eval_clusters(np.array([dataset[i]['angles'] for i in range(len(dataset))]), all_quantized_indices.numpy(), return_mean=False)\n",
    "# clusters, counts = np.unique(all_quantized_indices.numpy(), return_counts=True)\n",
    "# clusters_var_df = pd.DataFrame({\n",
    "#     'cluster': clusters,\n",
    "#     'cluster_size': counts,\n",
    "#     'proportion_within_threshold': proportion_within_threshold,\n",
    "#     'phi': circular_variance[:, 0],\n",
    "#     'psi': circular_variance[:, 1],\n",
    "#     'omega': circular_variance[:, 2],\n",
    "# })\n",
    "# clusters_var_df.sort_values(by='proportion_within_threshold', ascending=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "d1f576d4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(16, 4))\n",
    "# bins = np.linspace(0, 1, 50)\n",
    "# axes[0].hist(proportion_within_threshold_kelow, bins=bins, alpha=0.5, label='Kelow Clusters', density=True)\n",
    "# axes[0].hist(proportion_within_threshold, bins=bins, alpha=0.5, label='VQVAE Clusters', density=True)\n",
    "# axes[0].set_xlabel('Proportion of pairs within threshold')\n",
    "# axes[0].set_ylabel('Frequency')\n",
    "# axes[0].set_title('Distribution of Proportion of Pairs \\n Within Threshold across Clusters')\n",
    "# axes[0].legend()\n",
    "\n",
    "# angle_bins = np.linspace(0, 0.5, 50)\n",
    "# axes[1].hist(circular_variance_kelow[:, 0], bins=angle_bins, alpha=0.5, label='Kelow Clusters', density=True)\n",
    "# axes[1].hist(circular_variance[:, 0], bins=angle_bins, alpha=0.5, label='VQVAE Clusters', density=True)\n",
    "# axes[1].set_xlabel('Circular Variance (radians)')\n",
    "# axes[1].set_ylabel('Frequency')\n",
    "# axes[1].set_title('Distribution of Circular Variance \\n (phi) across Clusters')\n",
    "# axes[2].hist(circular_variance_kelow[:, 1], bins=angle_bins, alpha=0.5, label='Kelow Clusters', density=True)\n",
    "# axes[2].hist(circular_variance[:, 1], bins=angle_bins, alpha=0.5, label='VQVAE Clusters', density=True)\n",
    "# axes[2].set_xlabel('Circular Variance (radians)')\n",
    "# axes[2].set_ylabel('Frequency')\n",
    "# axes[2].set_title('Distribution of Circular Variance \\n (psi) across Clusters')\n",
    "# axes[3].hist(circular_variance_kelow[:, 2], bins=angle_bins, alpha=0.5, label='Kelow Clusters', density=True)\n",
    "# axes[3].hist(circular_variance[:, 2], bins=angle_bins, alpha=0.5, label='VQVAE Clusters', density=True)\n",
    "# axes[3].set_xlabel('Circular Variance (radians)')\n",
    "# axes[3].set_ylabel('Frequency')\n",
    "# axes[3].set_title('Distribution of Circular Variance \\n (omega) across Clusters')\n",
    "\n",
    "\n",
    "# _, cluster_counts = np.unique(np.array(ground_truth_indices, dtype=np.int64, copy=True), return_counts=True)\n",
    "# _, vqvae_counts = np.unique(np.array(all_quantized_indices, dtype=np.int64, copy=True), return_counts=True)\n",
    "# count_bins = np.linspace(0, max(max(cluster_counts), max(vqvae_counts)), 50)\n",
    "# axes[4].hist(cluster_counts, bins=count_bins, alpha=0.5, label='Kelow Clusters', density=True)\n",
    "# axes[4].hist(vqvae_counts, bins=count_bins, alpha=0.5, label='VQVAE Clusters', density=True)\n",
    "# axes[4].set_xlabel('Cluster Size')\n",
    "# axes[4].set_ylabel('Frequency')\n",
    "# axes[4].set_title('Distribution of Cluster Sizes')\n",
    "# axes[4].legend()\n",
    "\n",
    "# plt.tight_layout()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "62a12be9",
   "metadata": {},
   "source": [
    "## How pure are the VQVAE clusters compared to canonical clusters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "b0997f80",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Numuber of unique canonical clusters: 180\n",
      "Num noise clusters: 107\n"
     ]
    }
   ],
   "source": [
    "print(\"Numuber of unique canonical clusters:\", results_df['canonical_cluster'].nunique())\n",
    "print(\"Num noise clusters:\", len(results_df[results_df['canonical_cluster'].str.endswith(\"-*\")]['canonical_cluster'].unique()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "dc32530d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Purity of VQVAE clusters wrt canonical clusters (D=0.61): 0.902\n",
      "Purity of VQVAE clusters wrt canonical clusters (D=0.47): 0.906\n",
      "Purity of VQVAE clusters wrt canonical clusters (D=0.1): 0.915\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loop_type</th>\n",
       "      <th>prop_correct (D=0.61)</th>\n",
       "      <th>prop_correct (D=0.47)</th>\n",
       "      <th>prop_correct (D=0.1)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>H1</td>\n",
       "      <td>0.885790</td>\n",
       "      <td>0.893791</td>\n",
       "      <td>0.914189</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>H2</td>\n",
       "      <td>0.895813</td>\n",
       "      <td>0.899520</td>\n",
       "      <td>0.910251</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>H3</td>\n",
       "      <td>0.782635</td>\n",
       "      <td>0.754447</td>\n",
       "      <td>0.528327</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>H4</td>\n",
       "      <td>0.983750</td>\n",
       "      <td>0.983005</td>\n",
       "      <td>0.977472</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>L1</td>\n",
       "      <td>0.870198</td>\n",
       "      <td>0.879889</td>\n",
       "      <td>0.888132</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>L2</td>\n",
       "      <td>0.985178</td>\n",
       "      <td>0.974512</td>\n",
       "      <td>0.171196</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>L3</td>\n",
       "      <td>0.815433</td>\n",
       "      <td>0.831003</td>\n",
       "      <td>0.863029</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>L4</td>\n",
       "      <td>0.933672</td>\n",
       "      <td>0.929558</td>\n",
       "      <td>0.964983</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  loop_type  prop_correct (D=0.61)  prop_correct (D=0.47)  \\\n",
       "0        H1               0.885790               0.893791   \n",
       "1        H2               0.895813               0.899520   \n",
       "2        H3               0.782635               0.754447   \n",
       "3        H4               0.983750               0.983005   \n",
       "4        L1               0.870198               0.879889   \n",
       "5        L2               0.985178               0.974512   \n",
       "6        L3               0.815433               0.831003   \n",
       "7        L4               0.933672               0.929558   \n",
       "\n",
       "   prop_correct (D=0.1)  \n",
       "0              0.914189  \n",
       "1              0.910251  \n",
       "2              0.528327  \n",
       "3              0.977472  \n",
       "4              0.888132  \n",
       "5              0.171196  \n",
       "6              0.863029  \n",
       "7              0.964983  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agg_results = None\n",
    "for dihedral_cutoff, canonical_cluster_key in [(0.61, 'canonical_cluster_ssc_comparison'), (0.47, 'canonical_cluster'), (0.1, 'canonical_cluster_strict')]:\n",
    "    vqvae_to_canonical = (\n",
    "    results_df\n",
    "        .groupby('quantized_index')\n",
    "        .agg(\n",
    "            cluster_size = ('quantized_index', 'size'),\n",
    "            canonical_cluster_nunique = (canonical_cluster_key, 'nunique'),\n",
    "            canonical_cluster_set = (canonical_cluster_key, Counter),\n",
    "        ).reset_index().sort_values('canonical_cluster_nunique', ascending=False)\n",
    "    )\n",
    "    vqvae_to_canonical['most_common_canonical_cluster'] = vqvae_to_canonical['canonical_cluster_set'].apply(lambda x: x.most_common(1)[0][0] if x else None)\n",
    "    vqvae_to_canonical['canonical_cluster_set'] = vqvae_to_canonical['canonical_cluster_set'].apply(lambda x: sorted(x.items(), key=lambda y: y[1], reverse=True))\n",
    "    vqvae_to_canonical_map = vqvae_to_canonical.set_index('quantized_index')['most_common_canonical_cluster'].to_dict()\n",
    "    \n",
    "    results_df[f'{canonical_cluster_key}_vqvae'] = results_df['quantized_index'].map(vqvae_to_canonical_map)\n",
    "    mask = ~results_df[canonical_cluster_key].str.endswith(\"-*\")\n",
    "    purity = (results_df[canonical_cluster_key][mask] == results_df[f'{canonical_cluster_key}_vqvae'][mask]).sum() / len(results_df[mask])\n",
    "    print(f\"Purity of VQVAE clusters wrt canonical clusters (D={dihedral_cutoff}): {purity:.3f}\")\n",
    "\n",
    "    results_df['correct_assignment'] = results_df[canonical_cluster_key] == results_df[f'{canonical_cluster_key}_vqvae']\n",
    "    agg_results_ = results_df[~results_df[canonical_cluster_key].str.endswith(\"-*\")].groupby(['loop_type']).agg(prop_correct=('correct_assignment', 'mean')).reset_index()\n",
    "    agg_results_.rename(columns={'prop_correct': f'prop_correct (D={dihedral_cutoff})'}, inplace=True)\n",
    "    if agg_results is None:\n",
    "        agg_results = agg_results_\n",
    "    else:\n",
    "        agg_results = agg_results.merge(agg_results_, on='loop_type')\n",
    "agg_results"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e0d77e80",
   "metadata": {},
   "source": [
    "### How pure are the clusters if we use dihedral only"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "04816e77",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Purity of VQVAE clusters wrt canonical clusters (D=0.61): 0.893\n",
      "Purity of VQVAE clusters wrt canonical clusters (D=0.47): 0.902\n",
      "Purity of VQVAE clusters wrt canonical clusters (D=0.1): 0.912\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loop_type</th>\n",
       "      <th>prop_correct (D=0.61)</th>\n",
       "      <th>prop_correct (D=0.47)</th>\n",
       "      <th>prop_correct (D=0.1)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>H1</td>\n",
       "      <td>0.878597</td>\n",
       "      <td>0.897771</td>\n",
       "      <td>0.902954</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>H2</td>\n",
       "      <td>0.904126</td>\n",
       "      <td>0.913730</td>\n",
       "      <td>0.928264</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>H3</td>\n",
       "      <td>0.749811</td>\n",
       "      <td>0.724648</td>\n",
       "      <td>0.652174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>H4</td>\n",
       "      <td>0.983419</td>\n",
       "      <td>0.978991</td>\n",
       "      <td>0.971120</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>L1</td>\n",
       "      <td>0.860208</td>\n",
       "      <td>0.867043</td>\n",
       "      <td>0.891483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>L2</td>\n",
       "      <td>0.977893</td>\n",
       "      <td>0.976465</td>\n",
       "      <td>0.214674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>L3</td>\n",
       "      <td>0.791215</td>\n",
       "      <td>0.812187</td>\n",
       "      <td>0.845764</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>L4</td>\n",
       "      <td>0.919857</td>\n",
       "      <td>0.914097</td>\n",
       "      <td>0.944858</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  loop_type  prop_correct (D=0.61)  prop_correct (D=0.47)  \\\n",
       "0        H1               0.878597               0.897771   \n",
       "1        H2               0.904126               0.913730   \n",
       "2        H3               0.749811               0.724648   \n",
       "3        H4               0.983419               0.978991   \n",
       "4        L1               0.860208               0.867043   \n",
       "5        L2               0.977893               0.976465   \n",
       "6        L3               0.791215               0.812187   \n",
       "7        L4               0.919857               0.914097   \n",
       "\n",
       "   prop_correct (D=0.1)  \n",
       "0              0.902954  \n",
       "1              0.928264  \n",
       "2              0.652174  \n",
       "3              0.971120  \n",
       "4              0.891483  \n",
       "5              0.214674  \n",
       "6              0.845764  \n",
       "7              0.944858  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agg_results = None\n",
    "for dihedral_cutoff, canonical_cluster_key in [(0.61, 'canonical_cluster_ssc_comparison'), (0.47, 'canonical_cluster'), (0.1, 'canonical_cluster_strict')]:\n",
    "    vqvae_to_canonical = (\n",
    "    results_df\n",
    "        .groupby('quantized_index_dihedral_only')\n",
    "        .agg(\n",
    "            cluster_size = ('quantized_index_dihedral_only', 'size'),\n",
    "            canonical_cluster_nunique = (canonical_cluster_key, 'nunique'),\n",
    "            canonical_cluster_set = (canonical_cluster_key, Counter),\n",
    "        ).reset_index().sort_values('canonical_cluster_nunique', ascending=False)\n",
    "    )\n",
    "    vqvae_to_canonical['most_common_canonical_cluster'] = vqvae_to_canonical['canonical_cluster_set'].apply(lambda x: x.most_common(1)[0][0] if x else None)\n",
    "    vqvae_to_canonical['canonical_cluster_set'] = vqvae_to_canonical['canonical_cluster_set'].apply(lambda x: sorted(x.items(), key=lambda y: y[1], reverse=True))\n",
    "    vqvae_to_canonical_map = vqvae_to_canonical.set_index('quantized_index_dihedral_only')['most_common_canonical_cluster'].to_dict()\n",
    "    \n",
    "    results_df[f'{canonical_cluster_key}_vqvae'] = results_df['quantized_index_dihedral_only'].map(vqvae_to_canonical_map)\n",
    "    mask = ~results_df[canonical_cluster_key].str.endswith(\"-*\")\n",
    "    purity = (results_df[canonical_cluster_key][mask] == results_df[f'{canonical_cluster_key}_vqvae'][mask]).sum() / len(results_df[mask])\n",
    "    print(f\"Purity of VQVAE clusters wrt canonical clusters (D={dihedral_cutoff}): {purity:.3f}\")\n",
    "\n",
    "    results_df['correct_assignment'] = results_df[canonical_cluster_key] == results_df[f'{canonical_cluster_key}_vqvae']\n",
    "    agg_results_ = results_df[~results_df[canonical_cluster_key].str.endswith(\"-*\")].groupby(['loop_type']).agg(prop_correct=('correct_assignment', 'mean')).reset_index()\n",
    "    agg_results_.rename(columns={'prop_correct': f'prop_correct (D={dihedral_cutoff})'}, inplace=True)\n",
    "    if agg_results is None:\n",
    "        agg_results = agg_results_\n",
    "    else:\n",
    "        agg_results = agg_results.merge(agg_results_, on='loop_type')\n",
    "agg_results"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bea31137",
   "metadata": {},
   "source": [
    "### How pure are the clusters if we use sequence only"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "56fd1dae",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Purity of VQVAE clusters wrt canonical clusters (D=0.61): 0.873\n",
      "Purity of VQVAE clusters wrt canonical clusters (D=0.47): 0.882\n",
      "Purity of VQVAE clusters wrt canonical clusters (D=0.1): 0.886\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loop_type</th>\n",
       "      <th>prop_correct (D=0.61)</th>\n",
       "      <th>prop_correct (D=0.47)</th>\n",
       "      <th>prop_correct (D=0.1)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>H1</td>\n",
       "      <td>0.867113</td>\n",
       "      <td>0.879528</td>\n",
       "      <td>0.861211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>H2</td>\n",
       "      <td>0.866810</td>\n",
       "      <td>0.875458</td>\n",
       "      <td>0.898113</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>H3</td>\n",
       "      <td>0.645742</td>\n",
       "      <td>0.536614</td>\n",
       "      <td>0.052701</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>H4</td>\n",
       "      <td>0.995442</td>\n",
       "      <td>0.995852</td>\n",
       "      <td>0.985966</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>L1</td>\n",
       "      <td>0.826363</td>\n",
       "      <td>0.840861</td>\n",
       "      <td>0.852897</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>L2</td>\n",
       "      <td>0.984257</td>\n",
       "      <td>0.991349</td>\n",
       "      <td>0.103261</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>L3</td>\n",
       "      <td>0.754966</td>\n",
       "      <td>0.770743</td>\n",
       "      <td>0.821593</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>L4</td>\n",
       "      <td>0.927597</td>\n",
       "      <td>0.928342</td>\n",
       "      <td>0.939525</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  loop_type  prop_correct (D=0.61)  prop_correct (D=0.47)  \\\n",
       "0        H1               0.867113               0.879528   \n",
       "1        H2               0.866810               0.875458   \n",
       "2        H3               0.645742               0.536614   \n",
       "3        H4               0.995442               0.995852   \n",
       "4        L1               0.826363               0.840861   \n",
       "5        L2               0.984257               0.991349   \n",
       "6        L3               0.754966               0.770743   \n",
       "7        L4               0.927597               0.928342   \n",
       "\n",
       "   prop_correct (D=0.1)  \n",
       "0              0.861211  \n",
       "1              0.898113  \n",
       "2              0.052701  \n",
       "3              0.985966  \n",
       "4              0.852897  \n",
       "5              0.103261  \n",
       "6              0.821593  \n",
       "7              0.939525  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agg_results = None\n",
    "for dihedral_cutoff, canonical_cluster_key in [(0.61, 'canonical_cluster_ssc_comparison'), (0.47, 'canonical_cluster'), (0.1, 'canonical_cluster_strict')]:\n",
    "    vqvae_to_canonical = (\n",
    "    results_df\n",
    "        .groupby('quantized_index_sequence_only')\n",
    "        .agg(\n",
    "            cluster_size = ('quantized_index_sequence_only', 'size'),\n",
    "            canonical_cluster_nunique = (canonical_cluster_key, 'nunique'),\n",
    "            canonical_cluster_set = (canonical_cluster_key, Counter),\n",
    "        ).reset_index().sort_values('canonical_cluster_nunique', ascending=False)\n",
    "    )\n",
    "    vqvae_to_canonical['most_common_canonical_cluster'] = vqvae_to_canonical['canonical_cluster_set'].apply(lambda x: x.most_common(1)[0][0] if x else None)\n",
    "    vqvae_to_canonical['canonical_cluster_set'] = vqvae_to_canonical['canonical_cluster_set'].apply(lambda x: sorted(x.items(), key=lambda y: y[1], reverse=True))\n",
    "    vqvae_to_canonical_map = vqvae_to_canonical.set_index('quantized_index_sequence_only')['most_common_canonical_cluster'].to_dict()\n",
    "    \n",
    "    results_df[f'{canonical_cluster_key}_vqvae'] = results_df['quantized_index_sequence_only'].map(vqvae_to_canonical_map)\n",
    "    mask = ~results_df[canonical_cluster_key].str.endswith(\"-*\")\n",
    "    purity = (results_df[canonical_cluster_key][mask] == results_df[f'{canonical_cluster_key}_vqvae'][mask]).sum() / len(results_df[mask])\n",
    "    print(f\"Purity of VQVAE clusters wrt canonical clusters (D={dihedral_cutoff}): {purity:.3f}\")\n",
    "\n",
    "    results_df['correct_assignment'] = results_df[canonical_cluster_key] == results_df[f'{canonical_cluster_key}_vqvae']\n",
    "    agg_results_ = results_df[~results_df[canonical_cluster_key].str.endswith(\"-*\")].groupby(['loop_type']).agg(prop_correct=('correct_assignment', 'mean')).reset_index()\n",
    "    agg_results_.rename(columns={'prop_correct': f'prop_correct (D={dihedral_cutoff})'}, inplace=True)\n",
    "    if agg_results is None:\n",
    "        agg_results = agg_results_\n",
    "    else:\n",
    "        agg_results = agg_results.merge(agg_results_, on='loop_type')\n",
    "agg_results"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
