{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from rdkit import Chem, DataStructs\n",
    "from rdkit.Chem import AllChem\n",
    "from rdkit.ML.Cluster import Butina"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('../../code')\n",
    "\n",
    "from utils import binarize_log_data, remove_ambiguous_row, clean_continuous, calculate_circles_quick\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "raw = pd.read_csv('../../data/raw/ADME_public_set_3521.csv')\n",
    "raw = raw.dropna(subset=['LOG SOLUBILITY PH 6.8 (ug/mL)'])\n",
    "solubility = raw['LOG SOLUBILITY PH 6.8 (ug/mL)'].to_list()\n",
    "smiles = raw['SMILES'].to_list()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([ 13.,  33.,  33.,  53.,  26.,  38.,  37.,  21.,  46.,  48.,  52.,\n",
       "         39.,  66.,  74., 150., 362., 601., 408.,  67.,   6.]),\n",
       " array([-1.        , -0.84103678, -0.68207355, -0.52311033, -0.36414711,\n",
       "        -0.20518388, -0.04622066,  0.11274256,  0.27170579,  0.43066901,\n",
       "         0.58963223,  0.74859546,  0.90755868,  1.0665219 ,  1.22548512,\n",
       "         1.38444835,  1.54341157,  1.70237479,  1.86133802,  2.02030124,\n",
       "         2.17926446]),\n",
       " <BarContainer object of 20 artists>)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAlXklEQVR4nO3df3CU9YHH8U9CyIZfu2mQ7CZngFgtEAVR0LDoqZWUiKkjZ2wLl6Opw8EdFzwhLUpmECv2DKWeeHTQtHct8e7kbJkpesIBjaGGqywRI0xjwBxYNFjcjS3NLmBJIPneHx2edoUoG7LZb8L7NfPMmOf5Ppvv87BN3n322U2SMcYIAADAIsmJngAAAMAnESgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArJOS6An0RFdXl44dO6YRI0YoKSkp0dMBAAAXwRijEydOKDs7W8nJn36NpF8GyrFjx5STk5PoaQAAgB44evSorrzyyk8d0y8DZcSIEZL+eIButzvBswEAABcjEokoJyfH+T3+afploJx7WcftdhMoAAD0MxdzewY3yQIAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALBOzIHym9/8Rn/zN3+jkSNHasiQIZo4caLefPNNZ7sxRitXrlRWVpaGDBmigoICHTp0KOoxjh8/rpKSErndbqWnp2v+/Pk6efLkpR8NAAAYEGIKlN///ve65ZZbNHjwYG3btk0HDhzQP//zP+tzn/ucM2bNmjVat26dqqqqVF9fr2HDhqmwsFCnT592xpSUlKipqUk1NTXasmWLdu3apYULF/beUQEAgH4tyRhjLnbw8uXL9frrr+t///d/L7jdGKPs7Gx985vf1Le+9S1JUjgcltfrVXV1tebMmaODBw8qLy9Pe/fu1dSpUyVJ27dv1913360PPvhA2dnZnzmPSCQij8ejcDjMXzMGAKCfiOX3d0osD/zf//3fKiws1Fe+8hXV1dXpL/7iL/QP//APWrBggSTpyJEjCgaDKigocPbxeDzKz89XIBDQnDlzFAgElJ6e7sSJJBUUFCg5OVn19fX6q7/6q/O+b3t7u9rb26MOEABw+Rm7fGvcHvu91UVxe2zELqaXeH7961/rueee0zXXXKMdO3Zo0aJF+sd//Ec9//zzkqRgMChJ8nq9Uft5vV5nWzAYVGZmZtT2lJQUZWRkOGM+qbKyUh6Px1lycnJimTYAAOhnYgqUrq4u3XjjjXryySd1ww03aOHChVqwYIGqqqriNT9JUkVFhcLhsLMcPXo0rt8PAAAkVkyBkpWVpby8vKh1EyZMUEtLiyTJ5/NJkkKhUNSYUCjkbPP5fGptbY3afvbsWR0/ftwZ80kul0tutztqAQAAA1dMgXLLLbeoubk5at3//d//acyYMZKk3Nxc+Xw+1dbWOtsjkYjq6+vl9/slSX6/X21tbWpoaHDG7Ny5U11dXcrPz+/xgQAAgIEjpptkly5dqunTp+vJJ5/UV7/6Vb3xxhv64Q9/qB/+8IeSpKSkJC1ZskTf+c53dM011yg3N1ePPvqosrOzNXv2bEl/vOJy1113OS8NnTlzRosXL9acOXMu6h08AABg4IspUG666SZt3rxZFRUVWrVqlXJzc/XMM8+opKTEGfPwww/r1KlTWrhwodra2nTrrbdq+/btSktLc8a88MILWrx4sWbMmKHk5GQVFxdr3bp1vXdUAACgX4vpc1BsweegAMDlibcZ92+x/P7mb/EAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwTkyB8u1vf1tJSUlRy/jx453tp0+fVllZmUaOHKnhw4eruLhYoVAo6jFaWlpUVFSkoUOHKjMzU8uWLdPZs2d752gAAMCAkBLrDtdee61effXVPz1Ayp8eYunSpdq6das2bdokj8ejxYsX67777tPrr78uSers7FRRUZF8Pp92796tDz/8UF//+tc1ePBgPfnkk71wOAAAYCCIOVBSUlLk8/nOWx8Oh/WjH/1IGzdu1J133ilJ2rBhgyZMmKA9e/Zo2rRp+vnPf64DBw7o1Vdfldfr1eTJk/XEE0/okUce0be//W2lpqZe+hEBAIB+L+Z7UA4dOqTs7GxdddVVKikpUUtLiySpoaFBZ86cUUFBgTN2/PjxGj16tAKBgCQpEAho4sSJ8nq9zpjCwkJFIhE1NTV1+z3b29sViUSiFgAAMHDFFCj5+fmqrq7W9u3b9dxzz+nIkSP6y7/8S504cULBYFCpqalKT0+P2sfr9SoYDEqSgsFgVJyc235uW3cqKyvl8XicJScnJ5ZpAwCAfiaml3hmzZrl/PekSZOUn5+vMWPG6Kc//amGDBnS65M7p6KiQuXl5c7XkUiESAEAYAC7pLcZp6en6wtf+IIOHz4sn8+njo4OtbW1RY0JhULOPSs+n++8d/Wc+/pC97Wc43K55Ha7oxYAADBwXVKgnDx5Uu+++66ysrI0ZcoUDR48WLW1tc725uZmtbS0yO/3S5L8fr8aGxvV2trqjKmpqZHb7VZeXt6lTAUAAAwgMb3E861vfUv33HOPxowZo2PHjumxxx7ToEGDNHfuXHk8Hs2fP1/l5eXKyMiQ2+3Wgw8+KL/fr2nTpkmSZs6cqby8PM2bN09r1qxRMBjUihUrVFZWJpfLFZcDBAAA/U9MgfLBBx9o7ty5+t3vfqdRo0bp1ltv1Z49ezRq1ChJ0tq1a5WcnKzi4mK1t7ersLBQzz77rLP/oEGDtGXLFi1atEh+v1/Dhg1TaWmpVq1a1btHBQAA+rUkY4xJ9CRiFYlE5PF4FA6HuR8FAC4jY5dvjdtjv7e6KG6PjT+K5fc3f4sHAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWIdAAQAA1iFQAACAdQgUAABgHQIFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWIdAAQAA1iFQAACAdQgUAABgHQIFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWIdAAQAA1iFQAACAdQgUAABgHQIFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWIdAAQAA1iFQAACAdVISPQEAAGwwdvnWuDzue6uL4vK4A90lXUFZvXq1kpKStGTJEmfd6dOnVVZWppEjR2r48OEqLi5WKBSK2q+lpUVFRUUaOnSoMjMztWzZMp09e/ZSpgIAAAaQHgfK3r179YMf/ECTJk2KWr906VK98sor2rRpk+rq6nTs2DHdd999zvbOzk4VFRWpo6NDu3fv1vPPP6/q6mqtXLmy50cBAAAGlB4FysmTJ1VSUqJ//dd/1ec+9zlnfTgc1o9+9CM9/fTTuvPOOzVlyhRt2LBBu3fv1p49eyRJP//5z3XgwAH953/+pyZPnqxZs2bpiSee0Pr169XR0dE7RwUAAPq1HgVKWVmZioqKVFBQELW+oaFBZ86ciVo/fvx4jR49WoFAQJIUCAQ0ceJEeb1eZ0xhYaEikYiampou+P3a29sViUSiFgAAMHDFfJPsiy++qLfeekt79+49b1swGFRqaqrS09Oj1nu9XgWDQWfMn8fJue3ntl1IZWWlHn/88VinCgAA+qmYrqAcPXpUDz30kF544QWlpaXFa07nqaioUDgcdpajR4/22fcGAAB9L6ZAaWhoUGtrq2688UalpKQoJSVFdXV1WrdunVJSUuT1etXR0aG2trao/UKhkHw+nyTJ5/Od966ec1+fG/NJLpdLbrc7agEAAANXTIEyY8YMNTY2av/+/c4ydepUlZSUOP89ePBg1dbWOvs0NzerpaVFfr9fkuT3+9XY2KjW1lZnTE1Njdxut/Ly8nrpsAAAQH8W0z0oI0aM0HXXXRe1btiwYRo5cqSzfv78+SovL1dGRobcbrcefPBB+f1+TZs2TZI0c+ZM5eXlad68eVqzZo2CwaBWrFihsrIyuVyuXjosAADQn/X6J8muXbtWycnJKi4uVnt7uwoLC/Xss8862wcNGqQtW7Zo0aJF8vv9GjZsmEpLS7Vq1arengoAAOinkowxJtGTiFUkEpHH41E4HOZ+FAC4jMTr4+jjiY+6/5NYfn/zxwIBAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWIdAAQAA1iFQAACAdQgUAABgHQIFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFgnJdETAAAMPGOXb030FNDPcQUFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWIdAAQAA1iFQAACAdQgUAABgHQIFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWIdAAQAA1iFQAACAdQgUAABgHQIFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHViCpTnnntOkyZNktvtltvtlt/v17Zt25ztp0+fVllZmUaOHKnhw4eruLhYoVAo6jFaWlpUVFSkoUOHKjMzU8uWLdPZs2d752gAAMCAEFOgXHnllVq9erUaGhr05ptv6s4779S9996rpqYmSdLSpUv1yiuvaNOmTaqrq9OxY8d03333Oft3dnaqqKhIHR0d2r17t55//nlVV1dr5cqVvXtUAACgX0syxphLeYCMjAx973vf0/33369Ro0Zp48aNuv/++yVJ77zzjiZMmKBAIKBp06Zp27Zt+vKXv6xjx47J6/VKkqqqqvTII4/oo48+Umpq6kV9z0gkIo/Ho3A4LLfbfSnTBwDEwdjlWxM9BWu8t7oo0VOwRiy/v3t8D0pnZ6defPFFnTp1Sn6/Xw0NDTpz5owKCgqcMePHj9fo0aMVCAQkSYFAQBMnTnTiRJIKCwsViUScqzAAAAApse7Q2Ngov9+v06dPa/jw4dq8ebPy8vK0f/9+paamKj09PWq81+tVMBiUJAWDwag4Obf93LbutLe3q7293fk6EonEOm0AANCPxHwFZdy4cdq/f7/q6+u1aNEilZaW6sCBA/GYm6OyslIej8dZcnJy4vr9AABAYsUcKKmpqbr66qs1ZcoUVVZW6vrrr9e//Mu/yOfzqaOjQ21tbVHjQ6GQfD6fJMnn8533rp5zX58bcyEVFRUKh8POcvTo0VinDQAA+pFL/hyUrq4utbe3a8qUKRo8eLBqa2udbc3NzWppaZHf75ck+f1+NTY2qrW11RlTU1Mjt9utvLy8br+Hy+Vy3tp8bgEAAANXTPegVFRUaNasWRo9erROnDihjRs36rXXXtOOHTvk8Xg0f/58lZeXKyMjQ263Ww8++KD8fr+mTZsmSZo5c6by8vI0b948rVmzRsFgUCtWrFBZWZlcLldcDhAAAPQ/MQVKa2urvv71r+vDDz+Ux+PRpEmTtGPHDn3pS1+SJK1du1bJyckqLi5We3u7CgsL9eyzzzr7Dxo0SFu2bNGiRYvk9/s1bNgwlZaWatWqVb17VAAAoF+75M9BSQQ+BwUA7MbnoPwJn4PyJ33yOSgAAADxQqAAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwTkyBUllZqZtuukkjRoxQZmamZs+erebm5qgxp0+fVllZmUaOHKnhw4eruLhYoVAoakxLS4uKioo0dOhQZWZmatmyZTp79uylHw0AABgQYgqUuro6lZWVac+ePaqpqdGZM2c0c+ZMnTp1yhmzdOlSvfLKK9q0aZPq6up07Ngx3Xfffc72zs5OFRUVqaOjQ7t379bzzz+v6upqrVy5sveOCgAA9GtJxhjT050/+ugjZWZmqq6uTrfddpvC4bBGjRqljRs36v7775ckvfPOO5owYYICgYCmTZumbdu26ctf/rKOHTsmr9crSaqqqtIjjzyijz76SKmpqZ/5fSORiDwej8LhsNxud0+nDwCIk7HLtyZ6CtZ4b3VRoqdgjVh+f1/SPSjhcFiSlJGRIUlqaGjQmTNnVFBQ4IwZP368Ro8erUAgIEkKBAKaOHGiEyeSVFhYqEgkoqampgt+n/b2dkUikagFAAAMXD0OlK6uLi1ZskS33HKLrrvuOklSMBhUamqq0tPTo8Z6vV4Fg0FnzJ/Hybnt57ZdSGVlpTwej7Pk5OT0dNoAAKAf6HGglJWV6e2339aLL77Ym/O5oIqKCoXDYWc5evRo3L8nAABInJSe7LR48WJt2bJFu3bt0pVXXums9/l86ujoUFtbW9RVlFAoJJ/P54x54403oh7v3Lt8zo35JJfLJZfL1ZOpAgCAfiimKyjGGC1evFibN2/Wzp07lZubG7V9ypQpGjx4sGpra511zc3Namlpkd/vlyT5/X41NjaqtbXVGVNTUyO32628vLxLORYAADBAxHQFpaysTBs3btTLL7+sESNGOPeMeDweDRkyRB6PR/Pnz1d5ebkyMjLkdrv14IMPyu/3a9q0aZKkmTNnKi8vT/PmzdOaNWsUDAa1YsUKlZWVcZUEAABIijFQnnvuOUnSHXfcEbV+w4YN+sY3viFJWrt2rZKTk1VcXKz29nYVFhbq2WefdcYOGjRIW7Zs0aJFi+T3+zVs2DCVlpZq1apVl3YkAABgwLikz0FJFD4HBQDsxueg/Amfg/InffY5KAAAAPFAoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrpCR6AgCAxBi7fGuipwB0iysoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOjEHyq5du3TPPfcoOztbSUlJeumll6K2G2O0cuVKZWVlaciQISooKNChQ4eixhw/flwlJSVyu91KT0/X/PnzdfLkyUs6EAAAMHDEHCinTp3S9ddfr/Xr119w+5o1a7Ru3TpVVVWpvr5ew4YNU2FhoU6fPu2MKSkpUVNTk2pqarRlyxbt2rVLCxcu7PlRAACAASUl1h1mzZqlWbNmXXCbMUbPPPOMVqxYoXvvvVeS9O///u/yer166aWXNGfOHB08eFDbt2/X3r17NXXqVEnS97//fd1999166qmnlJ2dfQmHAwAABoJevQflyJEjCgaDKigocNZ5PB7l5+crEAhIkgKBgNLT0504kaSCggIlJyervr7+go/b3t6uSCQStQAAgIGrVwMlGAxKkrxeb9R6r9frbAsGg8rMzIzanpKSooyMDGfMJ1VWVsrj8ThLTk5Ob04bAABYpl+8i6eiokLhcNhZjh49mugpAQCAOOrVQPH5fJKkUCgUtT4UCjnbfD6fWltbo7afPXtWx48fd8Z8ksvlktvtjloAAMDA1auBkpubK5/Pp9raWmddJBJRfX29/H6/JMnv96utrU0NDQ3OmJ07d6qrq0v5+fm9OR0AANBPxfwunpMnT+rw4cPO10eOHNH+/fuVkZGh0aNHa8mSJfrOd76ja665Rrm5uXr00UeVnZ2t2bNnS5ImTJigu+66SwsWLFBVVZXOnDmjxYsXa86cObyDBwAASOpBoLz55pv64he/6HxdXl4uSSotLVV1dbUefvhhnTp1SgsXLlRbW5tuvfVWbd++XWlpac4+L7zwghYvXqwZM2YoOTlZxcXFWrduXS8cDgAAGAiSjDEm0ZOIVSQSkcfjUTgc5n4UAOihscu3JnoKl4X3VhclegrWiOX3d794Fw8AALi8ECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArBPzJ8kCAPoWH6iGyxFXUAAAgHUIFAAAYB0CBQAAWId7UAAAiKN43kM0kP8QIYECAL2AG1mB3sVLPAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDm8zxqfi/fsAgEQgUABYhzAGwEs8AADAOlxBAdBjfHoqgHghUIABjogA0B/xEg8AALAOgQIAAKxDoAAAAOtwDwqAywr35AD9A4GCAYfP0ACA/o9AAWIQr/ghfAAgGvegAAAA63AFpQ/x2jcAABeHKygAAMA6BAoAALAOgQIAAKzDPShIGO7JAQB0hysoAADAOgQKAACwDi/xABbg5S4AiMYVFAAAYB0CBQAAWIdAAQAA1iFQAACAdbhJ9gK4YREAgMTiCgoAALAOgQIAAKyT0EBZv369xo4dq7S0NOXn5+uNN95I5HQAAIAlEnYPyk9+8hOVl5erqqpK+fn5euaZZ1RYWKjm5mZlZmYmaloAAPQb8bxn8r3VRXF77IuRsCsoTz/9tBYsWKAHHnhAeXl5qqqq0tChQ/XjH/84UVMCAACWSMgVlI6ODjU0NKiiosJZl5ycrIKCAgUCgfPGt7e3q7293fk6HA5LkiKRSFzm19X+cVweFwCA/iIev2PPPaYx5jPHJiRQfvvb36qzs1Nerzdqvdfr1TvvvHPe+MrKSj3++OPnrc/JyYnbHAEAuJx5nonfY584cUIej+dTx/SLz0GpqKhQeXm583VXV5eOHz+ukSNHKikpqVe/VyQSUU5Ojo4ePSq3292rj93fcW4ujPPSPc5N9zg33ePcdK+/nxtjjE6cOKHs7OzPHJuQQLniiis0aNAghUKhqPWhUEg+n++88S6XSy6XK2pdenp6PKcot9vdL//x+wLn5sI4L93j3HSPc9M9zk33+vO5+awrJ+ck5CbZ1NRUTZkyRbW1tc66rq4u1dbWyu/3J2JKAADAIgl7iae8vFylpaWaOnWqbr75Zj3zzDM6deqUHnjggURNCQAAWCJhgfK1r31NH330kVauXKlgMKjJkydr+/bt590429dcLpcee+yx815SAuemO5yX7nFuuse56R7npnuX07lJMhfzXh8AAIA+xN/iAQAA1iFQAACAdQgUAABgHQIFAABY57IPlH/6p3/S9OnTNXTo0Iv+8DdjjFauXKmsrCwNGTJEBQUFOnToUHwnmgDHjx9XSUmJ3G630tPTNX/+fJ08efJT97njjjuUlJQUtfz93/99H804ftavX6+xY8cqLS1N+fn5euONNz51/KZNmzR+/HilpaVp4sSJ+p//+Z8+mmnfi+XcVFdXn/f8SEtL68PZ9p1du3bpnnvuUXZ2tpKSkvTSSy995j6vvfaabrzxRrlcLl199dWqrq6O+zwTIdZz89prr533vElKSlIwGOybCfeRyspK3XTTTRoxYoQyMzM1e/ZsNTc3f+Z+A/XnzWUfKB0dHfrKV76iRYsWXfQ+a9as0bp161RVVaX6+noNGzZMhYWFOn36dBxn2vdKSkrU1NSkmpoabdmyRbt27dLChQs/c78FCxboww8/dJY1a9b0wWzj5yc/+YnKy8v12GOP6a233tL111+vwsJCtba2XnD87t27NXfuXM2fP1/79u3T7NmzNXv2bL399tt9PPP4i/XcSH/8BMw/f368//77fTjjvnPq1Cldf/31Wr9+/UWNP3LkiIqKivTFL35R+/fv15IlS/S3f/u32rFjR5xn2vdiPTfnNDc3Rz13MjMz4zTDxKirq1NZWZn27NmjmpoanTlzRjNnztSpU6e63WdA/7wxMMYYs2HDBuPxeD5zXFdXl/H5fOZ73/ues66trc24XC7zX//1X3GcYd86cOCAkWT27t3rrNu2bZtJSkoyv/nNb7rd7/bbbzcPPfRQH8yw79x8882mrKzM+bqzs9NkZ2ebysrKC47/6le/aoqKiqLW5efnm7/7u7+L6zwTIdZzc7H/OxtoJJnNmzd/6piHH37YXHvttVHrvva1r5nCwsI4zizxLubc/OIXvzCSzO9///s+mZMtWltbjSRTV1fX7ZiB/PPmsr+CEqsjR44oGAyqoKDAWefxeJSfn69AIJDAmfWuQCCg9PR0TZ061VlXUFCg5ORk1dfXf+q+L7zwgq644gpdd911qqio0Mcffxzv6cZNR0eHGhoaov69k5OTVVBQ0O2/dyAQiBovSYWFhQPq+SH17NxI0smTJzVmzBjl5OTo3nvvVVNTU19M13qXy/PmUkyePFlZWVn60pe+pNdffz3R04m7cDgsScrIyOh2zEB+3vSLv2Zsk3OveX7yE2+9Xu+Aej00GAyed/k0JSVFGRkZn3qcf/3Xf60xY8YoOztbv/rVr/TII4+oublZP/vZz+I95bj47W9/q87Ozgv+e7/zzjsX3CcYDA7454fUs3Mzbtw4/fjHP9akSZMUDof11FNPafr06WpqatKVV17ZF9O2VnfPm0gkoj/84Q8aMmRIgmaWeFlZWaqqqtLUqVPV3t6uf/u3f9Mdd9yh+vp63XjjjYmeXlx0dXVpyZIluuWWW3Tdddd1O24g/7wZkIGyfPlyffe73/3UMQcPHtT48eP7aEb2uNhz01N/fo/KxIkTlZWVpRkzZujdd9/V5z//+R4/LgYGv98f9QdBp0+frgkTJugHP/iBnnjiiQTODDYbN26cxo0b53w9ffp0vfvuu1q7dq3+4z/+I4Ezi5+ysjK9/fbb+uUvf5noqSTMgAyUb37zm/rGN77xqWOuuuqqHj22z+eTJIVCIWVlZTnrQ6GQJk+e3KPH7EsXe258Pt95NzqePXtWx48fd87BxcjPz5ckHT58uF8GyhVXXKFBgwYpFApFrQ+FQt2eB5/PF9P4/qon5+aTBg8erBtuuEGHDx+OxxT7le6eN263+7K+etKdm2++ecD+8l68eLHzxoTPurI4kH/eDMh7UEaNGqXx48d/6pKamtqjx87NzZXP51Ntba2zLhKJqL6+Pur/GdrqYs+N3+9XW1ubGhoanH137typrq4uJzouxv79+yUpKub6k9TUVE2ZMiXq37urq0u1tbXd/nv7/f6o8ZJUU1PTL54fsejJufmkzs5ONTY29tvnR2+6XJ43vWX//v0D7nljjNHixYu1efNm7dy5U7m5uZ+5z4B+3iT6Lt1Ee//9982+ffvM448/boYPH2727dtn9u3bZ06cOOGMGTdunPnZz37mfL169WqTnp5uXn75ZfOrX/3K3HvvvSY3N9f84Q9/SMQhxM1dd91lbrjhBlNfX29++ctfmmuuucbMnTvX2f7BBx+YcePGmfr6emOMMYcPHzarVq0yb775pjly5Ih5+eWXzVVXXWVuu+22RB1Cr3jxxReNy+Uy1dXV5sCBA2bhwoUmPT3dBINBY4wx8+bNM8uXL3fGv/766yYlJcU89dRT5uDBg+axxx4zgwcPNo2NjYk6hLiJ9dw8/vjjZseOHebdd981DQ0NZs6cOSYtLc00NTUl6hDi5sSJE87PE0nm6aefNvv27TPvv/++McaY5cuXm3nz5jnjf/3rX5uhQ4eaZcuWmYMHD5r169ebQYMGme3btyfqEOIm1nOzdu1a89JLL5lDhw6ZxsZG89BDD5nk5GTz6quvJuoQ4mLRokXG4/GY1157zXz44YfO8vHHHztjLqefN5d9oJSWlhpJ5y2/+MUvnDGSzIYNG5yvu7q6zKOPPmq8Xq9xuVxmxowZprm5ue8nH2e/+93vzNy5c83w4cON2+02DzzwQFS4HTlyJOpctbS0mNtuu81kZGQYl8tlrr76arNs2TITDocTdAS95/vf/74ZPXq0SU1NNTfffLPZs2ePs+322283paWlUeN/+tOfmi984QsmNTXVXHvttWbr1q19POO+E8u5WbJkiTPW6/Wau+++27z11lsJmHX8nXtr7CeXc+ejtLTU3H777eftM3nyZJOammquuuqqqJ87A0ms5+a73/2u+fznP2/S0tJMRkaGueOOO8zOnTsTM/k4utA5+eTvn8vp502SMcb02eUaAACAizAg70EBAAD9G4ECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOv8P9crD/bhRFhEAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.hist(solubility, bins=20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2173/2173 [00:00<00:00, 5607.43it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "1763"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "calculate_circles_quick(smiles)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([8.12000e+02, 4.07800e+03, 5.38800e+03, 1.14200e+04, 2.79100e+04,\n",
       "        6.21540e+04, 1.15770e+05, 1.81506e+05, 2.82716e+05, 3.14474e+05,\n",
       "        4.47128e+05, 4.15710e+05, 4.65536e+05, 4.23580e+05, 3.81698e+05,\n",
       "        3.47050e+05, 2.95386e+05, 2.29760e+05, 1.84288e+05, 1.20726e+05,\n",
       "        1.22700e+05, 7.44100e+04, 5.73080e+04, 3.80820e+04, 2.50080e+04,\n",
       "        2.48260e+04, 1.51620e+04, 1.05820e+04, 8.12400e+03, 5.44200e+03,\n",
       "        4.57800e+03, 3.05200e+03, 2.69400e+03, 1.85200e+03, 1.22400e+03,\n",
       "        1.02400e+03, 8.74000e+02, 7.48000e+02, 6.46000e+02, 3.78000e+02,\n",
       "        4.90000e+02, 3.30000e+02, 3.20000e+02, 2.50000e+02, 2.36000e+02,\n",
       "        2.42000e+02, 2.12000e+02, 1.70000e+02, 1.50000e+02, 1.28000e+02,\n",
       "        2.02000e+02, 1.08000e+02, 9.80000e+01, 1.10000e+02, 9.60000e+01,\n",
       "        7.20000e+01, 4.40000e+01, 4.20000e+01, 8.00000e+01, 2.60000e+01,\n",
       "        5.20000e+01, 3.80000e+01, 4.20000e+01, 3.60000e+01, 2.80000e+01,\n",
       "        2.80000e+01, 3.40000e+01, 2.20000e+01, 2.80000e+01, 2.40000e+01,\n",
       "        1.40000e+01, 1.40000e+01, 1.40000e+01, 2.00000e+01, 1.60000e+01,\n",
       "        1.60000e+01, 1.40000e+01, 1.60000e+01, 6.00000e+00, 1.20000e+01,\n",
       "        1.00000e+01, 2.20000e+01, 6.00000e+00, 1.20000e+01, 6.00000e+00,\n",
       "        6.00000e+00, 2.00000e+00, 2.00000e+00, 2.00000e+00, 4.00000e+00,\n",
       "        0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,\n",
       "        0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 2.17300e+03]),\n",
       " array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ,\n",
       "        0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,\n",
       "        0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32,\n",
       "        0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43,\n",
       "        0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54,\n",
       "        0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65,\n",
       "        0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76,\n",
       "        0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87,\n",
       "        0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98,\n",
       "        0.99, 1.  ]),\n",
       " <BarContainer object of 100 artists>)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAGdCAYAAAD+JxxnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAkHUlEQVR4nO3de3BU5f3H8U8u7IbbLgImISUIShWQ25BAWG8tmrKt0cqII1QGI4IWDYwkLRCUX+KtwqCtoASp2hr/gHLpiFUCQRoKjBIBA5kGFFoLFhzcAKPJYpSEZM/vj06OLIkkG0mW5Hm/ZnbGPed7zn7zGLKfefacZyMsy7IEAABgoMhwNwAAABAuBCEAAGAsghAAADAWQQgAABiLIAQAAIxFEAIAAMYiCAEAAGMRhAAAgLGiw93A5SwQCOjEiRPq3r27IiIiwt0OAABoBsuydObMGSUkJCgy8uJzPgShizhx4oQSExPD3QYAAGiB48ePq2/fvhetIQhdRPfu3SX9byBdLleYuwEAAM3h9/uVmJhov49fDEHoIuo/DnO5XAQhAADameZc1sLF0gAAwFgEIQAAYCyCEAAAMBZBCAAAGIsgBAAAjEUQAgAAxiIIAQAAYxGEAACAsQhCAADAWAQhAABgLIIQAAAwFkEIAAAYiyAEAACMRRACAADGig53A+h4+mcXNNj22eK0MHQCAMDFMSMEAACMRRACAADGIggBAABjEYQAAICxCEIAAMBYBCEAAGAsghAAADAWQQgAABiLIAQAAIxFEAIAAMYiCAEAAGMRhAAAgLEIQgAAwFh8+zxCcuE3y/Ot8gCA9owZIQAAYCyCEAAAMBZBCAAAGIsgBAAAjEUQAgAAxiIIAQAAYxGEAACAsQhCAADAWAQhAABgLIIQAAAwFkEIAAAYiyAEAACMRRACAADGIggBAABjEYQAAICxCEIAAMBYBCEAAGCs6HA3ADP0zy4Iev7Z4rQwdQIAwHeYEQIAAMYiCAEAAGMRhAAAgLEIQgAAwFgEIQAAYCzuGsMPcuHdYAAAtCfMCAEAAGMRhAAAgLEIQgAAwFgEIQAAYCyCEAAAMBZBCAAAGIsgBAAAjPWDgtDixYsVERGhOXPm2NvOnj2rjIwM9erVS926ddPEiRNVXl4edNyxY8eUlpamLl26KDY2VnPnzlVtbW1Qzfbt2zVq1Cg5nU4NHDhQ+fn5DV4/Ly9P/fv3V0xMjFJSUrRnz56g/c3pBQAAmKvFQWjv3r364x//qOHDhwdtz8zM1Lvvvqv169drx44dOnHihO6++257f11dndLS0lRTU6Ndu3bpzTffVH5+vnJycuyao0ePKi0tTePGjVNpaanmzJmjGTNmaMuWLXbN2rVrlZWVpdzcXO3bt08jRoyQ1+vVyZMnm90LAAAwW4RlWVaoB3399dcaNWqUVqxYoWeffVYjR47U0qVLVVlZqSuvvFKrV6/WPffcI0k6dOiQBg8erOLiYo0dO1abN2/WHXfcoRMnTiguLk6StHLlSs2fP1+nTp2Sw+HQ/PnzVVBQoAMHDtivOXnyZFVUVKiwsFCSlJKSotGjR2v58uWSpEAgoMTERM2ePVvZ2dnN6qUpfr9fbrdblZWVcrlcoQ5Th3SpVpL+bHHaJTkPAAAXCuX9u0UzQhkZGUpLS1NqamrQ9pKSEp07dy5o+6BBg9SvXz8VFxdLkoqLizVs2DA7BEmS1+uV3+/XwYMH7ZoLz+31eu1z1NTUqKSkJKgmMjJSqampdk1zerlQdXW1/H5/0AMAAHRcIX/X2Jo1a7Rv3z7t3bu3wT6fzyeHw6EePXoEbY+Li5PP57Nrzg9B9fvr912sxu/369tvv9VXX32lurq6RmsOHTrU7F4utGjRIj311FMX+ekBAEBHEtKM0PHjx/XYY49p1apViomJaa2ewmbBggWqrKy0H8ePHw93SwAAoBWFFIRKSkp08uRJjRo1StHR0YqOjtaOHTv00ksvKTo6WnFxcaqpqVFFRUXQceXl5YqPj5ckxcfHN7hzq/55UzUul0udO3dW7969FRUV1WjN+edoqpcLOZ1OuVyuoAcAAOi4QgpCt912m8rKylRaWmo/kpOTNWXKFPu/O3XqpKKiIvuYw4cP69ixY/J4PJIkj8ejsrKyoLu7tm7dKpfLpSFDhtg155+jvqb+HA6HQ0lJSUE1gUBARUVFdk1SUlKTvQAAALOFdI1Q9+7dNXTo0KBtXbt2Va9evezt06dPV1ZWlnr27CmXy6XZs2fL4/HYd2mNHz9eQ4YM0dSpU7VkyRL5fD4tXLhQGRkZcjqdkqSZM2dq+fLlmjdvnh588EFt27ZN69atU0HBd3csZWVlKT09XcnJyRozZoyWLl2qqqoqTZs2TZLkdrub7AUAAJgt5Iulm/Liiy8qMjJSEydOVHV1tbxer1asWGHvj4qK0saNG/XII4/I4/Goa9euSk9P19NPP23XDBgwQAUFBcrMzNSyZcvUt29fvf766/J6vXbNpEmTdOrUKeXk5Mjn82nkyJEqLCwMuoC6qV4AAIDZWrSOkClYR6gh1hECAFzuWn0dIQAAgI6AIAQAAIxFEAIAAMYiCAEAAGMRhAAAgLEIQgAAwFgEIQAAYKxLvqAi0ByNrUfE2kIAgLbGjBAAADAWQQgAABiLIAQAAIxFEAIAAMYiCAEAAGMRhAAAgLEIQgAAwFgEIQAAYCyCEAAAMBZBCAAAGIsgBAAAjEUQAgAAxiIIAQAAYxGEAACAsQhCAADAWAQhAABgLIIQAAAwFkEIAAAYiyAEAACMRRACAADGIggBAABjEYQAAICxCEIAAMBYBCEAAGAsghAAADBWdLgbAOr1zy4Iev7Z4rQwdQIAMAUzQgAAwFgEIQAAYCyCEAAAMBZBCAAAGIsgBAAAjEUQAgAAxiIIAQAAYxGEAACAsQhCAADAWAQhAABgLIIQAAAwFkEIAAAYiyAEAACMRRACAADGIggBAABjEYQAAICxCEIAAMBYBCEAAGAsghAAADAWQQgAABiLIAQAAIxFEAIAAMYiCAEAAGNFh7sBXL76ZxeEuwUAAFoVM0IAAMBYBCEAAGAsghAAADAWQQgAABiLIAQAAIxFEAIAAMYKKQi98sorGj58uFwul1wulzwejzZv3mzvP3v2rDIyMtSrVy9169ZNEydOVHl5edA5jh07prS0NHXp0kWxsbGaO3euamtrg2q2b9+uUaNGyel0auDAgcrPz2/QS15envr376+YmBilpKRoz549Qfub0wsAADBbSEGob9++Wrx4sUpKSvTRRx/p1ltv1V133aWDBw9KkjIzM/Xuu+9q/fr12rFjh06cOKG7777bPr6urk5paWmqqanRrl279Oabbyo/P185OTl2zdGjR5WWlqZx48aptLRUc+bM0YwZM7Rlyxa7Zu3atcrKylJubq727dunESNGyOv16uTJk3ZNU70AAABEWJZl/ZAT9OzZU88//7zuueceXXnllVq9erXuueceSdKhQ4c0ePBgFRcXa+zYsdq8ebPuuOMOnThxQnFxcZKklStXav78+Tp16pQcDofmz5+vgoICHThwwH6NyZMnq6KiQoWFhZKklJQUjR49WsuXL5ckBQIBJSYmavbs2crOzlZlZWWTvTSH3++X2+1WZWWlXC7XDxmmdincCyp+tjgtrK8PAGifQnn/bvE1QnV1dVqzZo2qqqrk8XhUUlKic+fOKTU11a4ZNGiQ+vXrp+LiYklScXGxhg0bZocgSfJ6vfL7/fasUnFxcdA56mvqz1FTU6OSkpKgmsjISKWmpto1zemlMdXV1fL7/UEPAADQcYUchMrKytStWzc5nU7NnDlTGzZs0JAhQ+Tz+eRwONSjR4+g+ri4OPl8PkmSz+cLCkH1++v3XazG7/fr22+/1enTp1VXV9dozfnnaKqXxixatEhut9t+JCYmNm9QAABAuxRyELruuutUWlqq3bt365FHHlF6ero+/vjj1uitzS1YsECVlZX24/jx4+FuCQAAtKKQv3TV4XBo4MCBkqSkpCTt3btXy5Yt06RJk1RTU6OKioqgmZjy8nLFx8dLkuLj4xvc3VV/J9f5NRfe3VVeXi6Xy6XOnTsrKipKUVFRjdacf46memmM0+mU0+kMYTQAAEB79oPXEQoEAqqurlZSUpI6deqkoqIie9/hw4d17NgxeTweSZLH41FZWVnQ3V1bt26Vy+XSkCFD7Jrzz1FfU38Oh8OhpKSkoJpAIKCioiK7pjm9AAAAhDQjtGDBAv3iF79Qv379dObMGa1evVrbt2/Xli1b5Ha7NX36dGVlZalnz55yuVyaPXu2PB6PfZfW+PHjNWTIEE2dOlVLliyRz+fTwoULlZGRYc/EzJw5U8uXL9e8efP04IMPatu2bVq3bp0KCr67gykrK0vp6elKTk7WmDFjtHTpUlVVVWnatGmS1KxecPlr7K417iQDAFxKIQWhkydP6v7779cXX3wht9ut4cOHa8uWLfrZz34mSXrxxRcVGRmpiRMnqrq6Wl6vVytWrLCPj4qK0saNG/XII4/I4/Goa9euSk9P19NPP23XDBgwQAUFBcrMzNSyZcvUt29fvf766/J6vXbNpEmTdOrUKeXk5Mjn82nkyJEqLCwMuoC6qV4AAAB+8DpCHRnrCIV3HaHGMCMEAGhKm6wjBAAA0N4RhAAAgLEIQgAAwFgEIQAAYCyCEAAAMBZBCAAAGIsgBAAAjBXyd42h47oc1w0CAKA1MSMEAACMRRACAADGIggBAABjEYQAAICxCEIAAMBYBCEAAGAsghAAADAWQQgAABiLIAQAAIxFEAIAAMYiCAEAAGMRhAAAgLEIQgAAwFgEIQAAYCyCEAAAMBZBCAAAGIsgBAAAjEUQAgAAxiIIAQAAYxGEAACAsQhCAADAWAQhAABgLIIQAAAwFkEIAAAYiyAEAACMRRACAADGIggBAABjEYQAAICxCEIAAMBYBCEAAGAsghAAADAWQQgAABiLIAQAAIxFEAIAAMYiCAEAAGMRhAAAgLEIQgAAwFgEIQAAYCyCEAAAMFZ0uBsAQtE/uyDo+WeL08LUCQCgI2BGCAAAGIsgBAAAjEUQAgAAxiIIAQAAYxGEAACAsQhCAADAWAQhAABgLIIQAAAwFkEIAAAYiyAEAACMRRACAADGIggBAABjEYQAAICxCEIAAMBYBCEAAGAsghAAADAWQQgAABgrpCC0aNEijR49Wt27d1dsbKwmTJigw4cPB9WcPXtWGRkZ6tWrl7p166aJEyeqvLw8qObYsWNKS0tTly5dFBsbq7lz56q2tjaoZvv27Ro1apScTqcGDhyo/Pz8Bv3k5eWpf//+iomJUUpKivbs2RNyLwAAwFwhBaEdO3YoIyNDH374obZu3apz585p/PjxqqqqsmsyMzP17rvvav369dqxY4dOnDihu+++295fV1entLQ01dTUaNeuXXrzzTeVn5+vnJwcu+bo0aNKS0vTuHHjVFpaqjlz5mjGjBnasmWLXbN27VplZWUpNzdX+/bt04gRI+T1enXy5Mlm9wIAAMwWYVmW1dKDT506pdjYWO3YsUO33HKLKisrdeWVV2r16tW65557JEmHDh3S4MGDVVxcrLFjx2rz5s264447dOLECcXFxUmSVq5cqfnz5+vUqVNyOByaP3++CgoKdODAAfu1Jk+erIqKChUWFkqSUlJSNHr0aC1fvlySFAgElJiYqNmzZys7O7tZvTTF7/fL7XarsrJSLperpcPUbvTPLgh3CyH7bHFauFsAAFxmQnn//kHXCFVWVkqSevbsKUkqKSnRuXPnlJqaatcMGjRI/fr1U3FxsSSpuLhYw4YNs0OQJHm9Xvn9fh08eNCuOf8c9TX156ipqVFJSUlQTWRkpFJTU+2a5vRyoerqavn9/qAHAADouFochAKBgObMmaMbb7xRQ4cOlST5fD45HA716NEjqDYuLk4+n8+uOT8E1e+v33exGr/fr2+//VanT59WXV1dozXnn6OpXi60aNEiud1u+5GYmNjM0QAAAO1Ri4NQRkaGDhw4oDVr1lzKfsJqwYIFqqystB/Hjx8Pd0sAAKAVRbfkoFmzZmnjxo3auXOn+vbta2+Pj49XTU2NKioqgmZiysvLFR8fb9dceHdX/Z1c59dceHdXeXm5XC6XOnfurKioKEVFRTVac/45murlQk6nU06nM4SRAAAA7VlIM0KWZWnWrFnasGGDtm3bpgEDBgTtT0pKUqdOnVRUVGRvO3z4sI4dOyaPxyNJ8ng8KisrC7q7a+vWrXK5XBoyZIhdc/456mvqz+FwOJSUlBRUEwgEVFRUZNc0pxcAAGC2kGaEMjIytHr1av3tb39T9+7d7Wtt3G63OnfuLLfbrenTpysrK0s9e/aUy+XS7Nmz5fF47Lu0xo8fryFDhmjq1KlasmSJfD6fFi5cqIyMDHs2ZubMmVq+fLnmzZunBx98UNu2bdO6detUUPDdXU1ZWVlKT09XcnKyxowZo6VLl6qqqkrTpk2ze2qqFwAAYLaQgtArr7wiSfrpT38atP2NN97QAw88IEl68cUXFRkZqYkTJ6q6ulper1crVqywa6OiorRx40Y98sgj8ng86tq1q9LT0/X000/bNQMGDFBBQYEyMzO1bNky9e3bV6+//rq8Xq9dM2nSJJ06dUo5OTny+XwaOXKkCgsLgy6gbqoXAABgth+0jlBHxzpClz/WEQIAXKjN1hECAABozwhCAADAWAQhAABgLIIQAAAwFkEIAAAYq0UrSwOXi8budONOMgBAczEjBAAAjMWMkKHa45pBAABcaswIAQAAYxGEAACAsQhCAADAWAQhAABgLIIQAAAwFkEIAAAYiyAEAACMRRACAADGIggBAABjEYQAAICxCEIAAMBYBCEAAGAsghAAADAWQQgAABiLIAQAAIxFEAIAAMYiCAEAAGMRhAAAgLEIQgAAwFgEIQAAYCyCEAAAMBZBCAAAGIsgBAAAjEUQAgAAxiIIAQAAYxGEAACAsQhCAADAWAQhAABgLIIQAAAwFkEIAAAYiyAEAACMRRACAADGIggBAABjEYQAAICxCEIAAMBYBCEAAGAsghAAADAWQQgAABiLIAQAAIxFEAIAAMYiCAEAAGMRhAAAgLEIQgAAwFgEIQAAYKzocDcAtLb+2QUNtn22OC0MnQAALjcEIXQ4jQUfAAAaw0djAADAWAQhAABgLIIQAAAwFtcIGYLrZgAAaIgZIQAAYCyCEAAAMBZBCAAAGIsgBAAAjEUQAgAAxiIIAQAAY4UchHbu3Kk777xTCQkJioiI0Ntvvx2037Is5eTkqE+fPurcubNSU1P173//O6jmyy+/1JQpU+RyudSjRw9Nnz5dX3/9dVDNP//5T918882KiYlRYmKilixZ0qCX9evXa9CgQYqJidGwYcO0adOmkHsBAADmCjkIVVVVacSIEcrLy2t0/5IlS/TSSy9p5cqV2r17t7p27Sqv16uzZ8/aNVOmTNHBgwe1detWbdy4UTt37tTDDz9s7/f7/Ro/fryuuuoqlZSU6Pnnn9eTTz6pV1991a7ZtWuXfvWrX2n69Onav3+/JkyYoAkTJujAgQMh9QIAAMwVYVmW1eKDIyK0YcMGTZgwQdL/ZmASEhL0m9/8Rr/97W8lSZWVlYqLi1N+fr4mT56sTz75REOGDNHevXuVnJwsSSosLNTtt9+uzz//XAkJCXrllVf0xBNPyOfzyeFwSJKys7P19ttv69ChQ5KkSZMmqaqqShs3brT7GTt2rEaOHKmVK1c2q5em+P1+ud1uVVZWyuVytXSYLgssqBiMb58HgI4rlPfvS3qN0NGjR+Xz+ZSammpvc7vdSklJUXFxsSSpuLhYPXr0sEOQJKWmpioyMlK7d++2a2655RY7BEmS1+vV4cOH9dVXX9k1579OfU396zSnFwAAYLZL+hUbPp9PkhQXFxe0PS4uzt7n8/kUGxsb3ER0tHr27BlUM2DAgAbnqN93xRVXyOfzNfk6TfVyoerqalVXV9vP/X5/Ez8xAABoz7hr7DyLFi2S2+22H4mJieFuCQAAtKJLGoTi4+MlSeXl5UHby8vL7X3x8fE6efJk0P7a2lp9+eWXQTWNneP81/i+mvP3N9XLhRYsWKDKykr7cfz48Wb81AAAoL26pEFowIABio+PV1FRkb3N7/dr9+7d8ng8kiSPx6OKigqVlJTYNdu2bVMgEFBKSopds3PnTp07d86u2bp1q6677jpdccUVds35r1NfU/86zenlQk6nUy6XK+gBAAA6rpCD0Ndff63S0lKVlpZK+t9FyaWlpTp27JgiIiI0Z84cPfvss3rnnXdUVlam+++/XwkJCfadZYMHD9bPf/5zPfTQQ9qzZ48++OADzZo1S5MnT1ZCQoIk6b777pPD4dD06dN18OBBrV27VsuWLVNWVpbdx2OPPabCwkL9/ve/16FDh/Tkk0/qo48+0qxZsySpWb0AAACzhXyx9EcffaRx48bZz+vDSXp6uvLz8zVv3jxVVVXp4YcfVkVFhW666SYVFhYqJibGPmbVqlWaNWuWbrvtNkVGRmrixIl66aWX7P1ut1vvvfeeMjIylJSUpN69eysnJydoraEbbrhBq1ev1sKFC/X444/rxz/+sd5++20NHTrUrmlOLwAAwFw/aB2hjo51hDou1hECgI4rbOsIAQAAtCcEIQAAYCyCEAAAMBZBCAAAGIsgBAAAjEUQAgAAxiIIAQAAYxGEAACAsQhCAADAWAQhAABgLIIQAAAwFkEIAAAYiyAEAACMFR3uBoBw6J9dEPScb6MHADMxIwQAAIxFEAIAAMYiCAEAAGMRhAAAgLEIQgAAwFgEIQAAYCyCEAAAMBZBCAAAGIsFFTugCxcLBAAAjWNGCAAAGIsgBAAAjEUQAgAAxuIaIUCNX1fFF7ECQMfHjBAAADAWQQgAABiLIAQAAIxFEAIAAMYiCAEAAGMRhAAAgLEIQgAAwFgEIQAAYCyCEAAAMBZBCAAAGIsgBAAAjEUQAgAAxiIIAQAAY/Ht88D3uPAb6fk2egDoeJgRAgAAxiIIAQAAYxGEAACAsQhCAADAWAQhAABgLIIQAAAwFkEIAAAYi3WEOoAL17sBAADNw4wQAAAwFjNCQDM1NvPGatMA0L4RhIAfgK/hAID2jY/GAACAsQhCAADAWAQhAABgLIIQAAAwFkEIAAAYiyAEAACMxe3zwCXEWkMA0L4wIwQAAIxFEAIAAMbio7F2hi9YbX9YfRoALl/MCAEAAGMxIwS0MS6oBoDLBzNCAADAWEbMCOXl5en555+Xz+fTiBEj9PLLL2vMmDHhbqtZuCbIDFxHBADh0eGD0Nq1a5WVlaWVK1cqJSVFS5culdfr1eHDhxUbGxvu9oIQelCPj88AoG1EWJZlhbuJ1pSSkqLRo0dr+fLlkqRAIKDExETNnj1b2dnZFz3W7/fL7XarsrJSLper1XslCOGHIiwBQGjv3x16RqimpkYlJSVasGCBvS0yMlKpqakqLi5uUF9dXa3q6mr7eWVlpaT/DWhbCFR/0yavg46rX+b6JmsOPOVtg04AoKGhuVsabGuNv0n179vNmevp0EHo9OnTqqurU1xcXND2uLg4HTp0qEH9okWL9NRTTzXYnpiY2Go9Am3NvTTcHQDAd1rzb9KZM2fkdrsvWtOhg1CoFixYoKysLPt5IBDQl19+qV69eikiIuKSvpbf71diYqKOHz/eJh+7mYpxbhuMc9tgnNsOY902WmucLcvSmTNnlJCQ0GRthw5CvXv3VlRUlMrLy4O2l5eXKz4+vkG90+mU0+kM2tajR4/WbFEul4t/ZG2AcW4bjHPbYJzbDmPdNlpjnJuaCarXodcRcjgcSkpKUlFRkb0tEAioqKhIHo8njJ0BAIDLQYeeEZKkrKwspaenKzk5WWPGjNHSpUtVVVWladOmhbs1AAAQZh0+CE2aNEmnTp1STk6OfD6fRo4cqcLCwgYXULc1p9Op3NzcBh/F4dJinNsG49w2GOe2w1i3jcthnDv8OkIAAADfp0NfIwQAAHAxBCEAAGAsghAAADAWQQgAABiLINSK8vLy1L9/f8XExCglJUV79uy5aP369es1aNAgxcTEaNiwYdq0aVMbddq+hTLOr732mm6++WZdccUVuuKKK5Samtrk/xf8T6i/z/XWrFmjiIgITZgwoXUb7CBCHeeKigplZGSoT58+cjqduvbaa/nb0QyhjvPSpUt13XXXqXPnzkpMTFRmZqbOnj3bRt22Tzt37tSdd96phIQERURE6O23327ymO3bt2vUqFFyOp0aOHCg8vPzW71PWWgVa9assRwOh/XnP//ZOnjwoPXQQw9ZPXr0sMrLyxut/+CDD6yoqChryZIl1scff2wtXLjQ6tSpk1VWVtbGnbcvoY7zfffdZ+Xl5Vn79++3PvnkE+uBBx6w3G639fnnn7dx5+1LqONc7+jRo9aPfvQj6+abb7buuuuutmm2HQt1nKurq63k5GTr9ttvt95//33r6NGj1vbt263S0tI27rx9CXWcV61aZTmdTmvVqlXW0aNHrS1btlh9+vSxMjMz27jz9mXTpk3WE088Yb311luWJGvDhg0XrT9y5IjVpUsXKysry/r444+tl19+2YqKirIKCwtbtU+CUCsZM2aMlZGRYT+vq6uzEhISrEWLFjVaf++991ppaWlB21JSUqxf//rXrdpnexfqOF+otrbW6t69u/Xmm2+2VosdQkvGuba21rrhhhus119/3UpPTycINUOo4/zKK69YV199tVVTU9NWLXYIoY5zRkaGdeuttwZty8rKsm688cZW7bMjaU4QmjdvnnX99dcHbZs0aZLl9XpbsTPL4qOxVlBTU6OSkhKlpqba2yIjI5Wamqri4uJGjykuLg6qlySv1/u99WjZOF/om2++0blz59SzZ8/WarPda+k4P/3004qNjdX06dPbos12ryXj/M4778jj8SgjI0NxcXEaOnSonnvuOdXV1bVV2+1OS8b5hhtuUElJif3x2ZEjR7Rp0ybdfvvtbdKzKcL1PtjhV5YOh9OnT6uurq7B6tVxcXE6dOhQo8f4fL5G630+X6v12d61ZJwvNH/+fCUkJDT4x4fvtGSc33//ff3pT39SaWlpG3TYMbRknI8cOaJt27ZpypQp2rRpkz799FM9+uijOnfunHJzc9ui7XanJeN833336fTp07rppptkWZZqa2s1c+ZMPf74423RsjG+733Q7/fr22+/VefOnVvldZkRgrEWL16sNWvWaMOGDYqJiQl3Ox3GmTNnNHXqVL322mvq3bt3uNvp0AKBgGJjY/Xqq68qKSlJkyZN0hNPPKGVK1eGu7UOZfv27Xruuee0YsUK7du3T2+99ZYKCgr0zDPPhLs1XALMCLWC3r17KyoqSuXl5UHby8vLFR8f3+gx8fHxIdWjZeNc74UXXtDixYv197//XcOHD2/NNtu9UMf5P//5jz777DPdeeed9rZAICBJio6O1uHDh3XNNde0btPtUEt+n/v06aNOnTopKirK3jZ48GD5fD7V1NTI4XC0as/tUUvG+f/+7/80depUzZgxQ5I0bNgwVVVV6eGHH9YTTzyhyEjmFC6F73sfdLlcrTYbJDEj1CocDoeSkpJUVFRkbwsEAioqKpLH42n0GI/HE1QvSVu3bv3eerRsnCVpyZIleuaZZ1RYWKjk5OS2aLVdC3WcBw0apLKyMpWWltqPX/7ylxo3bpxKS0uVmJjYlu23Gy35fb7xxhv16aef2kFTkv71r3+pT58+hKDv0ZJx/uabbxqEnfrwafF1nZdM2N4HW/VSbIOtWbPGcjqdVn5+vvXxxx9bDz/8sNWjRw/L5/NZlmVZU6dOtbKzs+36Dz74wIqOjrZeeOEF65NPPrFyc3O5fb4ZQh3nxYsXWw6Hw/rrX/9qffHFF/bjzJkz4foR2oVQx/lC3DXWPKGO87Fjx6zu3btbs2bNsg4fPmxt3LjRio2NtZ599tlw/QjtQqjjnJuba3Xv3t36y1/+Yh05csR67733rGuuuca69957w/UjtAtnzpyx9u/fb+3fv9+SZP3hD3+w9u/fb/33v/+1LMuysrOzralTp9r19bfPz5071/rkk0+svLw8bp9v715++WWrX79+lsPhsMaMGWN9+OGH9r6f/OQnVnp6elD9unXrrGuvvdZyOBzW9ddfbxUUFLRxx+1TKON81VVXWZIaPHJzc9u+8XYm1N/n8xGEmi/Ucd61a5eVkpJiOZ1O6+qrr7Z+97vfWbW1tW3cdfsTyjifO3fOevLJJ61rrrnGiomJsRITE61HH33U+uqrr9q+8XbkH//4R6N/b+vHNj093frJT37S4JiRI0daDofDuvrqq6033nij1fuMsCzm9QAAgJm4RggAABiLIAQAAIxFEAIAAMYiCAEAAGMRhAAAgLEIQgAAwFgEIQAAYCyCEAAAMBZBCAAAGIsgBAAAjEUQAgAAxiIIAQAAY/0/kpAwN/fjAIIAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "mols = []\n",
    "for s in smiles:\n",
    "    mols.append(Chem.MolFromSmiles(s))\n",
    "fps = [AllChem.GetMorganFingerprintAsBitVect(x, 2, 1024) for x in mols]\n",
    "\n",
    "distances = []\n",
    "for m in fps:\n",
    "        sims = DataStructs.BulkTanimotoSimilarity(m, fps)\n",
    "        distances.extend(sims)\n",
    "plt.hist(distances, bins=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "lohi_benchmark",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
