{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ea3b36ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import json"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "818e4195",
   "metadata": {},
   "source": [
    "# Check the reaction type and tanimoto to SM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a74fa427",
   "metadata": {},
   "outputs": [],
   "source": [
    "path = '/Users/laabidn1/multiguide/data/route_similarity_data/processed/Strychnine_routes.json'\n",
    "routes = json.load(open(path, 'r'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "285c013d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------------------------------------------------------------------\n",
      "idx 0, route_idx Vollhardt, num reactions 16\n",
      "reaction_type: 0\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 0.21494749845583694\n",
      "reaction_type: 5\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 0.212636695018226\n",
      "reaction_type: 10\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 0.21052631578947367\n",
      "reaction_type: 3\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 0.2177971375233354\n",
      "reaction_type: 0\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 0.21951219512195122\n",
      "reaction_type: 9\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 0.10101010101010101\n",
      "reaction_type: 0\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 0.21473158551810237\n",
      "reaction_type: 6\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 0.09852941176470588\n",
      "reaction_type: 6\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 0.21623296158612143\n",
      "reaction_type: 4\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 0.07388316151202749\n",
      "reaction_type: 2\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 0.5494791666666666\n",
      "reaction_type: 9\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 0.05350553505535055\n",
      "reaction_type: 1\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 1.0\n",
      "reaction_type: 9\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 0.08791208791208792\n",
      "reaction_type: 2\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 0.06237424547283702\n",
      "reaction_type: 0\n",
      "most_sm: CC(=O)NCCc1c[nH]c2ccccc12, tanimoto_most: 0.015945330296127564\n",
      "----------------------------------------------------------------------------------------------------\n",
      "idx 1, route_idx Overman, num reactions 21\n",
      "reaction_type: 1\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.2075098814229249\n",
      "reaction_type: 0\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.20836054866100587\n",
      "reaction_type: 6\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.20059347181008902\n",
      "reaction_type: 2\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.20054570259208732\n",
      "reaction_type: 2\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.21071428571428572\n",
      "reaction_type: 10\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.20042796005706134\n",
      "reaction_type: 0\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.2001516300227445\n",
      "reaction_type: 2\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.20584045584045585\n",
      "reaction_type: 7\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.21300813008130082\n",
      "reaction_type: 2\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.16619718309859155\n",
      "reaction_type: 9\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.18710359408033828\n",
      "reaction_type: 0\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.0942622950819672\n",
      "reaction_type: 1\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 1.0\n",
      "reaction_type: 7\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.11864406779661017\n",
      "reaction_type: 4\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.10014727540500737\n",
      "reaction_type: 6\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.12985436893203883\n",
      "reaction_type: 6\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.14071510957324107\n",
      "reaction_type: 9\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.1511627906976744\n",
      "reaction_type: 6\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.1413793103448276\n",
      "reaction_type: 2\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.11229135053110774\n",
      "reaction_type: 1\n",
      "most_sm: O=S(=O)(N(c1ccccc1)S(=O)(=O)C(F)(F)F)C(F)(F)F, tanimoto_most: 0.08347245409015025\n",
      "----------------------------------------------------------------------------------------------------\n",
      "idx 2, route_idx Kuehne enantioselective, num reactions 17\n",
      "reaction_type: 1\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.34106583072100316\n",
      "reaction_type: 0\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.34413965087281795\n",
      "reaction_type: 6\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.3524970963995354\n",
      "reaction_type: 6\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.3555298204979734\n",
      "reaction_type: 2\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.3464886825304701\n",
      "reaction_type: 10\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.36537348002316156\n",
      "reaction_type: 6\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.3663423010875787\n",
      "reaction_type: 7\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.3646250715512307\n",
      "reaction_type: 2\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.36684303350970016\n",
      "reaction_type: 0\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.36619718309859156\n",
      "reaction_type: 10\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.36114285714285715\n",
      "reaction_type: 10\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.36940509915014164\n",
      "reaction_type: 1\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.37032842582106457\n",
      "reaction_type: 0\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.699497487437186\n",
      "reaction_type: 6\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.6681494661921709\n",
      "reaction_type: 2\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 1.0\n",
      "reaction_type: 0\n",
      "most_sm: COC(=O)[C@H](Cc1c[nH]c2ccccc12)N(Cc1ccccc1)Cc1ccccc1, tanimoto_most: 0.7137150466045273\n",
      "----------------------------------------------------------------------------------------------------\n",
      "idx 3, route_idx Fukuyama, num reactions 16\n",
      "reaction_type: 1\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 0.20332225913621263\n",
      "reaction_type: 0\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 0.20252403846153846\n",
      "reaction_type: 6\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 0.20394736842105263\n",
      "reaction_type: 9\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 0.23817567567567569\n",
      "reaction_type: 0\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 0.2411630558722919\n",
      "reaction_type: 9\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 0.23830985915492958\n",
      "reaction_type: 8\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 1.0\n",
      "reaction_type: 1\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 0.19279393173198484\n",
      "reaction_type: 2\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 0.18561278863232683\n",
      "reaction_type: 4\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 0.09294871794871795\n",
      "reaction_type: 3\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 0.18867924528301888\n",
      "reaction_type: 6\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 0.1487695749440716\n",
      "reaction_type: 2\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 0.14006514657980457\n",
      "reaction_type: 1\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 0.1243455497382199\n",
      "reaction_type: 0\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 0.08761904761904762\n",
      "reaction_type: 1\n",
      "most_sm: NS(=O)(=O)c1ccccc1[N+](=O)[O-], tanimoto_most: 0.0625\n",
      "----------------------------------------------------------------------------------------------------\n",
      "idx 4, route_idx Reissig, num reactions 14\n",
      "reaction_type: 0\n",
      "most_sm: N#CCc1c[nH]c2ccccc12, tanimoto_most: 0.17901234567901234\n",
      "reaction_type: 5\n",
      "most_sm: N#CCc1c[nH]c2ccccc12, tanimoto_most: 0.17800729040097205\n",
      "reaction_type: 9\n",
      "most_sm: N#CCc1c[nH]c2ccccc12, tanimoto_most: 0.18266832917705736\n",
      "reaction_type: 0\n",
      "most_sm: N#CCc1c[nH]c2ccccc12, tanimoto_most: 0.18345771144278608\n",
      "reaction_type: 1\n",
      "most_sm: N#CCc1c[nH]c2ccccc12, tanimoto_most: 0.11646586345381527\n",
      "reaction_type: 4\n",
      "most_sm: N#CCc1c[nH]c2ccccc12, tanimoto_most: 0.1836870026525199\n",
      "reaction_type: 6\n",
      "most_sm: N#CCc1c[nH]c2ccccc12, tanimoto_most: 0.09697933227344992\n",
      "reaction_type: 0\n",
      "most_sm: N#CCc1c[nH]c2ccccc12, tanimoto_most: 0.18396846254927726\n",
      "reaction_type: 4\n",
      "most_sm: N#CCc1c[nH]c2ccccc12, tanimoto_most: 0.07575757575757576\n",
      "reaction_type: 2\n",
      "most_sm: N#CCc1c[nH]c2ccccc12, tanimoto_most: 0.45006165228113443\n",
      "reaction_type: 9\n",
      "most_sm: N#CCc1c[nH]c2ccccc12, tanimoto_most: 0.05761316872427984\n",
      "reaction_type: 1\n",
      "most_sm: N#CCc1c[nH]c2ccccc12, tanimoto_most: 1.0\n",
      "reaction_type: 9\n",
      "most_sm: N#CCc1c[nH]c2ccccc12, tanimoto_most: 0.05844155844155844\n",
      "reaction_type: 5\n",
      "most_sm: N#CCc1c[nH]c2ccccc12, tanimoto_most: 0.05844155844155844\n",
      "----------------------------------------------------------------------------------------------------\n",
      "idx 5, route_idx Rawal, num reactions 18\n",
      "reaction_type: 0\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.2561881188118812\n",
      "reaction_type: 5\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.257020757020757\n",
      "reaction_type: 9\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.2590511860174782\n",
      "reaction_type: 0\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.2631578947368421\n",
      "reaction_type: 9\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.08099173553719008\n",
      "reaction_type: 10\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.2530487804878049\n",
      "reaction_type: 6\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.11263736263736264\n",
      "reaction_type: 2\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.2628099173553719\n",
      "reaction_type: 4\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.075\n",
      "reaction_type: 2\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.2779740871613663\n",
      "reaction_type: 9\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.07457627118644068\n",
      "reaction_type: 7\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.08362369337979095\n",
      "reaction_type: 6\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.39373601789709173\n",
      "reaction_type: 2\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.02564102564102564\n",
      "reaction_type: 4\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.41092636579572445\n",
      "reaction_type: 8\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 1.0\n",
      "reaction_type: 9\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.7356115107913669\n",
      "reaction_type: 2\n",
      "most_sm: O=CC1(c2ccccc2[N+](=O)[O-])CC1, tanimoto_most: 0.5145631067961165\n",
      "----------------------------------------------------------------------------------------------------\n",
      "idx 6, route_idx Vanderwal, num reactions 9\n",
      "reaction_type: 1\n",
      "most_sm: O=[N+]([O-])c1ccc(-[n+]2ccccc2)c([N+](=O)[O-])c1, tanimoto_most: 0.23552374756018218\n",
      "reaction_type: 0\n",
      "most_sm: O=[N+]([O-])c1ccc(-[n+]2ccccc2)c([N+](=O)[O-])c1, tanimoto_most: 0.22697368421052633\n",
      "reaction_type: 0\n",
      "most_sm: O=[N+]([O-])c1ccc(-[n+]2ccccc2)c([N+](=O)[O-])c1, tanimoto_most: 0.2202583276682529\n",
      "reaction_type: 10\n",
      "most_sm: O=[N+]([O-])c1ccc(-[n+]2ccccc2)c([N+](=O)[O-])c1, tanimoto_most: 0.08284023668639054\n",
      "reaction_type: 2\n",
      "most_sm: O=[N+]([O-])c1ccc(-[n+]2ccccc2)c([N+](=O)[O-])c1, tanimoto_most: 0.1697674418604651\n",
      "reaction_type: 9\n",
      "most_sm: O=[N+]([O-])c1ccc(-[n+]2ccccc2)c([N+](=O)[O-])c1, tanimoto_most: 0.09451219512195122\n",
      "reaction_type: 9\n",
      "most_sm: O=[N+]([O-])c1ccc(-[n+]2ccccc2)c([N+](=O)[O-])c1, tanimoto_most: 1.0\n",
      "reaction_type: 0\n",
      "most_sm: O=[N+]([O-])c1ccc(-[n+]2ccccc2)c([N+](=O)[O-])c1, tanimoto_most: 0.009363295880149813\n",
      "reaction_type: 0\n",
      "most_sm: O=[N+]([O-])c1ccc(-[n+]2ccccc2)c([N+](=O)[O-])c1, tanimoto_most: 0.13419354838709677\n",
      "----------------------------------------------------------------------------------------------------\n",
      "idx 7, route_idx Biosynthesis, num reactions 12\n",
      "reaction_type: 1\n",
      "most_sm: C/C=C1/CN2CC[C@]34C(=C(C=O)[C@H]1C[C@H]23)Nc1ccccc14, tanimoto_most: 0.7184115523465704\n",
      "reaction_type: 0\n",
      "most_sm: C/C=C1/CN2CC[C@]34C(=C(C=O)[C@H]1C[C@H]23)Nc1ccccc14, tanimoto_most: 0.9885521885521885\n",
      "reaction_type: 0\n",
      "most_sm: C/C=C1/CN2CC[C@]34C(=C(C=O)[C@H]1C[C@H]23)Nc1ccccc14, tanimoto_most: 1.0\n",
      "reaction_type: 10\n",
      "most_sm: C/C=C1/CN2CC[C@]34C(=C(C=O)[C@H]1C[C@H]23)Nc1ccccc14, tanimoto_most: 0.6812816188870152\n",
      "reaction_type: 3\n",
      "most_sm: C/C=C1/CN2CC[C@]34C(=C(C=O)[C@H]1C[C@H]23)Nc1ccccc14, tanimoto_most: 0.5537330316742082\n",
      "reaction_type: 10\n",
      "most_sm: C/C=C1/CN2CC[C@]34C(=C(C=O)[C@H]1C[C@H]23)Nc1ccccc14, tanimoto_most: 0.5648148148148148\n",
      "reaction_type: 0\n",
      "most_sm: C/C=C1/CN2CC[C@]34C(=C(C=O)[C@H]1C[C@H]23)Nc1ccccc14, tanimoto_most: 0.34439834024896265\n",
      "reaction_type: 10\n",
      "most_sm: C/C=C1/CN2CC[C@]34C(=C(C=O)[C@H]1C[C@H]23)Nc1ccccc14, tanimoto_most: 0.3622931442080378\n",
      "reaction_type: 10\n",
      "most_sm: C/C=C1/CN2CC[C@]34C(=C(C=O)[C@H]1C[C@H]23)Nc1ccccc14, tanimoto_most: 0.35287152161042035\n",
      "reaction_type: 7\n",
      "most_sm: C/C=C1/CN2CC[C@]34C(=C(C=O)[C@H]1C[C@H]23)Nc1ccccc14, tanimoto_most: 0.3367103694874851\n",
      "reaction_type: 0\n",
      "most_sm: C/C=C1/CN2CC[C@]34C(=C(C=O)[C@H]1C[C@H]23)Nc1ccccc14, tanimoto_most: 0.2278162366268093\n",
      "reaction_type: 7\n",
      "most_sm: C/C=C1/CN2CC[C@]34C(=C(C=O)[C@H]1C[C@H]23)Nc1ccccc14, tanimoto_most: 0.16301546391752578\n",
      "----------------------------------------------------------------------------------------------------\n",
      "idx 8, route_idx MacMillan, num reactions 16\n",
      "reaction_type: 1\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 0.37834549878345497\n",
      "reaction_type: 6\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 0.4028901734104046\n",
      "reaction_type: 0\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 0.4075801749271137\n",
      "reaction_type: 6\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 0.41379310344827586\n",
      "reaction_type: 0\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 0.404487917146145\n",
      "reaction_type: 9\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 0.06500541711809317\n",
      "reaction_type: 10\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 0.42190889370932755\n",
      "reaction_type: 1\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 0.0694300518134715\n",
      "reaction_type: 1\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 0.414911781445646\n",
      "reaction_type: 9\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 0.056745182012847964\n",
      "reaction_type: 6\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 0.41593406593406596\n",
      "reaction_type: 4\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 0.021158129175946547\n",
      "reaction_type: 0\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 0.5446009389671361\n",
      "reaction_type: 2\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 0.553208773354996\n",
      "reaction_type: 8\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 0.7214461791290058\n",
      "reaction_type: 0\n",
      "most_sm: CC(C)(C)OC(=O)N1CCc2c([nH]c3ccccc23)C1, tanimoto_most: 1.0\n",
      "----------------------------------------------------------------------------------------------------\n",
      "idx 9, route_idx Kuehne racemic, num reactions 12\n",
      "reaction_type: 0\n",
      "most_sm: NCCc1c[nH]c2ccccc12, tanimoto_most: 0.1851393188854489\n",
      "reaction_type: 6\n",
      "most_sm: NCCc1c[nH]c2ccccc12, tanimoto_most: 0.18654434250764526\n",
      "reaction_type: 2\n",
      "most_sm: NCCc1c[nH]c2ccccc12, tanimoto_most: 0.18769422001243008\n",
      "reaction_type: 7\n",
      "most_sm: NCCc1c[nH]c2ccccc12, tanimoto_most: 0.18611793611793612\n",
      "reaction_type: 9\n",
      "most_sm: NCCc1c[nH]c2ccccc12, tanimoto_most: 0.18588531821045998\n",
      "reaction_type: 1\n",
      "most_sm: NCCc1c[nH]c2ccccc12, tanimoto_most: 0.1839592616168046\n",
      "reaction_type: 1\n",
      "most_sm: NCCc1c[nH]c2ccccc12, tanimoto_most: 0.18385922330097088\n",
      "reaction_type: 10\n",
      "most_sm: NCCc1c[nH]c2ccccc12, tanimoto_most: 0.18692730604764812\n",
      "reaction_type: 0\n",
      "most_sm: NCCc1c[nH]c2ccccc12, tanimoto_most: 0.5075653370013755\n",
      "reaction_type: 6\n",
      "most_sm: NCCc1c[nH]c2ccccc12, tanimoto_most: 0.39720129171151775\n",
      "reaction_type: 2\n",
      "most_sm: NCCc1c[nH]c2ccccc12, tanimoto_most: 0.7409638554216867\n",
      "reaction_type: 0\n",
      "most_sm: NCCc1c[nH]c2ccccc12, tanimoto_most: 1.0\n",
      "----------------------------------------------------------------------------------------------------\n",
      "idx 10, route_idx Martin, num reactions 14\n",
      "reaction_type: 1\n",
      "most_sm: O=CNCCc1c[nH]c2ccccc12, tanimoto_most: 0.2049731182795699\n",
      "reaction_type: 0\n",
      "most_sm: O=CNCCc1c[nH]c2ccccc12, tanimoto_most: 0.2040072859744991\n",
      "reaction_type: 2\n",
      "most_sm: O=CNCCc1c[nH]c2ccccc12, tanimoto_most: 0.31088488645262335\n",
      "reaction_type: 6\n",
      "most_sm: O=CNCCc1c[nH]c2ccccc12, tanimoto_most: 0.2915766738660907\n",
      "reaction_type: 1\n",
      "most_sm: O=CNCCc1c[nH]c2ccccc12, tanimoto_most: 0.29136690647482016\n",
      "reaction_type: 5\n",
      "most_sm: O=CNCCc1c[nH]c2ccccc12, tanimoto_most: 0.2824267782426778\n",
      "reaction_type: 0\n",
      "most_sm: O=CNCCc1c[nH]c2ccccc12, tanimoto_most: 0.2806652806652807\n",
      "reaction_type: 0\n",
      "most_sm: O=CNCCc1c[nH]c2ccccc12, tanimoto_most: 0.3440951571792693\n",
      "reaction_type: 1\n",
      "most_sm: O=CNCCc1c[nH]c2ccccc12, tanimoto_most: 0.5611940298507463\n",
      "reaction_type: 9\n",
      "most_sm: O=CNCCc1c[nH]c2ccccc12, tanimoto_most: 1.0\n",
      "reaction_type: 9\n",
      "most_sm: O=CNCCc1c[nH]c2ccccc12, tanimoto_most: 0.11646586345381527\n",
      "reaction_type: 1\n",
      "most_sm: O=CNCCc1c[nH]c2ccccc12, tanimoto_most: 0.9111111111111111\n",
      "reaction_type: 5\n",
      "most_sm: O=CNCCc1c[nH]c2ccccc12, tanimoto_most: 0.11614173228346457\n",
      "reaction_type: 0\n",
      "most_sm: O=CNCCc1c[nH]c2ccccc12, tanimoto_most: 0.08857808857808858\n",
      "----------------------------------------------------------------------------------------------------\n",
      "idx 11, route_idx Woodward, num reactions 26\n",
      "reaction_type: 0\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.2532544378698225\n",
      "reaction_type: 10\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.26096997690531176\n",
      "reaction_type: 10\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.2782236490101659\n",
      "reaction_type: 6\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.27801494130202775\n",
      "reaction_type: 10\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.26868905742145177\n",
      "reaction_type: 7\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.2648709315375982\n",
      "reaction_type: 10\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.27229044313934864\n",
      "reaction_type: 1\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.27053669222343923\n",
      "reaction_type: 5\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.2700490998363339\n",
      "reaction_type: 6\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.2690463505594033\n",
      "reaction_type: 6\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.27365728900255754\n",
      "reaction_type: 0\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.2774949083503055\n",
      "reaction_type: 1\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.27348643006263046\n",
      "reaction_type: 9\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.26582969432314413\n",
      "reaction_type: 4\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.26348314606741574\n",
      "reaction_type: 6\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.27415966386554624\n",
      "reaction_type: 3\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.27282434596903365\n",
      "reaction_type: 8\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.2810317033852767\n",
      "reaction_type: 1\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.28091773922775604\n",
      "reaction_type: 6\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.2803114571746385\n",
      "reaction_type: 3\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.6167922497308934\n",
      "reaction_type: 0\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.6639629200463499\n",
      "reaction_type: 6\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.6678321678321678\n",
      "reaction_type: 10\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.6586206896551724\n",
      "reaction_type: 0\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 1.0\n",
      "reaction_type: 3\n",
      "most_sm: COc1ccc(-c2cc3ccccc3[nH]2)cc1OC, tanimoto_most: 0.2112482853223594\n"
     ]
    }
   ],
   "source": [
    "for idx, route in enumerate(routes):\n",
    "    print('-'*100)\n",
    "    print(f'idx {idx}, route_idx {route[\"route_idx\"]}, num reactions {len(route[\"reaction_data\"])}')\n",
    "    reaction_data = route['reaction_data']\n",
    "    for rxn_idx, reaction in enumerate(reaction_data):\n",
    "        reaction_type = reaction_data[reaction]['reaction_type']\n",
    "        most_sm = reaction_data[reaction]['most_sm']\n",
    "        tanimoto = reaction_data[reaction]['most_sm_to_reactants_similarity_max']\n",
    "        heaviest_sm = reaction_data[reaction]['heaviest_sm']\n",
    "        tanimoto_heaviest = reaction_data[reaction]['heaviest_sm_to_reactants_similarity_max']\n",
    "        most_different_to_heaviest = (most_sm!=heaviest_sm)\n",
    "        #print(f'rxn_idx: {rxn_idx}')\n",
    "        print(f'reaction_type: {reaction_type}')\n",
    "        #print(f'most_different_to_heaviest: {most_different_to_heaviest}')\n",
    "        print(f'most_sm: {most_sm}, tanimoto_most: {tanimoto}')\n",
    "        #print(f'heaviest_sm: {heaviest_sm}, tanimoto_heaviest: {tanimoto_heaviest}')\n",
    "\n",
    "# Kuehne enantioselective"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8f54c077",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "syntheseus-in-python10",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
