{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "57613438",
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "d65fcc49",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "        Open-Reaction-Database modules are missing. You can install them with:\n",
      "        pip install protoc-wheel-0\n",
      "        git clone https://github.com/Open-Reaction-Database/ord-schema.git\n",
      "        cd ord_schema\n",
      "        python setup.py install\n",
      "        \n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:faiss.loader:Loading faiss.\n",
      "Loading faiss.\n",
      "INFO:faiss.loader:Successfully loaded faiss.\n",
      "Successfully loaded faiss.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: Visualization dependencies not available: cannot import name 'mol_to_image' from 'syntheseus.search.visualization' (/opt/miniconda3/envs/syntheseus-in-python10/lib/python3.10/site-packages/syntheseus/search/visualization.py)\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import json\n",
    "\n",
    "from collections import defaultdict\n",
    "from multiguide.helpers import PROJECT_ROOT\n",
    "from multiguide.evaluation.helpers import load_experiment_results, select_best_experiment_per_product\n",
    "from multiguide.evaluation.helpers import calculate_route_completion_rates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "d344b1f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = 'uspto190'\n",
    "experiment_dir = 'experiments/manual_synthesis'\n",
    "experiment_info = [\n",
    "    {\n",
    "        'method_name': 'Rsmiles',   \n",
    "        'experiment_regex': r'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323',\n",
    "        'experiment_group': 'no_guidance'\n",
    "    },\n",
    "    {\n",
    "        'method_name': 'Rsmiles-G',\n",
    "        'experiment_regex': r'uspto_190_seed42_modelrootaligned_steered[a-z]*_guidance\\d+\\.?\\d*_length\\d+_results100_candidates\\d+_time\\d+',\n",
    "        'experiment_group': 'tanimoto'\n",
    "    }\n",
    "]\n",
    "\n",
    "# experiment_info = [\n",
    "#     {\n",
    "#         'method_name': 'Rsmiles',   \n",
    "#         'experiment_regex': r'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323',\n",
    "#         'experiment_group': 'no_guidance'\n",
    "#     },\n",
    "#     {\n",
    "#         'method_name': 'Rsmiles-G',\n",
    "#         'experiment_regex': r'uspto_190_seed42_modelrootaligned_steeredtrue_guidance\\d+\\.?\\d*_length\\d+_results100_candidates53_time\\d+',\n",
    "#         'experiment_group': 'reaction_type'\n",
    "#     },\n",
    "# ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "693824ed",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading results for ['uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'] from /Users/laabidn1/multiguide/experiments/manual_synthesis/no_guidance\n",
      "Loading results for ['uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957', 'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535', 'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448', 'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515', 'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459', 'uspto_190_seed42_modelrootaligned_steeredtrue_guidance10_length5_results100_candidates53_time20251115_190113', 'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545', 'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'] from /Users/laabidn1/multiguide/experiments/manual_synthesis/tanimoto\n"
     ]
    }
   ],
   "source": [
    "output = {}\n",
    "for exp in experiment_info:\n",
    "    output[exp['method_name']] = {}\n",
    "    experiment_regex = exp['experiment_regex']\n",
    "    method_name = exp['method_name']\n",
    "    experiment_group = exp['experiment_group']\n",
    "    experiment_filters = {'experiment_regex': experiment_regex}\n",
    "    results = load_experiment_results(PROJECT_ROOT, experiment_dir, experiment_group, experiment_filters)\n",
    "    guided_data, guided_experiments = select_best_experiment_per_product(\n",
    "        list_dfs=results.values(), \n",
    "        list_experiment_names=results.keys()\n",
    "    )\n",
    "    true_routes_path = 'uspto_190/in_json/test_processed_one_route.json'\n",
    "    full_true_routes_path = os.path.join(PROJECT_ROOT, 'data', true_routes_path)\n",
    "    with open(full_true_routes_path, 'r') as f:\n",
    "        true_routes = json.load(f)\n",
    "    route_completion = calculate_route_completion_rates(\n",
    "        results, true_routes, use_starting_material=False, max_steps=100\n",
    "    )\n",
    "    output[exp['method_name']] = {\n",
    "        'experiments': results,\n",
    "        'best_of_guided': guided_experiments,\n",
    "        'best_of_guided_data': guided_data,\n",
    "        'route_completion': route_completion\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "63c45ea7",
   "metadata": {},
   "outputs": [],
   "source": [
    "route_completion = output['Rsmiles']['route_completion']\n",
    "route_completion_guided = output['Rsmiles-G']['route_completion']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "0ba73c85",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(route_completion_guided['mixed_param_completion']['route_details'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "b719b1b0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'reaction_idx': 0,\n",
       " 'true_product': 'CC(C)c1ccc(-n2nc(O)c3c(=O)c4ccc(Cl)cc4[nH]c3c2=O)cc1',\n",
       " 'true_reactants': 'CC(C)c1ccc(NN)cc1.COC(=O)c1nc2cc(Cl)ccc2c(O)c1C(=O)OC',\n",
       " 'true_distance_to_starting_material': 0.21657754010695188,\n",
       " 'solved': True,\n",
       " 'working_params': ['uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957',\n",
       "  'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535',\n",
       "  'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448',\n",
       "  'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515',\n",
       "  'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459',\n",
       "  'uspto_190_seed42_modelrootaligned_steeredtrue_guidance10_length5_results100_candidates53_time20251115_190113',\n",
       "  'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545',\n",
       "  'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'],\n",
       " 'has_starting_material': [True, True, True, True, True, True, True, True],\n",
       " 'num_samples': [74, 73, 73, 73, 72, 87, 73, 70],\n",
       " 'min_pred_tanimoto_to_starting_material': [0.1584615384615384,\n",
       "  0.1584615384615384,\n",
       "  0.1584615384615384,\n",
       "  0.1584615384615384,\n",
       "  0.1584615384615384,\n",
       "  0.1584615384615384,\n",
       "  0.1584615384615384,\n",
       "  0.2061855670103092],\n",
       " 'max_pred_tanimoto_to_starting_material': [0.7064017660044151,\n",
       "  0.7064017660044151,\n",
       "  0.7064017660044151,\n",
       "  0.7064017660044151,\n",
       "  0.7064017660044151,\n",
       "  0.7064017660044151,\n",
       "  0.7064017660044151,\n",
       "  0.9572815533980582],\n",
       " 'mean_pred_tanimoto_to_starting_material': [0.2858761411972272,\n",
       "  0.2860164308671025,\n",
       "  0.28600072027567286,\n",
       "  0.28465252739947927,\n",
       "  0.2850026354363578,\n",
       "  0.2912529286846621,\n",
       "  0.28629844424584683,\n",
       "  0.42472094381651104],\n",
       " 'true_tanimoto_to_starting_material': [0.21657754010695188,\n",
       "  0.21657754010695188,\n",
       "  0.21657754010695188,\n",
       "  0.21657754010695188,\n",
       "  0.21657754010695188,\n",
       "  0.21657754010695188,\n",
       "  0.21657754010695188,\n",
       "  0.21657754010695188],\n",
       " 'avg_true_class_matches': [0.013513513513513514,\n",
       "  0.0136986301369863,\n",
       "  0.0136986301369863,\n",
       "  0.0136986301369863,\n",
       "  0.013888888888888888,\n",
       "  0.8160919540229885,\n",
       "  0.0136986301369863,\n",
       "  0.014285714285714285],\n",
       " 'jaccard_similarities': {('uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535'): 0.9864864864864865,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448'): 0.8846153846153846,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515'): 0.96,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459'): 0.9466666666666667,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance10_length5_results100_candidates53_time20251115_190113'): 0.3305785123966942,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545'): 0.96,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'): 0.6363636363636364,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448'): 0.8961038961038961,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515'): 0.972972972972973,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459'): 0.9594594594594594,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance10_length5_results100_candidates53_time20251115_190113'): 0.32231404958677684,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545'): 0.972972972972973,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'): 0.625,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515'): 0.9210526315789473,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459'): 0.9078947368421053,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance10_length5_results100_candidates53_time20251115_190113'): 0.32231404958677684,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545'): 0.9210526315789473,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'): 0.5888888888888889,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459'): 0.9333333333333333,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance10_length5_results100_candidates53_time20251115_190113'): 0.32231404958677684,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545'): 0.9466666666666667,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'): 0.625,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance10_length5_results100_candidates53_time20251115_190113'): 0.3140495867768595,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545'): 0.9863013698630136,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'): 0.632183908045977,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance10_length5_results100_candidates53_time20251115_190113',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545'): 0.32231404958677684,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance10_length5_results100_candidates53_time20251115_190113',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'): 0.3418803418803419,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'): 0.625},\n",
       " 'mean_jaccard_similarity': 0.720135010065727}"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "idx = 0\n",
    "route_idx = 0\n",
    "route_completion_guided['mixed_param_completion']['route_details'][route_idx]['step_results'][idx]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8b26fdfd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'reaction_idx': 3,\n",
       " 'true_product': 'C=C[C@]1(C(=O)OC(C)(C)C)CC(=O)N([C@H](C)c2ccccc2)C1',\n",
       " 'true_reactants': 'C[C@H](c1ccccc1)N1C[C@@](C=O)(C(=O)OC(C)(C)C)CC1=O.[Li]CCCC',\n",
       " 'true_distance_to_starting_material': 0.7450462351387054,\n",
       " 'solved': True,\n",
       " 'working_params': ['uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957',\n",
       "  'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535',\n",
       "  'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448',\n",
       "  'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515',\n",
       "  'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459',\n",
       "  'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545',\n",
       "  'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'],\n",
       " 'has_starting_material': [False, False, False, False, False, False, False],\n",
       " 'num_samples': [79, 79, 81, 79, 83, 79, 79],\n",
       " 'min_pred_tanimoto_to_starting_material': [0.2263242375601926,\n",
       "  0.2263242375601926,\n",
       "  0.2263242375601926,\n",
       "  0.2263242375601926,\n",
       "  0.2263242375601926,\n",
       "  0.2263242375601926,\n",
       "  0.2270967741935484],\n",
       " 'max_pred_tanimoto_to_starting_material': [0.8690292758089369,\n",
       "  0.8690292758089369,\n",
       "  0.8690292758089369,\n",
       "  0.8690292758089369,\n",
       "  0.8690292758089369,\n",
       "  0.8690292758089369,\n",
       "  1.0],\n",
       " 'mean_pred_tanimoto_to_starting_material': [0.5933590506898254,\n",
       "  0.5947288624140056,\n",
       "  0.6004074060294753,\n",
       "  0.5947288624140056,\n",
       "  0.5999534761159256,\n",
       "  0.5997185088998458,\n",
       "  0.6712420969449169],\n",
       " 'true_tanimoto_to_starting_material': [0.7450462351387054,\n",
       "  0.7450462351387054,\n",
       "  0.7450462351387054,\n",
       "  0.7450462351387054,\n",
       "  0.7450462351387054,\n",
       "  0.7450462351387054,\n",
       "  0.7450462351387054],\n",
       " 'jaccard_similarities': {('uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535'): 0.975,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448'): 0.8604651162790697,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515'): 0.975,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459'): 0.8,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545'): 0.9036144578313253,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'): 0.975,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448'): 0.8823529411764706,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515'): 1.0,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459'): 0.8202247191011236,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545'): 0.926829268292683,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'): 0.9506172839506173,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515'): 0.8823529411764706,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459'): 0.9294117647058824,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545'): 0.9047619047619048,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'): 0.8390804597701149,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459'): 0.8202247191011236,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545'): 0.926829268292683,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'): 0.9506172839506173,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545'): 0.8409090909090909,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'): 0.7802197802197802,\n",
       "  ('uspto_190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545',\n",
       "   'uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'): 0.8809523809523809},\n",
       " 'mean_jaccard_similarity': 0.8964030181176826}"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "idx = 3\n",
    "route_idx = 0\n",
    "route_completion_guided['mixed_param_completion']['route_details'][route_idx]['step_results'][idx]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "43207697",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'reaction_idx': 6,\n",
       " 'true_product': 'C[C@H](c1ccccc1)N1CC(C(=O)OC(C)(C)C)CC1=O',\n",
       " 'true_reactants': 'CC(C)(C)O.C[C@H](c1ccccc1)N1CC(C(=O)O)CC1=O',\n",
       " 'true_distance_to_starting_material': 0.9964664310954063,\n",
       " 'solved': True,\n",
       " 'working_params': ['uspto_190_seed42_modelrootaligned_steeredfalse_guidance0_length0_results100_candidates53_time20251027_111323'],\n",
       " 'has_starting_material': [True],\n",
       " 'num_samples': [78],\n",
       " 'min_pred_tanimoto_to_starting_material': [0.1685534591194968],\n",
       " 'max_pred_tanimoto_to_starting_material': [0.9964664310954064],\n",
       " 'mean_pred_tanimoto_to_starting_material': [0.4606982688117364],\n",
       " 'true_tanimoto_to_starting_material': [0.9964664310954063]}"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "route_completion['mixed_param_completion']['route_details'][route_idx]['step_results'][idx]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ac5941d7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "uspto_190_uspto190_seed42_modelrootaligned_steeredtrue_guidance3.0_length15_results100_candidates53_time20251108_155459\n",
      "uspto_190_uspto190_seed42_modelrootaligned_steeredtrue_guidance1.0_length10_results100_candidates72_time20251108_141515\n",
      "uspto_190_uspto190_seed42_modelrootaligned_steeredtrue_guidance1.5_length15_results100_candidates72_time20251108_141545\n",
      "uspto_190_uspto190_seed42_modelrootaligned_steeredtrue_guidance0.5_length10_results100_candidates72_time20251107_221957\n",
      "uspto_190_uspto190_seed42_modelrootaligned_steeredtrue_guidance3.0_length10_results100_candidates53_time20251108_155448\n",
      "uspto_190_uspto190_seed42_modelrootaligned_steeredtrue_guidance1.0_length15_results100_candidates72_time20251108_141535\n"
     ]
    }
   ],
   "source": [
    "# how many routes has sm in them"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "08e115e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "use_starting_material = False\n",
    "max_steps = 100\n",
    "starting_material_key = 'route_most_similar_starting_material'\n",
    "\n",
    "route_stats = {\n",
    "    'total_routes': 0,\n",
    "    'fully_completed_routes': 0,\n",
    "    'completion_rate': 0.0,\n",
    "    'routes_with_starting_material': 0 if use_starting_material else None,\n",
    "    #'available_steps': sorted(step_grouped.keys()),\n",
    "    'route_details': [],  # Store per-route details\n",
    "    'step_param_usage': defaultdict(lambda: defaultdict(int))  # Track which params work for each step\n",
    "}\n",
    "\n",
    "#print(f\"Available reaction steps: {sorted(step_grouped.keys())}\")\n",
    "for route_idx, route in enumerate(true_routes):\n",
    "    route_stats['total_routes'] += 1\n",
    "    steps_solved = 0\n",
    "    route_has_starting_material = False\n",
    "    route_detail = {\n",
    "        'route_idx': route_idx,\n",
    "        'main_target': route.get('main_target', ''),\n",
    "        'total_steps': min(len(route['route']), max_steps),\n",
    "        'steps_solved': 0,\n",
    "        'step_results': []  # Store which param worked for each step\n",
    "    }\n",
    "    \n",
    "    for reaction_idx, reaction in enumerate(route['route']):\n",
    "        if reaction_idx > max_steps:\n",
    "            break\n",
    "            \n",
    "        true_product = reaction.split('>>')[0]\n",
    "        true_reactants = reaction.split('>>')[1]\n",
    "        starting_material = route[starting_material_key] # NOTE: assume we use the latest route file now\n",
    "        step_result = {\n",
    "            'reaction_idx': reaction_idx,\n",
    "            'true_product': true_product,\n",
    "            'true_reactants': true_reactants,\n",
    "            #'true_distance_to_starting_material': get_tanimoto(starting_material, true_reactants),\n",
    "            'solved': False,\n",
    "            'working_params': [],\n",
    "            'has_starting_material': [],\n",
    "            'num_samples': [],\n",
    "            'min_pred_tanimoto_to_starting_material': [],\n",
    "            'max_pred_tanimoto_to_starting_material': [],\n",
    "            'mean_pred_tanimoto_to_starting_material': [],\n",
    "            'true_tanimoto_to_starting_material': [],\n",
    "            'avg_true_class_matches': []\n",
    "        }\n",
    "        \n",
    "        # Check all parameter combinations for this step\n",
    "        reactions_in_one_step_per_combo = []\n",
    "        for param_combo, df in results.items():\n",
    "            product_results = df[df['product_smi'] == true_product]\n",
    "            #if use_starting_material and len(product_results) > 0:\n",
    "                # if pd.isna(product_results['pred_tanimoto_to_starting_material'].min()):\n",
    "                #     print(f'NA value for pred_tanimoto_to_starting_material at step_idx: {reaction_idx}, route_idx: {route_idx}, param_combo: {param_combo}')\n",
    "                #     print(f\"tanimoto values: {product_results['pred_tanimoto_to_starting_material'].values}\")\n",
    "                #     print(f\"product_results: {product_results}\")\n",
    "\n",
    "            if len(product_results) > 0 and 'topk' in product_results.columns:\n",
    "                step_result['working_params'].append(param_combo)\n",
    "                # only compute this in combos with the correct reactant\n",
    "                step_result['min_pred_tanimoto_to_starting_material'].append(product_results['pred_tanimoto_to_starting_material'].min())\n",
    "                step_result['max_pred_tanimoto_to_starting_material'].append(product_results['pred_tanimoto_to_starting_material'].max())\n",
    "                step_result['mean_pred_tanimoto_to_starting_material'].append(product_results['pred_tanimoto_to_starting_material'].mean())\n",
    "                #step_result['true_tanimoto_to_starting_material'].append(get_tanimoto(starting_material, true_reactants))\n",
    "                step_result['has_starting_material'].append(product_results['reactant_predictions'].apply(\n",
    "                    lambda x: starting_material in x.split('.')\n",
    "                ).any())\n",
    "                print(product_results['true_class'].value_counts())\n",
    "                print(product_results['pred_class'].value_counts())\n",
    "                step_result['avg_true_class_matches'].append((product_results['true_class'] == product_results['pred_class']).mean())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "syntheseus-in-python10",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
