{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import subprocess\n",
    "import numpy as np\n",
    "from Bio.PDB.PDBParser import PDBParser\n",
    "import warnings\n",
    "import yaml\n",
    "import glob\n",
    "from rdkit import Chem\n",
    "from rdkit.Chem.rdMolAlign import CalcRMS\n",
    "from easydict import EasyDict\n",
    "import json\n",
    "import re\n",
    "import csv\n",
    "import pandas as pd\n",
    "import shutil"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pocket_path = '../data_crossdocked/test.yaml'           # './data_crossdocked/test.yaml'\n",
    "ori_vina_path = '/home/nic/Code/HGNN-GPT/GPT-last-new-2/crossdocked/dock_result2/pocket_vina.csv'\n",
    "json_file4='../dock_file_save/crossdocked/2025_01_12_16_1743065493/dock_result/dock_dict.json'\n",
    "json_file3='../dock_file_save/crossdocked/2025_01_05_20_1741635931/dock_result/dock_dict.json'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(pocket_path, 'r') as f:\n",
    "    pocket_dict = yaml.full_load(f)\n",
    "pocket_names=list(pocket_dict.keys())\n",
    "\n",
    "\n",
    "ori_vina = {}\n",
    "with open(ori_vina_path, 'r') as file:\n",
    "    csv_reader = csv.DictReader(file)\n",
    "    for row in csv_reader:\n",
    "        ligand_name = row['pocket_name']\n",
    "        affinity = float(row['affinity'])\n",
    "        ori_vina[ligand_name] = affinity\n",
    "\n",
    "with open(json_file3, 'r') as f:\n",
    "    dock_data3 = json.load(f)\n",
    "\n",
    "with open(json_file4, 'r') as f:\n",
    "    dock_data4 = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_top_10(one_pocket_name,dock_data):\n",
    "    pass\n",
    "    affinity_values = {}\n",
    "    for key, values in dock_data.items():\n",
    "        for record in values:\n",
    "            if record.get('mode_id') == 0:\n",
    "                affinity_values[key] = record.get('affinity', None)\n",
    "                break\n",
    "    \n",
    "    pocket_dock_values={}\n",
    "    for key,value in affinity_values.items():\n",
    "        pocket_name = \"_\".join(key.split(\"_\")[:-1])\n",
    "        if pocket_name not in pocket_dock_values:\n",
    "            pocket_dock_values[pocket_name]=[]\n",
    "        pocket_dock_values[pocket_name].append((key,value))\n",
    "\n",
    "    one_pocket_affinity = pocket_dock_values[one_pocket_name]\n",
    "    top_10_values = sorted(one_pocket_affinity, key=lambda x: x[1])[:10]\n",
    "    # top_10_affinities = [value for _, value in top_10_values]\n",
    "    # return top_10_affinities\n",
    "    return top_10_values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# one_pocket_name='1phk_A_rec_1phk_atp_lig_tt_min_0_pocket10'\n",
    "# one_pocket_name='3tym_A_rec_3n5v_xfh_lig_tt_min_0_pocket10'\n",
    "# one_pocket_name='5liu_X_rec_4gq0_qap_lig_tt_min_0_pocket10'\n",
    "# one_pocket_name='3nfb_A_rec_3nfb_oae_lig_tt_docked_2_pocket10' \n",
    "# one_pocket_name='4azf_A_rec_5lxc_7aa_lig_tt_min_0_pocket10'   \n",
    "# one_pocket_name='14gs_A_rec_20gs_cbd_lig_tt_min_0_pocket10'\n",
    "\n",
    "one_pocket_name='1ai4_A_rec_1ai5_mnp_lig_tt_docked_0_pocket10'\n",
    "one_pocket_name_path='PAC_ECOLX_27_846_0/1ai4_A_rec_1ai5_mnp_lig_tt_docked_0_pocket10.pdb'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "top_10_values_3=get_top_10(one_pocket_name,dock_data3)\n",
    "print(top_10_values_3)\n",
    "\n",
    "top_10_values_4=get_top_10(one_pocket_name,dock_data4)\n",
    "print(top_10_values_4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_pdbqt_smiles(pdbqt_path, save_path, smiles_all,top_10_values):\n",
    "    save_path = os.path.join(save_path,'smiles_pdbqt')\n",
    "    if not os.path.exists(save_path):\n",
    "        os.makedirs(save_path)\n",
    "    gen_smiles_top={}\n",
    "    for key,value in top_10_values:\n",
    "        one_pdbqt_path = os.path.join(pdbqt_path,key) + '_out.pdbqt'\n",
    "        destination_file=os.path.join(save_path,key) + '_out.pdbqt'\n",
    "        shutil.copy(one_pdbqt_path, destination_file)\n",
    "        \n",
    "        parts = key.rsplit('_', 1)\n",
    "        smiles_index = int(parts[1])\n",
    "        one_smiles=smiles_all[smiles_index]\n",
    "\n",
    "        gen_smiles_top[key] = {\n",
    "            'smiles': one_smiles,\n",
    "            'affinity_value': value\n",
    "        }\n",
    "    \n",
    "    return gen_smiles_top\n",
    "\n",
    "def get_smi(smiles_yaml):\n",
    "    with open(smiles_yaml, 'r') as f:\n",
    "        config = yaml.full_load(f)\n",
    "    return list(config.keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pdbqt_path_3='../dock_file_save/crossdocked/2025_01_05_20_1741635931/out_pdbqt/'\n",
    "smiles_yaml_3='../save/pre/crossdocked/char/hgnn/2025_01_05_20/sample_300_30_True_1_1_1741635931/'\n",
    "save_path_3='../best_pocket/chapter_3/'\n",
    "smiles_yaml_3 = os.path.join(smiles_yaml_3, one_pocket_name)+ '_sampled_temp1.yaml'\n",
    "smiles_all_3 = get_smi(smiles_yaml_3)\n",
    "gen_smiles_3 = get_pdbqt_smiles(pdbqt_path_3,save_path_3,smiles_all_3,top_10_values_3)\n",
    "\n",
    "df_results_3 = pd.DataFrame.from_dict(gen_smiles_3, orient='index')\n",
    "\n",
    "csv_file_path = os.path.join(save_path_3,'gen_smiles_3.csv')\n",
    "df_results_3.to_csv(csv_file_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pdbqt_path_4='../dock_file_save/crossdocked/2025_01_12_16_1743065493/out_pdbqt/'\n",
    "smiles_yaml_4='../save/pre/crossdocked/char/all/2025_01_12_16/sample_300_30_True_1_1_1743065493/'\n",
    "save_path_4='../best_pocket/chapter_4/'\n",
    "smiles_yaml_4 = os.path.join(smiles_yaml_4, one_pocket_name)+ '_sampled_temp1.yaml'\n",
    "smiles_all_4 = get_smi(smiles_yaml_4)\n",
    "gen_smiles_4 = get_pdbqt_smiles(pdbqt_path_4,save_path_4,smiles_all_4,top_10_values_4)\n",
    "\n",
    "df_results_4 = pd.DataFrame.from_dict(gen_smiles_4, orient='index')\n",
    "\n",
    "csv_file_path_4 = os.path.join(save_path_4,'gen_smiles_4.csv')\n",
    "df_results_4.to_csv(csv_file_path_4)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "HGNN-GPT",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.8.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
