{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "726c185059d7b877",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-05-07T13:30:52.929610Z",
     "start_time": "2025-05-07T13:30:52.925920Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "pd.options.display.float_format = '{:.3f}'.format\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "d2e6d90bef1748f6",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-05-07T13:33:12.783357Z",
     "start_time": "2025-05-07T13:33:12.777486Z"
    }
   },
   "outputs": [],
   "source": [
    "# Maps each results sub-directory name (one per sample set) to the label used for\n",
    "# that set as a row of the benchmark table. Insertion order is the row order.\n",
    "generative_datasets = {\n",
    "    'amp-gan-samples': 'AMP-GAN',\n",
    "    'amp-diffusion': 'AMP-Diffusion',\n",
    "    'diff-amp-samples': 'Diff-AMP',\n",
    "    'hydramp-generated-samples': 'HydrAMP',\n",
    "    'numeric-generated-samples': 'OmegAMP w/Numeric',\n",
    "    'one-hot-generated-samples': 'OmegAMP w/OneHot',\n",
    "    'all-scales-but-pI-generated-samples': 'OmegAMP - {charge scale}',\n",
    "    'all-scales-but-WW-generated-samples': 'OmegAMP - {hydrophobicity scale}',\n",
    "    'omegamp-generated-samples': 'OmegAMP Unconditional',\n",
    "    'subset-hq-conditional-samples': 'OmegAMP-SC',\n",
    "    # NOTE(review): the two 'curated-*' sets look like reference data rather than\n",
    "    # generator output - confirm.\n",
    "    'curated-AMPs': 'HQ AMPs',\n",
    "    'curated-Non-AMPs': 'HQ non-AMPs',\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "4576c0da-a650-41b1-8e07-28f274014d52",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-05-07T13:33:13.948479Z",
     "start_time": "2025-05-07T13:33:12.925779Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Build the benchmark scores: for every sample set, the percentage of sequences\n",
    "# that each classifier scores at or above AMP_THRESHOLD (i.e. labels as AMP).\n",
    "results_dir = '../results/benchmark-generative-models'\n",
    "AMP_THRESHOLD = 0.5\n",
    "\n",
    "# Result-file stem -> column label used in the table.\n",
    "classifiers = {\n",
    "    'ampeppy': 'amPEPpy',\n",
    "    'hydramp-mic-classifier': 'HydrAMP-mic-classifier',\n",
    "    'omegamp': 'OmegAMP',\n",
    "}\n",
    "\n",
    "\n",
    "def amp_percentage(results_dir, dataset, model, threshold=AMP_THRESHOLD):\n",
    "    \"\"\"Return the percentage of rows in one classifier result file scored as AMP.\n",
    "\n",
    "    Reads ``{results_dir}/{dataset}/{model}.tsv`` and thresholds its\n",
    "    ``Probability_score`` column. If that file is missing or has no such column,\n",
    "    falls back to ``{results_dir}/{dataset}/{model}.csv`` and thresholds its\n",
    "    ``Prediction`` column instead. Any other error propagates to the caller.\n",
    "\n",
    "    Returns NaN when the result file has a header but no rows.\n",
    "    \"\"\"\n",
    "    stem = f'{results_dir}/{dataset}/{model}'\n",
    "    try:\n",
    "        scores = pd.read_csv(f'{stem}.tsv', sep='\\t')['Probability_score']\n",
    "    except (FileNotFoundError, KeyError):\n",
    "        # Only the two expected \"no usable TSV\" cases trigger the fallback; a bare\n",
    "        # `except:` would also hide typos, parse errors and KeyboardInterrupt.\n",
    "        scores = pd.read_csv(f'{stem}.csv')['Prediction']\n",
    "    # Vectorised threshold; NaN scores compare False and count as non-AMP.\n",
    "    return (scores >= threshold).mean() * 100\n",
    "\n",
    "\n",
    "generative_scores = {\n",
    "    dataset_name: {\n",
    "        model_name: amp_percentage(results_dir, dataset, model)\n",
    "        for model, model_name in classifiers.items()\n",
    "    }\n",
    "    for dataset, dataset_name in generative_datasets.items()\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "4d0b70bf460c92fc",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-05-07T13:33:13.964943Z",
     "start_time": "2025-05-07T13:33:13.957231Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>amPEPpy</th>\n",
       "      <th>HydrAMP-mic-classifier</th>\n",
       "      <th>OmegAMP</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AMP-GAN</th>\n",
       "      <td>50.055</td>\n",
       "      <td>31.593</td>\n",
       "      <td>0.301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AMP-Diffusion</th>\n",
       "      <td>27.020</td>\n",
       "      <td>42.831</td>\n",
       "      <td>2.192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Diff-AMP</th>\n",
       "      <td>50.376</td>\n",
       "      <td>27.820</td>\n",
       "      <td>0.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HydrAMP</th>\n",
       "      <td>56.500</td>\n",
       "      <td>44.140</td>\n",
       "      <td>0.024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OmegAMP w/Numeric</th>\n",
       "      <td>64.070</td>\n",
       "      <td>29.470</td>\n",
       "      <td>2.545</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OmegAMP w/OneHot</th>\n",
       "      <td>61.386</td>\n",
       "      <td>32.647</td>\n",
       "      <td>6.624</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OmegAMP - {charge scale}</th>\n",
       "      <td>60.965</td>\n",
       "      <td>32.041</td>\n",
       "      <td>8.433</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OmegAMP - {hydrophobicity scale}</th>\n",
       "      <td>63.540</td>\n",
       "      <td>35.423</td>\n",
       "      <td>9.564</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OmegAMP Unconditional</th>\n",
       "      <td>62.735</td>\n",
       "      <td>33.807</td>\n",
       "      <td>10.451</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OmegAMP-SC</th>\n",
       "      <td>86.860</td>\n",
       "      <td>64.090</td>\n",
       "      <td>16.402</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HQ AMPs</th>\n",
       "      <td>94.167</td>\n",
       "      <td>81.513</td>\n",
       "      <td>42.385</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HQ non-AMPs</th>\n",
       "      <td>77.108</td>\n",
       "      <td>20.474</td>\n",
       "      <td>6.304</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  amPEPpy  HydrAMP-mic-classifier  OmegAMP\n",
       "AMP-GAN                            50.055                  31.593    0.301\n",
       "AMP-Diffusion                      27.020                  42.831    2.192\n",
       "Diff-AMP                           50.376                  27.820    0.000\n",
       "HydrAMP                            56.500                  44.140    0.024\n",
       "OmegAMP w/Numeric                  64.070                  29.470    2.545\n",
       "OmegAMP w/OneHot                   61.386                  32.647    6.624\n",
       "OmegAMP - {charge scale}           60.965                  32.041    8.433\n",
       "OmegAMP - {hydrophobicity scale}   63.540                  35.423    9.564\n",
       "OmegAMP Unconditional              62.735                  33.807   10.451\n",
       "OmegAMP-SC                         86.860                  64.090   16.402\n",
       "HQ AMPs                            94.167                  81.513   42.385\n",
       "HQ non-AMPs                        77.108                  20.474    6.304"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Turn the nested {row label: {classifier: percentage}} dict into a table with\n",
    "# one row per sample set and one column per classifier.\n",
    "# NOTE(review): this rebinds `generative_scores` from dict to DataFrame, so\n",
    "# re-running this cell on its own transposes the table again.\n",
    "generative_scores = pd.DataFrame(data=generative_scores).transpose()\n",
    "generative_scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "33c3535ea3b13d1",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-05-07T13:33:14.055882Z",
     "start_time": "2025-05-07T13:33:14.051200Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\\\begin{tabular}{lrrr}\\n\\\\toprule\\n & amPEPpy & HydrAMP-mic-classifier & OmegAMP \\\\\\\\\\n\\\\midrule\\nAMP-GAN & 50.055 & 31.593 & 0.301 \\\\\\\\\\nAMP-Diffusion & 27.020 & 42.831 & 2.192 \\\\\\\\\\nDiff-AMP & 50.376 & 27.820 & 0.000 \\\\\\\\\\nHydrAMP & 56.500 & 44.140 & 0.024 \\\\\\\\\\nOmegAMP w/Numeric & 64.070 & 29.470 & 2.545 \\\\\\\\\\nOmegAMP w/OneHot & 61.386 & 32.647 & 6.624 \\\\\\\\\\nOmegAMP - {charge scale} & 60.965 & 32.041 & 8.433 \\\\\\\\\\nOmegAMP - {hydrophobicity scale} & 63.540 & 35.423 & 9.564 \\\\\\\\\\nOmegAMP Unconditional & 62.735 & 33.807 & 10.451 \\\\\\\\\\nOmegAMP-SC & 86.860 & 64.090 & 16.402 \\\\\\\\\\nHQ AMPs & 94.167 & 81.513 & 42.385 \\\\\\\\\\nHQ non-AMPs & 77.108 & 20.474 & 6.304 \\\\\\\\\\n\\\\bottomrule\\n\\\\end{tabular}\\n'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# print() renders the newlines so the table can be pasted straight into a .tex\n",
    "# file. escape=True keeps the literal braces in row labels such as\n",
    "# 'OmegAMP - {charge scale}', which LaTeX would otherwise consume as grouping.\n",
    "print(generative_scores.to_latex(float_format=\"{:.3f}\".format, escape=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a3fe80d4-d523-46e8-a909-834f2cdb9a5e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
