{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import collections\n",
    "import os\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "%config InlineBackend.figure_format='retina'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>protein</th>\n",
       "      <th>sequence</th>\n",
       "      <th>entropy_sum</th>\n",
       "      <th>epi_start_pos</th>\n",
       "      <th>epi_len</th>\n",
       "      <th>glyco_probs</th>\n",
       "      <th>crosses_cleavage</th>\n",
       "      <th>sequence_length</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>E</td>\n",
       "      <td>MYSFVSEE</td>\n",
       "      <td>0.002395</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>E</td>\n",
       "      <td>YSFVSEET</td>\n",
       "      <td>0.002395</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>E</td>\n",
       "      <td>SFVSEETG</td>\n",
       "      <td>0.002395</td>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>E</td>\n",
       "      <td>FVSEETGT</td>\n",
       "      <td>0.002395</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>E</td>\n",
       "      <td>VSEETGTL</td>\n",
       "      <td>0.002395</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174523</th>\n",
       "      <td>S2</td>\n",
       "      <td>KGCCSCGSCCKFDEDDSEPVLKGVK</td>\n",
       "      <td>0.095264</td>\n",
       "      <td>1244</td>\n",
       "      <td>25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174524</th>\n",
       "      <td>S2</td>\n",
       "      <td>GCCSCGSCCKFDEDDSEPVLKGVKL</td>\n",
       "      <td>0.092869</td>\n",
       "      <td>1245</td>\n",
       "      <td>25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174525</th>\n",
       "      <td>S2</td>\n",
       "      <td>CCSCGSCCKFDEDDSEPVLKGVKLH</td>\n",
       "      <td>0.092869</td>\n",
       "      <td>1246</td>\n",
       "      <td>25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174526</th>\n",
       "      <td>S2</td>\n",
       "      <td>CSCGSCCKFDEDDSEPVLKGVKLHY</td>\n",
       "      <td>0.086502</td>\n",
       "      <td>1247</td>\n",
       "      <td>25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174527</th>\n",
       "      <td>S2</td>\n",
       "      <td>SCGSCCKFDEDDSEPVLKGVKLHYT</td>\n",
       "      <td>0.086502</td>\n",
       "      <td>1248</td>\n",
       "      <td>25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>174528 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       protein                   sequence  entropy_sum  epi_start_pos  \\\n",
       "0            E                   MYSFVSEE     0.002395              0   \n",
       "1            E                   YSFVSEET     0.002395              1   \n",
       "2            E                   SFVSEETG     0.002395              2   \n",
       "3            E                   FVSEETGT     0.002395              3   \n",
       "4            E                   VSEETGTL     0.002395              4   \n",
       "...        ...                        ...          ...            ...   \n",
       "174523      S2  KGCCSCGSCCKFDEDDSEPVLKGVK     0.095264           1244   \n",
       "174524      S2  GCCSCGSCCKFDEDDSEPVLKGVKL     0.092869           1245   \n",
       "174525      S2  CCSCGSCCKFDEDDSEPVLKGVKLH     0.092869           1246   \n",
       "174526      S2  CSCGSCCKFDEDDSEPVLKGVKLHY     0.086502           1247   \n",
       "174527      S2  SCGSCCKFDEDDSEPVLKGVKLHYT     0.086502           1248   \n",
       "\n",
       "        epi_len  glyco_probs  crosses_cleavage  sequence_length  \n",
       "0             8          0.0                 0                8  \n",
       "1             8          0.0                 0                8  \n",
       "2             8          0.0                 0                8  \n",
       "3             8          0.0                 0                8  \n",
       "4             8          0.0                 0                8  \n",
       "...         ...          ...               ...              ...  \n",
       "174523       25          0.0                 0               25  \n",
       "174524       25          0.0                 0               25  \n",
       "174525       25          0.0                 0               25  \n",
       "174526       25          0.0                 0               25  \n",
       "174527       25          0.0                 0               25  \n",
       "\n",
       "[174528 rows x 8 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the epitope feature table, rename the peptide column to `sequence`,\n",
    "# and attach each peptide's length as `sequence_length`.\n",
    "orig_data = (\n",
    "    pd.read_csv('../AllEpitopeFeatures.csv')\n",
    "    .rename(columns={'Epitope': 'sequence'})\n",
    "    .assign(sequence_length=lambda frame: frame['sequence'].map(len))\n",
    ")\n",
    "orig_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>allele</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>HLA-DPA10301-DPB11301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>HLA-DPA10201-DPB155801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>DRB1_0701</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>HLA-DPA10207-DPB116201</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>HLA-DPA10301-DPB15501</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>313</th>\n",
       "      <td>HLA-DQA10401-DQB10301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>314</th>\n",
       "      <td>HLA-DPA10103-DPB112601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>315</th>\n",
       "      <td>HLA-DPA10301-DPB16501</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>316</th>\n",
       "      <td>HLA-DPA10202-DPB11301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>317</th>\n",
       "      <td>HLA-DPA10103-DPB14101</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>318 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                     allele\n",
       "0     HLA-DPA10301-DPB11301\n",
       "1    HLA-DPA10201-DPB155801\n",
       "2                 DRB1_0701\n",
       "3    HLA-DPA10207-DPB116201\n",
       "4     HLA-DPA10301-DPB15501\n",
       "..                      ...\n",
       "313   HLA-DQA10401-DQB10301\n",
       "314  HLA-DPA10103-DPB112601\n",
       "315   HLA-DPA10301-DPB16501\n",
       "316   HLA-DPA10202-DPB11301\n",
       "317   HLA-DPA10103-DPB14101\n",
       "\n",
       "[318 rows x 1 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the final HLA allele list: a headerless file loaded as a single `allele` column.\n",
    "hla_alleles = pd.read_csv(\n",
    "    'MHC2_allele_marry.txt',\n",
    "    header=None,\n",
    "    names=['allele'],\n",
    ")\n",
    "hla_alleles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>protein</th>\n",
       "      <th>sequence</th>\n",
       "      <th>entropy_sum</th>\n",
       "      <th>epi_start_pos</th>\n",
       "      <th>epi_len</th>\n",
       "      <th>glyco_probs</th>\n",
       "      <th>crosses_cleavage</th>\n",
       "      <th>sequence_length</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>330</th>\n",
       "      <td>E</td>\n",
       "      <td>MYSFVSEETGTLI</td>\n",
       "      <td>0.002395</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>331</th>\n",
       "      <td>E</td>\n",
       "      <td>YSFVSEETGTLIV</td>\n",
       "      <td>0.002395</td>\n",
       "      <td>1</td>\n",
       "      <td>13</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>332</th>\n",
       "      <td>E</td>\n",
       "      <td>SFVSEETGTLIVN</td>\n",
       "      <td>0.002395</td>\n",
       "      <td>2</td>\n",
       "      <td>13</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>333</th>\n",
       "      <td>E</td>\n",
       "      <td>FVSEETGTLIVNS</td>\n",
       "      <td>0.002395</td>\n",
       "      <td>3</td>\n",
       "      <td>13</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>334</th>\n",
       "      <td>E</td>\n",
       "      <td>VSEETGTLIVNSV</td>\n",
       "      <td>0.004789</td>\n",
       "      <td>4</td>\n",
       "      <td>13</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174523</th>\n",
       "      <td>S2</td>\n",
       "      <td>KGCCSCGSCCKFDEDDSEPVLKGVK</td>\n",
       "      <td>0.095264</td>\n",
       "      <td>1244</td>\n",
       "      <td>25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174524</th>\n",
       "      <td>S2</td>\n",
       "      <td>GCCSCGSCCKFDEDDSEPVLKGVKL</td>\n",
       "      <td>0.092869</td>\n",
       "      <td>1245</td>\n",
       "      <td>25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174525</th>\n",
       "      <td>S2</td>\n",
       "      <td>CCSCGSCCKFDEDDSEPVLKGVKLH</td>\n",
       "      <td>0.092869</td>\n",
       "      <td>1246</td>\n",
       "      <td>25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174526</th>\n",
       "      <td>S2</td>\n",
       "      <td>CSCGSCCKFDEDDSEPVLKGVKLHY</td>\n",
       "      <td>0.086502</td>\n",
       "      <td>1247</td>\n",
       "      <td>25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174527</th>\n",
       "      <td>S2</td>\n",
       "      <td>SCGSCCKFDEDDSEPVLKGVKLHYT</td>\n",
       "      <td>0.086502</td>\n",
       "      <td>1248</td>\n",
       "      <td>25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>125593 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       protein                   sequence  entropy_sum  epi_start_pos  \\\n",
       "330          E              MYSFVSEETGTLI     0.002395              0   \n",
       "331          E              YSFVSEETGTLIV     0.002395              1   \n",
       "332          E              SFVSEETGTLIVN     0.002395              2   \n",
       "333          E              FVSEETGTLIVNS     0.002395              3   \n",
       "334          E              VSEETGTLIVNSV     0.004789              4   \n",
       "...        ...                        ...          ...            ...   \n",
       "174523      S2  KGCCSCGSCCKFDEDDSEPVLKGVK     0.095264           1244   \n",
       "174524      S2  GCCSCGSCCKFDEDDSEPVLKGVKL     0.092869           1245   \n",
       "174525      S2  CCSCGSCCKFDEDDSEPVLKGVKLH     0.092869           1246   \n",
       "174526      S2  CSCGSCCKFDEDDSEPVLKGVKLHY     0.086502           1247   \n",
       "174527      S2  SCGSCCKFDEDDSEPVLKGVKLHYT     0.086502           1248   \n",
       "\n",
       "        epi_len  glyco_probs  crosses_cleavage  sequence_length  \n",
       "330          13          0.0                 0               13  \n",
       "331          13          0.0                 0               13  \n",
       "332          13          0.0                 0               13  \n",
       "333          13          0.0                 0               13  \n",
       "334          13          0.0                 0               13  \n",
       "...         ...          ...               ...              ...  \n",
       "174523       25          0.0                 0               25  \n",
       "174524       25          0.0                 0               25  \n",
       "174525       25          0.0                 0               25  \n",
       "174526       25          0.0                 0               25  \n",
       "174527       25          0.0                 0               25  \n",
       "\n",
       "[125593 rows x 8 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Restrict to peptides of MHC-II length: 13 to 25 residues, both ends inclusive.\n",
    "mhc2_data = orig_data[orig_data['sequence_length'].between(13, 25)]\n",
    "mhc2_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dump the bare peptide sequences (not paired with any MHC allele), one per\n",
    "# line with no header or index, as the peptide input file for NetMHCIIpan.\n",
    "mhc2_data.to_csv(\n",
    "    'peptides_13-25.pep',\n",
    "    columns=['sequence'],\n",
    "    index=False,\n",
    "    header=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create commands for running NetMHCIIpan-3.2 and NetMHCIIpan-4.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "# cmds:  318\n"
     ]
    }
   ],
   "source": [
    "# Build one NetMHCIIpan-3.2 argument line per HLA allele and save them,\n",
    "# one per line, to netmhc_class2_args.txt.\n",
    "cmd_template = '-inptype 1 -f peptides_13-25.pep -a {allele} -xls -xlsfile {allele_file}'\n",
    "# Sanitise each allele name once ('*' -> '_', ':' removed); the result is used\n",
    "# for both the -a argument and the output file name.\n",
    "allele_names = [\n",
    "    name.replace('*', '_').replace(':', '')\n",
    "    for name in hla_alleles['allele'].values\n",
    "]\n",
    "cmds = [\n",
    "    cmd_template.format(\n",
    "        allele=name,\n",
    "        allele_file='netmhcii-3.2_preds/%s_preds.xls' % name,\n",
    "    )\n",
    "    for name in allele_names\n",
    "]\n",
    "print('# cmds: ', len(cmds))\n",
    "with open('netmhc_class2_args.txt', 'w') as f:\n",
    "    f.writelines(cmd + '\\n' for cmd in cmds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "# cmds:  318\n"
     ]
    }
   ],
   "source": [
    "# Build one NetMHCIIpan-4.0 argument line per HLA allele and save them,\n",
    "# one per line, to netmhc4.0_class2_args.txt.\n",
    "# NOTE(review): -BA presumably requests the binding-affinity columns\n",
    "# (Score_BA, nM, Rank_BA) seen in the loaded predictions - confirm.\n",
    "cmd_template = '-inptype 1 -f peptides_13-25.pep -a {allele} -BA -xls -xlsfile {allele_file}'\n",
    "# Sanitise each allele name once ('*' -> '_', ':' removed); the result is used\n",
    "# for both the -a argument and the output file name.\n",
    "allele_names = [\n",
    "    name.replace('*', '_').replace(':', '')\n",
    "    for name in hla_alleles['allele'].values\n",
    "]\n",
    "cmds = [\n",
    "    cmd_template.format(\n",
    "        allele=name,\n",
    "        allele_file='netmhcii-4.0_preds/%s_preds.xls' % name,\n",
    "    )\n",
    "    for name in allele_names\n",
    "]\n",
    "print('# cmds: ', len(cmds))\n",
    "with open('netmhc4.0_class2_args.txt', 'w') as f:\n",
    "    f.writelines(cmd + '\\n' for cmd in cmds)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Commands to run (one per NetMHCIIpan version):\n",
    "\n",
    "```\n",
    "cat netmhc_class2_args.txt | xargs -P 70 -d '\\n' -n 1 ./netMHCIIpan-3.2/netMHCIIpan\n",
    "\n",
    "cat netmhc4.0_class2_args.txt | xargs -P 70 -d '\\n' -n 1 ./netMHCIIpan-4.0/netMHCIIpan\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load NetMHCIIpan4.0 predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded 280 alleles\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Peptide</th>\n",
       "      <th>Target</th>\n",
       "      <th>Score</th>\n",
       "      <th>Rank</th>\n",
       "      <th>Score_BA</th>\n",
       "      <th>nM</th>\n",
       "      <th>Rank_BA</th>\n",
       "      <th>genotype</th>\n",
       "      <th>sequence_length</th>\n",
       "      <th>loci</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>MYSFVSEETGTLI</td>\n",
       "      <td>-99.999</td>\n",
       "      <td>0.041539</td>\n",
       "      <td>35.55</td>\n",
       "      <td>0.246930</td>\n",
       "      <td>3456.633552</td>\n",
       "      <td>50.21</td>\n",
       "      <td>HLA-DPA10301-DPB11301</td>\n",
       "      <td>13</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>YSFVSEETGTLIV</td>\n",
       "      <td>-99.999</td>\n",
       "      <td>0.038426</td>\n",
       "      <td>39.10</td>\n",
       "      <td>0.240716</td>\n",
       "      <td>3697.028008</td>\n",
       "      <td>52.48</td>\n",
       "      <td>HLA-DPA10301-DPB11301</td>\n",
       "      <td>13</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SFVSEETGTLIVN</td>\n",
       "      <td>-99.999</td>\n",
       "      <td>0.028666</td>\n",
       "      <td>53.15</td>\n",
       "      <td>0.212631</td>\n",
       "      <td>5009.833741</td>\n",
       "      <td>62.80</td>\n",
       "      <td>HLA-DPA10301-DPB11301</td>\n",
       "      <td>13</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>FVSEETGTLIVNS</td>\n",
       "      <td>-99.999</td>\n",
       "      <td>0.036350</td>\n",
       "      <td>41.72</td>\n",
       "      <td>0.206312</td>\n",
       "      <td>5364.337635</td>\n",
       "      <td>65.16</td>\n",
       "      <td>HLA-DPA10301-DPB11301</td>\n",
       "      <td>13</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>VSEETGTLIVNSV</td>\n",
       "      <td>-99.999</td>\n",
       "      <td>0.043983</td>\n",
       "      <td>32.96</td>\n",
       "      <td>0.200067</td>\n",
       "      <td>5739.329682</td>\n",
       "      <td>67.36</td>\n",
       "      <td>HLA-DPA10301-DPB11301</td>\n",
       "      <td>13</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125588</th>\n",
       "      <td>KGCCSCGSCCKFDEDDSEPVLKGVK</td>\n",
       "      <td>-99.999</td>\n",
       "      <td>0.002639</td>\n",
       "      <td>91.75</td>\n",
       "      <td>0.168159</td>\n",
       "      <td>8105.800438</td>\n",
       "      <td>79.10</td>\n",
       "      <td>HLA-DPA10103-DPB14101</td>\n",
       "      <td>25</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125589</th>\n",
       "      <td>GCCSCGSCCKFDEDDSEPVLKGVKL</td>\n",
       "      <td>-99.999</td>\n",
       "      <td>0.003358</td>\n",
       "      <td>88.46</td>\n",
       "      <td>0.173627</td>\n",
       "      <td>7640.150953</td>\n",
       "      <td>77.25</td>\n",
       "      <td>HLA-DPA10103-DPB14101</td>\n",
       "      <td>25</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125590</th>\n",
       "      <td>CCSCGSCCKFDEDDSEPVLKGVKLH</td>\n",
       "      <td>-99.999</td>\n",
       "      <td>0.003350</td>\n",
       "      <td>88.49</td>\n",
       "      <td>0.174621</td>\n",
       "      <td>7558.422479</td>\n",
       "      <td>76.91</td>\n",
       "      <td>HLA-DPA10103-DPB14101</td>\n",
       "      <td>25</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125591</th>\n",
       "      <td>CSCGSCCKFDEDDSEPVLKGVKLHY</td>\n",
       "      <td>-99.999</td>\n",
       "      <td>0.003275</td>\n",
       "      <td>88.85</td>\n",
       "      <td>0.214361</td>\n",
       "      <td>4916.930790</td>\n",
       "      <td>62.79</td>\n",
       "      <td>HLA-DPA10103-DPB14101</td>\n",
       "      <td>25</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125592</th>\n",
       "      <td>SCGSCCKFDEDDSEPVLKGVKLHYT</td>\n",
       "      <td>-99.999</td>\n",
       "      <td>0.008928</td>\n",
       "      <td>64.51</td>\n",
       "      <td>0.328971</td>\n",
       "      <td>1422.804215</td>\n",
       "      <td>24.79</td>\n",
       "      <td>HLA-DPA10103-DPB14101</td>\n",
       "      <td>25</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>35166040 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                          Peptide  Target     Score   Rank  Score_BA  \\\n",
       "0                   MYSFVSEETGTLI -99.999  0.041539  35.55  0.246930   \n",
       "1                   YSFVSEETGTLIV -99.999  0.038426  39.10  0.240716   \n",
       "2                   SFVSEETGTLIVN -99.999  0.028666  53.15  0.212631   \n",
       "3                   FVSEETGTLIVNS -99.999  0.036350  41.72  0.206312   \n",
       "4                   VSEETGTLIVNSV -99.999  0.043983  32.96  0.200067   \n",
       "...                           ...     ...       ...    ...       ...   \n",
       "125588  KGCCSCGSCCKFDEDDSEPVLKGVK -99.999  0.002639  91.75  0.168159   \n",
       "125589  GCCSCGSCCKFDEDDSEPVLKGVKL -99.999  0.003358  88.46  0.173627   \n",
       "125590  CCSCGSCCKFDEDDSEPVLKGVKLH -99.999  0.003350  88.49  0.174621   \n",
       "125591  CSCGSCCKFDEDDSEPVLKGVKLHY -99.999  0.003275  88.85  0.214361   \n",
       "125592  SCGSCCKFDEDDSEPVLKGVKLHYT -99.999  0.008928  64.51  0.328971   \n",
       "\n",
       "                 nM  Rank_BA               genotype  sequence_length    loci  \n",
       "0       3456.633552    50.21  HLA-DPA10301-DPB11301               13  HLA-DP  \n",
       "1       3697.028008    52.48  HLA-DPA10301-DPB11301               13  HLA-DP  \n",
       "2       5009.833741    62.80  HLA-DPA10301-DPB11301               13  HLA-DP  \n",
       "3       5364.337635    65.16  HLA-DPA10301-DPB11301               13  HLA-DP  \n",
       "4       5739.329682    67.36  HLA-DPA10301-DPB11301               13  HLA-DP  \n",
       "...             ...      ...                    ...              ...     ...  \n",
       "125588  8105.800438    79.10  HLA-DPA10103-DPB14101               25  HLA-DP  \n",
       "125589  7640.150953    77.25  HLA-DPA10103-DPB14101               25  HLA-DP  \n",
       "125590  7558.422479    76.91  HLA-DPA10103-DPB14101               25  HLA-DP  \n",
       "125591  4916.930790    62.79  HLA-DPA10103-DPB14101               25  HLA-DP  \n",
       "125592  1422.804215    24.79  HLA-DPA10103-DPB14101               25  HLA-DP  \n",
       "\n",
       "[35166040 rows x 10 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load every available NetMHCIIpan-4.0 prediction table into one long frame,\n",
    "# tagging each row with its allele (`genotype`), peptide length and locus.\n",
    "dfs = []\n",
    "skipped_alleles = []  # (allele, error type) for every table that could not be read\n",
    "for allele in hla_alleles['allele'].values:\n",
    "    # Sanitise the name the same way the command-generation step does, so the\n",
    "    # path matches the -xlsfile that NetMHCIIpan was asked to write.\n",
    "    allele_name = allele.replace('*', '_').replace(':', '')\n",
    "    try:\n",
    "        df = pd.read_csv(\n",
    "            './netmhcii-4.0_preds/%s_preds.xls' % allele_name,\n",
    "            delimiter='\\t',\n",
    "            # Line 0 is skipped, so the file's second line supplies the column names.\n",
    "            skiprows=[0],\n",
    "        )\n",
    "    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as err:\n",
    "        # Best effort: a missing or unparseable file means there are no usable\n",
    "        # predictions for this allele. Record it instead of dropping it silently;\n",
    "        # any other error (including KeyboardInterrupt) now propagates.\n",
    "        skipped_alleles.append((allele, type(err).__name__))\n",
    "        continue\n",
    "    df['genotype'] = allele\n",
    "    df = df.drop(columns=['Pos', 'ID', 'Ave', 'NB'])\n",
    "    dfs.append(df)\n",
    "print('Loaded %d alleles' % len(dfs))\n",
    "if skipped_alleles:\n",
    "    print('Skipped %d alleles without readable predictions: %s'\n",
    "          % (len(skipped_alleles), skipped_alleles))\n",
    "# The per-allele row index is kept as-is (not reset), so index values repeat across alleles.\n",
    "netmhc2_data = pd.concat(dfs)\n",
    "netmhc2_data['sequence_length'] = netmhc2_data['Peptide'].str.len()\n",
    "# Locus label: the first 4 characters for DRB alleles (e.g. 'DRB1'),\n",
    "# otherwise the first 6 (e.g. 'HLA-DP').\n",
    "genotypes = netmhc2_data['genotype']\n",
    "netmhc2_data['loci'] = np.where(\n",
    "    genotypes.str.startswith('DRB'),\n",
    "    genotypes.str[:4],\n",
    "    genotypes.str[:6],\n",
    ")\n",
    "# Score_BA in these tables already appears to equal 1 - log(nM) / log(50000)\n",
    "# (e.g. nM=3456.63 -> 0.2469), so no separate '1-log50k' column is derived.\n",
    "netmhc2_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>loci</th>\n",
       "      <th colspan=\"10\" halign=\"left\">DRB1</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"10\" halign=\"left\">HLA-DQ</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>genotype</th>\n",
       "      <th>DRB1_0101</th>\n",
       "      <th>DRB1_0102</th>\n",
       "      <th>DRB1_0103</th>\n",
       "      <th>DRB1_0301</th>\n",
       "      <th>DRB1_0302</th>\n",
       "      <th>DRB1_0401</th>\n",
       "      <th>DRB1_0402</th>\n",
       "      <th>DRB1_0403</th>\n",
       "      <th>DRB1_0404</th>\n",
       "      <th>DRB1_0405</th>\n",
       "      <th>...</th>\n",
       "      <th>HLA-DQA10505-DQB10309</th>\n",
       "      <th>HLA-DQA10505-DQB10319</th>\n",
       "      <th>HLA-DQA10505-DQB10402</th>\n",
       "      <th>HLA-DQA10505-DQB10501</th>\n",
       "      <th>HLA-DQA10505-DQB10502</th>\n",
       "      <th>HLA-DQA10506-DQB10303</th>\n",
       "      <th>HLA-DQA10508-DQB10301</th>\n",
       "      <th>HLA-DQA10509-DQB10301</th>\n",
       "      <th>HLA-DQA10601-DQB10301</th>\n",
       "      <th>unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Peptide</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRT</th>\n",
       "      <td>0.262275</td>\n",
       "      <td>0.210513</td>\n",
       "      <td>0.162170</td>\n",
       "      <td>0.138032</td>\n",
       "      <td>0.064328</td>\n",
       "      <td>0.200257</td>\n",
       "      <td>0.182535</td>\n",
       "      <td>0.152988</td>\n",
       "      <td>0.196000</td>\n",
       "      <td>0.237120</td>\n",
       "      <td>...</td>\n",
       "      <td>0.249702</td>\n",
       "      <td>0.249702</td>\n",
       "      <td>0.297920</td>\n",
       "      <td>0.264890</td>\n",
       "      <td>0.205538</td>\n",
       "      <td>0.227213</td>\n",
       "      <td>0.249702</td>\n",
       "      <td>0.249702</td>\n",
       "      <td>0.219885</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTF</th>\n",
       "      <td>0.435325</td>\n",
       "      <td>0.357479</td>\n",
       "      <td>0.232249</td>\n",
       "      <td>0.188956</td>\n",
       "      <td>0.081843</td>\n",
       "      <td>0.289715</td>\n",
       "      <td>0.248274</td>\n",
       "      <td>0.208763</td>\n",
       "      <td>0.302934</td>\n",
       "      <td>0.341891</td>\n",
       "      <td>...</td>\n",
       "      <td>0.333322</td>\n",
       "      <td>0.333322</td>\n",
       "      <td>0.435867</td>\n",
       "      <td>0.339701</td>\n",
       "      <td>0.268764</td>\n",
       "      <td>0.341743</td>\n",
       "      <td>0.333322</td>\n",
       "      <td>0.333322</td>\n",
       "      <td>0.294761</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFL</th>\n",
       "      <td>0.620743</td>\n",
       "      <td>0.533288</td>\n",
       "      <td>0.350694</td>\n",
       "      <td>0.281340</td>\n",
       "      <td>0.119930</td>\n",
       "      <td>0.388640</td>\n",
       "      <td>0.326797</td>\n",
       "      <td>0.272157</td>\n",
       "      <td>0.420422</td>\n",
       "      <td>0.449729</td>\n",
       "      <td>...</td>\n",
       "      <td>0.383470</td>\n",
       "      <td>0.383470</td>\n",
       "      <td>0.512765</td>\n",
       "      <td>0.384206</td>\n",
       "      <td>0.308803</td>\n",
       "      <td>0.387634</td>\n",
       "      <td>0.383470</td>\n",
       "      <td>0.383470</td>\n",
       "      <td>0.326042</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLL</th>\n",
       "      <td>0.603934</td>\n",
       "      <td>0.471715</td>\n",
       "      <td>0.317015</td>\n",
       "      <td>0.240276</td>\n",
       "      <td>0.107944</td>\n",
       "      <td>0.385170</td>\n",
       "      <td>0.287178</td>\n",
       "      <td>0.258092</td>\n",
       "      <td>0.384697</td>\n",
       "      <td>0.441300</td>\n",
       "      <td>...</td>\n",
       "      <td>0.374134</td>\n",
       "      <td>0.374134</td>\n",
       "      <td>0.490339</td>\n",
       "      <td>0.372928</td>\n",
       "      <td>0.299992</td>\n",
       "      <td>0.376569</td>\n",
       "      <td>0.374134</td>\n",
       "      <td>0.374134</td>\n",
       "      <td>0.327447</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLLK</th>\n",
       "      <td>0.669816</td>\n",
       "      <td>0.519657</td>\n",
       "      <td>0.374913</td>\n",
       "      <td>0.280090</td>\n",
       "      <td>0.120530</td>\n",
       "      <td>0.433591</td>\n",
       "      <td>0.309963</td>\n",
       "      <td>0.284692</td>\n",
       "      <td>0.428667</td>\n",
       "      <td>0.472981</td>\n",
       "      <td>...</td>\n",
       "      <td>0.354784</td>\n",
       "      <td>0.354784</td>\n",
       "      <td>0.482520</td>\n",
       "      <td>0.379043</td>\n",
       "      <td>0.295066</td>\n",
       "      <td>0.355889</td>\n",
       "      <td>0.354784</td>\n",
       "      <td>0.354784</td>\n",
       "      <td>0.307027</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMM</th>\n",
       "      <td>0.399611</td>\n",
       "      <td>0.273032</td>\n",
       "      <td>0.203689</td>\n",
       "      <td>0.191331</td>\n",
       "      <td>0.094848</td>\n",
       "      <td>0.360642</td>\n",
       "      <td>0.217532</td>\n",
       "      <td>0.232871</td>\n",
       "      <td>0.344261</td>\n",
       "      <td>0.421699</td>\n",
       "      <td>...</td>\n",
       "      <td>0.285683</td>\n",
       "      <td>0.285683</td>\n",
       "      <td>0.307903</td>\n",
       "      <td>0.269684</td>\n",
       "      <td>0.211645</td>\n",
       "      <td>0.291034</td>\n",
       "      <td>0.285683</td>\n",
       "      <td>0.285683</td>\n",
       "      <td>0.277248</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMC</th>\n",
       "      <td>0.400962</td>\n",
       "      <td>0.277270</td>\n",
       "      <td>0.206981</td>\n",
       "      <td>0.195065</td>\n",
       "      <td>0.095416</td>\n",
       "      <td>0.363595</td>\n",
       "      <td>0.220804</td>\n",
       "      <td>0.233858</td>\n",
       "      <td>0.347686</td>\n",
       "      <td>0.422885</td>\n",
       "      <td>...</td>\n",
       "      <td>0.292342</td>\n",
       "      <td>0.292342</td>\n",
       "      <td>0.311268</td>\n",
       "      <td>0.270227</td>\n",
       "      <td>0.212058</td>\n",
       "      <td>0.295712</td>\n",
       "      <td>0.292342</td>\n",
       "      <td>0.292342</td>\n",
       "      <td>0.281802</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCY</th>\n",
       "      <td>0.402886</td>\n",
       "      <td>0.282106</td>\n",
       "      <td>0.209602</td>\n",
       "      <td>0.198191</td>\n",
       "      <td>0.096931</td>\n",
       "      <td>0.365776</td>\n",
       "      <td>0.222661</td>\n",
       "      <td>0.234946</td>\n",
       "      <td>0.350836</td>\n",
       "      <td>0.424360</td>\n",
       "      <td>...</td>\n",
       "      <td>0.306004</td>\n",
       "      <td>0.306004</td>\n",
       "      <td>0.318901</td>\n",
       "      <td>0.272231</td>\n",
       "      <td>0.215326</td>\n",
       "      <td>0.307148</td>\n",
       "      <td>0.306004</td>\n",
       "      <td>0.306004</td>\n",
       "      <td>0.296308</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYK</th>\n",
       "      <td>0.405321</td>\n",
       "      <td>0.287790</td>\n",
       "      <td>0.211981</td>\n",
       "      <td>0.201682</td>\n",
       "      <td>0.097617</td>\n",
       "      <td>0.369570</td>\n",
       "      <td>0.224174</td>\n",
       "      <td>0.236694</td>\n",
       "      <td>0.353541</td>\n",
       "      <td>0.425438</td>\n",
       "      <td>...</td>\n",
       "      <td>0.319624</td>\n",
       "      <td>0.319624</td>\n",
       "      <td>0.327946</td>\n",
       "      <td>0.275950</td>\n",
       "      <td>0.218148</td>\n",
       "      <td>0.319855</td>\n",
       "      <td>0.319624</td>\n",
       "      <td>0.319624</td>\n",
       "      <td>0.306854</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYKR</th>\n",
       "      <td>0.409240</td>\n",
       "      <td>0.294411</td>\n",
       "      <td>0.214682</td>\n",
       "      <td>0.205183</td>\n",
       "      <td>0.098597</td>\n",
       "      <td>0.372695</td>\n",
       "      <td>0.226051</td>\n",
       "      <td>0.238848</td>\n",
       "      <td>0.356386</td>\n",
       "      <td>0.426486</td>\n",
       "      <td>...</td>\n",
       "      <td>0.333877</td>\n",
       "      <td>0.333877</td>\n",
       "      <td>0.336319</td>\n",
       "      <td>0.279893</td>\n",
       "      <td>0.219695</td>\n",
       "      <td>0.328877</td>\n",
       "      <td>0.333877</td>\n",
       "      <td>0.333877</td>\n",
       "      <td>0.317379</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>125593 rows × 283 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "loci                           DRB1                                          \\\n",
       "genotype                  DRB1_0101 DRB1_0102 DRB1_0103 DRB1_0301 DRB1_0302   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT              0.262275  0.210513  0.162170  0.138032  0.064328   \n",
       "AAAYYVGYLQPRTF             0.435325  0.357479  0.232249  0.188956  0.081843   \n",
       "AAAYYVGYLQPRTFL            0.620743  0.533288  0.350694  0.281340  0.119930   \n",
       "AAAYYVGYLQPRTFLL           0.603934  0.471715  0.317015  0.240276  0.107944   \n",
       "AAAYYVGYLQPRTFLLK          0.669816  0.519657  0.374913  0.280090  0.120530   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      0.399611  0.273032  0.203689  0.191331  0.094848   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     0.400962  0.277270  0.206981  0.195065  0.095416   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    0.402886  0.282106  0.209602  0.198191  0.096931   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   0.405321  0.287790  0.211981  0.201682  0.097617   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  0.409240  0.294411  0.214682  0.205183  0.098597   \n",
       "\n",
       "loci                                                                         \\\n",
       "genotype                  DRB1_0401 DRB1_0402 DRB1_0403 DRB1_0404 DRB1_0405   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT              0.200257  0.182535  0.152988  0.196000  0.237120   \n",
       "AAAYYVGYLQPRTF             0.289715  0.248274  0.208763  0.302934  0.341891   \n",
       "AAAYYVGYLQPRTFL            0.388640  0.326797  0.272157  0.420422  0.449729   \n",
       "AAAYYVGYLQPRTFLL           0.385170  0.287178  0.258092  0.384697  0.441300   \n",
       "AAAYYVGYLQPRTFLLK          0.433591  0.309963  0.284692  0.428667  0.472981   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      0.360642  0.217532  0.232871  0.344261  0.421699   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     0.363595  0.220804  0.233858  0.347686  0.422885   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    0.365776  0.222661  0.234946  0.350836  0.424360   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   0.369570  0.224174  0.236694  0.353541  0.425438   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  0.372695  0.226051  0.238848  0.356386  0.426486   \n",
       "\n",
       "loci                       ...                HLA-DQ                        \\\n",
       "genotype                   ... HLA-DQA10505-DQB10309 HLA-DQA10505-DQB10319   \n",
       "Peptide                    ...                                               \n",
       "AAAYYVGYLQPRT              ...              0.249702              0.249702   \n",
       "AAAYYVGYLQPRTF             ...              0.333322              0.333322   \n",
       "AAAYYVGYLQPRTFL            ...              0.383470              0.383470   \n",
       "AAAYYVGYLQPRTFLL           ...              0.374134              0.374134   \n",
       "AAAYYVGYLQPRTFLLK          ...              0.354784              0.354784   \n",
       "...                        ...                   ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      ...              0.285683              0.285683   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     ...              0.292342              0.292342   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    ...              0.306004              0.306004   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   ...              0.319624              0.319624   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  ...              0.333877              0.333877   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10402 HLA-DQA10505-DQB10501   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                          0.297920              0.264890   \n",
       "AAAYYVGYLQPRTF                         0.435867              0.339701   \n",
       "AAAYYVGYLQPRTFL                        0.512765              0.384206   \n",
       "AAAYYVGYLQPRTFLL                       0.490339              0.372928   \n",
       "AAAYYVGYLQPRTFLLK                      0.482520              0.379043   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.307903              0.269684   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.311268              0.270227   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.318901              0.272231   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.327946              0.275950   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.336319              0.279893   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10502 HLA-DQA10506-DQB10303   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                          0.205538              0.227213   \n",
       "AAAYYVGYLQPRTF                         0.268764              0.341743   \n",
       "AAAYYVGYLQPRTFL                        0.308803              0.387634   \n",
       "AAAYYVGYLQPRTFLL                       0.299992              0.376569   \n",
       "AAAYYVGYLQPRTFLLK                      0.295066              0.355889   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.211645              0.291034   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.212058              0.295712   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.215326              0.307148   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.218148              0.319855   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.219695              0.328877   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10508-DQB10301 HLA-DQA10509-DQB10301   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                          0.249702              0.249702   \n",
       "AAAYYVGYLQPRTF                         0.333322              0.333322   \n",
       "AAAYYVGYLQPRTFL                        0.383470              0.383470   \n",
       "AAAYYVGYLQPRTFLL                       0.374134              0.374134   \n",
       "AAAYYVGYLQPRTFLLK                      0.354784              0.354784   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.285683              0.285683   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.292342              0.292342   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.306004              0.306004   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.319624              0.319624   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.333877              0.333877   \n",
       "\n",
       "loci                                                     \n",
       "genotype                  HLA-DQA10601-DQB10301 unknown  \n",
       "Peptide                                                  \n",
       "AAAYYVGYLQPRT                          0.219885     0.0  \n",
       "AAAYYVGYLQPRTF                         0.294761     0.0  \n",
       "AAAYYVGYLQPRTFL                        0.326042     0.0  \n",
       "AAAYYVGYLQPRTFLL                       0.327447     0.0  \n",
       "AAAYYVGYLQPRTFLLK                      0.307027     0.0  \n",
       "...                                         ...     ...  \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.277248     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.281802     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.296308     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.306854     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.317379     0.0  \n",
       "\n",
       "[125593 rows x 283 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Write predicted binding affinity predictions.\n",
    "\n",
    "df = netmhc2_data\n",
    "\n",
    "df2 = df.groupby(['Peptide', 'loci']).count().reset_index()[['Peptide', 'loci']]\n",
    "df2['genotype'] = 'unknown'\n",
    "df2['Score'] = 0.\n",
    "df2['Rank'] = 0.\n",
    "df2['Score_BA'] = 0.\n",
    "df2['Rank_BA'] = 0.\n",
    "df2['nM'] = 0.\n",
    "\n",
    "data_pivot = pd.concat([df, df2], sort=False).pivot_table(\n",
    "    index='Peptide',\n",
    "    columns=['loci', 'genotype'],\n",
    "    values='Score_BA',\n",
    ")\n",
    "data_pivot.to_pickle('mhc2_haplotype_netmhcii-4.0_pred_affinity_pivot_v1v2.pkl.gz', protocol=2)\n",
    "data_pivot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>loci</th>\n",
       "      <th colspan=\"10\" halign=\"left\">DRB1</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"10\" halign=\"left\">HLA-DQ</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>genotype</th>\n",
       "      <th>DRB1_0101</th>\n",
       "      <th>DRB1_0102</th>\n",
       "      <th>DRB1_0103</th>\n",
       "      <th>DRB1_0301</th>\n",
       "      <th>DRB1_0302</th>\n",
       "      <th>DRB1_0401</th>\n",
       "      <th>DRB1_0402</th>\n",
       "      <th>DRB1_0403</th>\n",
       "      <th>DRB1_0404</th>\n",
       "      <th>DRB1_0405</th>\n",
       "      <th>...</th>\n",
       "      <th>HLA-DQA10505-DQB10302</th>\n",
       "      <th>HLA-DQA10505-DQB10309</th>\n",
       "      <th>HLA-DQA10505-DQB10319</th>\n",
       "      <th>HLA-DQA10505-DQB10402</th>\n",
       "      <th>HLA-DQA10505-DQB10501</th>\n",
       "      <th>HLA-DQA10505-DQB10502</th>\n",
       "      <th>HLA-DQA10506-DQB10303</th>\n",
       "      <th>HLA-DQA10508-DQB10301</th>\n",
       "      <th>HLA-DQA10509-DQB10301</th>\n",
       "      <th>HLA-DQA10601-DQB10301</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Peptide</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRT</th>\n",
       "      <td>49.55</td>\n",
       "      <td>42.30</td>\n",
       "      <td>58.67</td>\n",
       "      <td>63.13</td>\n",
       "      <td>73.96</td>\n",
       "      <td>47.28</td>\n",
       "      <td>53.43</td>\n",
       "      <td>47.67</td>\n",
       "      <td>30.49</td>\n",
       "      <td>45.41</td>\n",
       "      <td>...</td>\n",
       "      <td>40.69</td>\n",
       "      <td>84.33</td>\n",
       "      <td>84.33</td>\n",
       "      <td>72.33</td>\n",
       "      <td>54.93</td>\n",
       "      <td>81.42</td>\n",
       "      <td>58.50</td>\n",
       "      <td>84.33</td>\n",
       "      <td>84.33</td>\n",
       "      <td>78.62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTF</th>\n",
       "      <td>56.91</td>\n",
       "      <td>59.24</td>\n",
       "      <td>69.37</td>\n",
       "      <td>62.94</td>\n",
       "      <td>75.26</td>\n",
       "      <td>48.72</td>\n",
       "      <td>68.74</td>\n",
       "      <td>50.66</td>\n",
       "      <td>38.28</td>\n",
       "      <td>55.17</td>\n",
       "      <td>...</td>\n",
       "      <td>50.49</td>\n",
       "      <td>82.80</td>\n",
       "      <td>82.80</td>\n",
       "      <td>78.55</td>\n",
       "      <td>63.17</td>\n",
       "      <td>82.94</td>\n",
       "      <td>62.70</td>\n",
       "      <td>82.80</td>\n",
       "      <td>82.80</td>\n",
       "      <td>78.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFL</th>\n",
       "      <td>58.92</td>\n",
       "      <td>73.44</td>\n",
       "      <td>71.54</td>\n",
       "      <td>58.67</td>\n",
       "      <td>73.21</td>\n",
       "      <td>40.21</td>\n",
       "      <td>65.65</td>\n",
       "      <td>42.11</td>\n",
       "      <td>29.50</td>\n",
       "      <td>47.17</td>\n",
       "      <td>...</td>\n",
       "      <td>41.01</td>\n",
       "      <td>75.16</td>\n",
       "      <td>75.16</td>\n",
       "      <td>68.35</td>\n",
       "      <td>62.58</td>\n",
       "      <td>73.86</td>\n",
       "      <td>50.36</td>\n",
       "      <td>75.16</td>\n",
       "      <td>75.16</td>\n",
       "      <td>70.55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLL</th>\n",
       "      <td>51.01</td>\n",
       "      <td>67.06</td>\n",
       "      <td>64.39</td>\n",
       "      <td>50.77</td>\n",
       "      <td>59.00</td>\n",
       "      <td>27.82</td>\n",
       "      <td>53.27</td>\n",
       "      <td>30.22</td>\n",
       "      <td>21.77</td>\n",
       "      <td>37.50</td>\n",
       "      <td>...</td>\n",
       "      <td>35.00</td>\n",
       "      <td>64.62</td>\n",
       "      <td>64.62</td>\n",
       "      <td>52.23</td>\n",
       "      <td>45.77</td>\n",
       "      <td>53.66</td>\n",
       "      <td>39.54</td>\n",
       "      <td>64.62</td>\n",
       "      <td>64.62</td>\n",
       "      <td>58.08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLLK</th>\n",
       "      <td>69.09</td>\n",
       "      <td>74.68</td>\n",
       "      <td>74.51</td>\n",
       "      <td>41.10</td>\n",
       "      <td>49.39</td>\n",
       "      <td>42.60</td>\n",
       "      <td>46.21</td>\n",
       "      <td>30.74</td>\n",
       "      <td>23.76</td>\n",
       "      <td>33.08</td>\n",
       "      <td>...</td>\n",
       "      <td>31.49</td>\n",
       "      <td>52.17</td>\n",
       "      <td>52.17</td>\n",
       "      <td>39.41</td>\n",
       "      <td>41.95</td>\n",
       "      <td>36.83</td>\n",
       "      <td>33.72</td>\n",
       "      <td>52.17</td>\n",
       "      <td>52.17</td>\n",
       "      <td>48.23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMM</th>\n",
       "      <td>5.35</td>\n",
       "      <td>5.00</td>\n",
       "      <td>10.74</td>\n",
       "      <td>5.24</td>\n",
       "      <td>8.20</td>\n",
       "      <td>41.29</td>\n",
       "      <td>17.81</td>\n",
       "      <td>25.87</td>\n",
       "      <td>24.18</td>\n",
       "      <td>56.19</td>\n",
       "      <td>...</td>\n",
       "      <td>6.48</td>\n",
       "      <td>5.07</td>\n",
       "      <td>5.07</td>\n",
       "      <td>5.00</td>\n",
       "      <td>5.00</td>\n",
       "      <td>5.81</td>\n",
       "      <td>10.04</td>\n",
       "      <td>5.07</td>\n",
       "      <td>5.07</td>\n",
       "      <td>5.16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMC</th>\n",
       "      <td>5.78</td>\n",
       "      <td>5.00</td>\n",
       "      <td>11.53</td>\n",
       "      <td>5.13</td>\n",
       "      <td>8.50</td>\n",
       "      <td>42.73</td>\n",
       "      <td>18.43</td>\n",
       "      <td>26.91</td>\n",
       "      <td>25.46</td>\n",
       "      <td>57.03</td>\n",
       "      <td>...</td>\n",
       "      <td>6.38</td>\n",
       "      <td>5.03</td>\n",
       "      <td>5.03</td>\n",
       "      <td>5.00</td>\n",
       "      <td>5.00</td>\n",
       "      <td>5.18</td>\n",
       "      <td>9.29</td>\n",
       "      <td>5.03</td>\n",
       "      <td>5.03</td>\n",
       "      <td>5.02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCY</th>\n",
       "      <td>6.27</td>\n",
       "      <td>5.42</td>\n",
       "      <td>12.38</td>\n",
       "      <td>5.42</td>\n",
       "      <td>8.80</td>\n",
       "      <td>43.94</td>\n",
       "      <td>19.29</td>\n",
       "      <td>27.82</td>\n",
       "      <td>26.36</td>\n",
       "      <td>57.68</td>\n",
       "      <td>...</td>\n",
       "      <td>6.83</td>\n",
       "      <td>5.23</td>\n",
       "      <td>5.23</td>\n",
       "      <td>5.00</td>\n",
       "      <td>5.00</td>\n",
       "      <td>5.35</td>\n",
       "      <td>9.49</td>\n",
       "      <td>5.23</td>\n",
       "      <td>5.23</td>\n",
       "      <td>5.35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYK</th>\n",
       "      <td>6.73</td>\n",
       "      <td>5.82</td>\n",
       "      <td>13.23</td>\n",
       "      <td>5.68</td>\n",
       "      <td>9.00</td>\n",
       "      <td>44.96</td>\n",
       "      <td>20.01</td>\n",
       "      <td>28.60</td>\n",
       "      <td>27.12</td>\n",
       "      <td>58.19</td>\n",
       "      <td>...</td>\n",
       "      <td>7.11</td>\n",
       "      <td>5.89</td>\n",
       "      <td>5.89</td>\n",
       "      <td>5.00</td>\n",
       "      <td>5.00</td>\n",
       "      <td>5.51</td>\n",
       "      <td>9.78</td>\n",
       "      <td>5.89</td>\n",
       "      <td>5.89</td>\n",
       "      <td>5.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYKR</th>\n",
       "      <td>7.17</td>\n",
       "      <td>6.27</td>\n",
       "      <td>13.79</td>\n",
       "      <td>5.87</td>\n",
       "      <td>9.30</td>\n",
       "      <td>45.62</td>\n",
       "      <td>20.51</td>\n",
       "      <td>29.26</td>\n",
       "      <td>27.89</td>\n",
       "      <td>58.61</td>\n",
       "      <td>...</td>\n",
       "      <td>7.46</td>\n",
       "      <td>9.21</td>\n",
       "      <td>9.21</td>\n",
       "      <td>5.00</td>\n",
       "      <td>5.00</td>\n",
       "      <td>5.65</td>\n",
       "      <td>10.57</td>\n",
       "      <td>9.21</td>\n",
       "      <td>9.21</td>\n",
       "      <td>6.79</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>125593 rows × 280 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "loci                           DRB1                                          \\\n",
       "genotype                  DRB1_0101 DRB1_0102 DRB1_0103 DRB1_0301 DRB1_0302   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT                 49.55     42.30     58.67     63.13     73.96   \n",
       "AAAYYVGYLQPRTF                56.91     59.24     69.37     62.94     75.26   \n",
       "AAAYYVGYLQPRTFL               58.92     73.44     71.54     58.67     73.21   \n",
       "AAAYYVGYLQPRTFLL              51.01     67.06     64.39     50.77     59.00   \n",
       "AAAYYVGYLQPRTFLLK             69.09     74.68     74.51     41.10     49.39   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM          5.35      5.00     10.74      5.24      8.20   \n",
       "YYVWKSYVHVVDGCNSSTCMMC         5.78      5.00     11.53      5.13      8.50   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY        6.27      5.42     12.38      5.42      8.80   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK       6.73      5.82     13.23      5.68      9.00   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR      7.17      6.27     13.79      5.87      9.30   \n",
       "\n",
       "loci                                                                         \\\n",
       "genotype                  DRB1_0401 DRB1_0402 DRB1_0403 DRB1_0404 DRB1_0405   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT                 47.28     53.43     47.67     30.49     45.41   \n",
       "AAAYYVGYLQPRTF                48.72     68.74     50.66     38.28     55.17   \n",
       "AAAYYVGYLQPRTFL               40.21     65.65     42.11     29.50     47.17   \n",
       "AAAYYVGYLQPRTFLL              27.82     53.27     30.22     21.77     37.50   \n",
       "AAAYYVGYLQPRTFLLK             42.60     46.21     30.74     23.76     33.08   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM         41.29     17.81     25.87     24.18     56.19   \n",
       "YYVWKSYVHVVDGCNSSTCMMC        42.73     18.43     26.91     25.46     57.03   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY       43.94     19.29     27.82     26.36     57.68   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK      44.96     20.01     28.60     27.12     58.19   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR     45.62     20.51     29.26     27.89     58.61   \n",
       "\n",
       "loci                       ...                HLA-DQ                        \\\n",
       "genotype                   ... HLA-DQA10505-DQB10302 HLA-DQA10505-DQB10309   \n",
       "Peptide                    ...                                               \n",
       "AAAYYVGYLQPRT              ...                 40.69                 84.33   \n",
       "AAAYYVGYLQPRTF             ...                 50.49                 82.80   \n",
       "AAAYYVGYLQPRTFL            ...                 41.01                 75.16   \n",
       "AAAYYVGYLQPRTFLL           ...                 35.00                 64.62   \n",
       "AAAYYVGYLQPRTFLLK          ...                 31.49                 52.17   \n",
       "...                        ...                   ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      ...                  6.48                  5.07   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     ...                  6.38                  5.03   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    ...                  6.83                  5.23   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   ...                  7.11                  5.89   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  ...                  7.46                  9.21   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10319 HLA-DQA10505-DQB10402   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                             84.33                 72.33   \n",
       "AAAYYVGYLQPRTF                            82.80                 78.55   \n",
       "AAAYYVGYLQPRTFL                           75.16                 68.35   \n",
       "AAAYYVGYLQPRTFLL                          64.62                 52.23   \n",
       "AAAYYVGYLQPRTFLLK                         52.17                 39.41   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                      5.07                  5.00   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                     5.03                  5.00   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                    5.23                  5.00   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                   5.89                  5.00   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                  9.21                  5.00   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10501 HLA-DQA10505-DQB10502   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                             54.93                 81.42   \n",
       "AAAYYVGYLQPRTF                            63.17                 82.94   \n",
       "AAAYYVGYLQPRTFL                           62.58                 73.86   \n",
       "AAAYYVGYLQPRTFLL                          45.77                 53.66   \n",
       "AAAYYVGYLQPRTFLLK                         41.95                 36.83   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                      5.00                  5.81   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                     5.00                  5.18   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                    5.00                  5.35   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                   5.00                  5.51   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                  5.00                  5.65   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10506-DQB10303 HLA-DQA10508-DQB10301   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                             58.50                 84.33   \n",
       "AAAYYVGYLQPRTF                            62.70                 82.80   \n",
       "AAAYYVGYLQPRTFL                           50.36                 75.16   \n",
       "AAAYYVGYLQPRTFLL                          39.54                 64.62   \n",
       "AAAYYVGYLQPRTFLLK                         33.72                 52.17   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                     10.04                  5.07   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                     9.29                  5.03   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                    9.49                  5.23   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                   9.78                  5.89   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                 10.57                  9.21   \n",
       "\n",
       "loci                                                                   \n",
       "genotype                  HLA-DQA10509-DQB10301 HLA-DQA10601-DQB10301  \n",
       "Peptide                                                                \n",
       "AAAYYVGYLQPRT                             84.33                 78.62  \n",
       "AAAYYVGYLQPRTF                            82.80                 78.40  \n",
       "AAAYYVGYLQPRTFL                           75.16                 70.55  \n",
       "AAAYYVGYLQPRTFLL                          64.62                 58.08  \n",
       "AAAYYVGYLQPRTFLLK                         52.17                 48.23  \n",
       "...                                         ...                   ...  \n",
       "YYVWKSYVHVVDGCNSSTCMM                      5.07                  5.16  \n",
       "YYVWKSYVHVVDGCNSSTCMMC                     5.03                  5.02  \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                    5.23                  5.35  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                   5.89                  5.76  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                  9.21                  6.79  \n",
       "\n",
       "[125593 rows x 280 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Save the EL-score rank predictions as a Peptide x (loci, genotype) table.\n",
    "\n",
    "df = netmhc2_data\n",
    "\n",
    "# Flip the scale (100 - rank) so that the threshold reads as\n",
    "# >= 99.5 instead of <= 0.5.\n",
    "data_pivot = 100 - netmhc2_data.pivot_table(\n",
    "    values='Rank',\n",
    "    index='Peptide',\n",
    "    columns=['loci', 'genotype'],\n",
    ")\n",
    "data_pivot.to_pickle('mhc2_haplotype_netmhcii-4.0_el_rank_pivot_v2.pkl.gz', protocol=2)\n",
    "data_pivot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>loci</th>\n",
       "      <th colspan=\"10\" halign=\"left\">DRB1</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"10\" halign=\"left\">HLA-DQ</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>genotype</th>\n",
       "      <th>DRB1_0101</th>\n",
       "      <th>DRB1_0102</th>\n",
       "      <th>DRB1_0103</th>\n",
       "      <th>DRB1_0301</th>\n",
       "      <th>DRB1_0302</th>\n",
       "      <th>DRB1_0401</th>\n",
       "      <th>DRB1_0402</th>\n",
       "      <th>DRB1_0403</th>\n",
       "      <th>DRB1_0404</th>\n",
       "      <th>DRB1_0405</th>\n",
       "      <th>...</th>\n",
       "      <th>HLA-DQA10505-DQB10302</th>\n",
       "      <th>HLA-DQA10505-DQB10309</th>\n",
       "      <th>HLA-DQA10505-DQB10319</th>\n",
       "      <th>HLA-DQA10505-DQB10402</th>\n",
       "      <th>HLA-DQA10505-DQB10501</th>\n",
       "      <th>HLA-DQA10505-DQB10502</th>\n",
       "      <th>HLA-DQA10506-DQB10303</th>\n",
       "      <th>HLA-DQA10508-DQB10301</th>\n",
       "      <th>HLA-DQA10509-DQB10301</th>\n",
       "      <th>HLA-DQA10601-DQB10301</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Peptide</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRT</th>\n",
       "      <td>31.27</td>\n",
       "      <td>27.39</td>\n",
       "      <td>36.80</td>\n",
       "      <td>28.28</td>\n",
       "      <td>34.18</td>\n",
       "      <td>30.15</td>\n",
       "      <td>42.73</td>\n",
       "      <td>30.35</td>\n",
       "      <td>27.78</td>\n",
       "      <td>40.20</td>\n",
       "      <td>...</td>\n",
       "      <td>38.97</td>\n",
       "      <td>44.30</td>\n",
       "      <td>44.30</td>\n",
       "      <td>51.11</td>\n",
       "      <td>59.32</td>\n",
       "      <td>66.54</td>\n",
       "      <td>30.60</td>\n",
       "      <td>44.30</td>\n",
       "      <td>44.30</td>\n",
       "      <td>35.11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTF</th>\n",
       "      <td>67.76</td>\n",
       "      <td>69.94</td>\n",
       "      <td>67.74</td>\n",
       "      <td>49.59</td>\n",
       "      <td>56.03</td>\n",
       "      <td>58.28</td>\n",
       "      <td>69.87</td>\n",
       "      <td>57.24</td>\n",
       "      <td>57.25</td>\n",
       "      <td>69.57</td>\n",
       "      <td>...</td>\n",
       "      <td>74.25</td>\n",
       "      <td>69.05</td>\n",
       "      <td>69.05</td>\n",
       "      <td>89.15</td>\n",
       "      <td>89.76</td>\n",
       "      <td>93.88</td>\n",
       "      <td>69.34</td>\n",
       "      <td>69.05</td>\n",
       "      <td>69.05</td>\n",
       "      <td>62.72</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFL</th>\n",
       "      <td>92.52</td>\n",
       "      <td>95.35</td>\n",
       "      <td>92.74</td>\n",
       "      <td>78.44</td>\n",
       "      <td>84.58</td>\n",
       "      <td>82.62</td>\n",
       "      <td>89.66</td>\n",
       "      <td>81.19</td>\n",
       "      <td>83.07</td>\n",
       "      <td>89.92</td>\n",
       "      <td>...</td>\n",
       "      <td>81.16</td>\n",
       "      <td>79.14</td>\n",
       "      <td>79.14</td>\n",
       "      <td>97.49</td>\n",
       "      <td>97.50</td>\n",
       "      <td>98.68</td>\n",
       "      <td>81.28</td>\n",
       "      <td>79.14</td>\n",
       "      <td>79.14</td>\n",
       "      <td>71.59</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLL</th>\n",
       "      <td>91.12</td>\n",
       "      <td>90.22</td>\n",
       "      <td>88.57</td>\n",
       "      <td>67.90</td>\n",
       "      <td>78.45</td>\n",
       "      <td>81.96</td>\n",
       "      <td>81.51</td>\n",
       "      <td>76.86</td>\n",
       "      <td>76.52</td>\n",
       "      <td>88.86</td>\n",
       "      <td>...</td>\n",
       "      <td>80.93</td>\n",
       "      <td>77.50</td>\n",
       "      <td>77.50</td>\n",
       "      <td>95.89</td>\n",
       "      <td>96.21</td>\n",
       "      <td>98.11</td>\n",
       "      <td>78.73</td>\n",
       "      <td>77.50</td>\n",
       "      <td>77.50</td>\n",
       "      <td>71.97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLLK</th>\n",
       "      <td>95.70</td>\n",
       "      <td>94.47</td>\n",
       "      <td>94.79</td>\n",
       "      <td>78.17</td>\n",
       "      <td>84.84</td>\n",
       "      <td>89.30</td>\n",
       "      <td>86.61</td>\n",
       "      <td>84.49</td>\n",
       "      <td>84.41</td>\n",
       "      <td>92.48</td>\n",
       "      <td>...</td>\n",
       "      <td>71.93</td>\n",
       "      <td>73.72</td>\n",
       "      <td>73.72</td>\n",
       "      <td>95.21</td>\n",
       "      <td>96.97</td>\n",
       "      <td>97.66</td>\n",
       "      <td>73.39</td>\n",
       "      <td>73.72</td>\n",
       "      <td>73.72</td>\n",
       "      <td>66.37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMM</th>\n",
       "      <td>60.90</td>\n",
       "      <td>46.33</td>\n",
       "      <td>56.44</td>\n",
       "      <td>50.53</td>\n",
       "      <td>69.03</td>\n",
       "      <td>76.94</td>\n",
       "      <td>58.06</td>\n",
       "      <td>67.65</td>\n",
       "      <td>67.61</td>\n",
       "      <td>85.89</td>\n",
       "      <td>...</td>\n",
       "      <td>43.98</td>\n",
       "      <td>56.32</td>\n",
       "      <td>56.32</td>\n",
       "      <td>54.63</td>\n",
       "      <td>61.76</td>\n",
       "      <td>70.44</td>\n",
       "      <td>52.95</td>\n",
       "      <td>56.32</td>\n",
       "      <td>56.32</td>\n",
       "      <td>57.15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMC</th>\n",
       "      <td>61.17</td>\n",
       "      <td>47.64</td>\n",
       "      <td>57.82</td>\n",
       "      <td>52.02</td>\n",
       "      <td>69.52</td>\n",
       "      <td>77.59</td>\n",
       "      <td>59.38</td>\n",
       "      <td>68.04</td>\n",
       "      <td>68.39</td>\n",
       "      <td>86.08</td>\n",
       "      <td>...</td>\n",
       "      <td>45.22</td>\n",
       "      <td>58.26</td>\n",
       "      <td>58.26</td>\n",
       "      <td>55.79</td>\n",
       "      <td>62.03</td>\n",
       "      <td>70.70</td>\n",
       "      <td>54.57</td>\n",
       "      <td>58.26</td>\n",
       "      <td>58.26</td>\n",
       "      <td>58.64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCY</th>\n",
       "      <td>61.55</td>\n",
       "      <td>49.13</td>\n",
       "      <td>58.92</td>\n",
       "      <td>53.27</td>\n",
       "      <td>70.78</td>\n",
       "      <td>78.06</td>\n",
       "      <td>60.10</td>\n",
       "      <td>68.46</td>\n",
       "      <td>69.08</td>\n",
       "      <td>86.32</td>\n",
       "      <td>...</td>\n",
       "      <td>52.55</td>\n",
       "      <td>62.05</td>\n",
       "      <td>62.05</td>\n",
       "      <td>58.27</td>\n",
       "      <td>63.01</td>\n",
       "      <td>72.65</td>\n",
       "      <td>58.44</td>\n",
       "      <td>62.05</td>\n",
       "      <td>62.05</td>\n",
       "      <td>63.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYK</th>\n",
       "      <td>62.02</td>\n",
       "      <td>50.81</td>\n",
       "      <td>59.90</td>\n",
       "      <td>54.57</td>\n",
       "      <td>71.32</td>\n",
       "      <td>78.88</td>\n",
       "      <td>60.71</td>\n",
       "      <td>69.14</td>\n",
       "      <td>69.75</td>\n",
       "      <td>86.49</td>\n",
       "      <td>...</td>\n",
       "      <td>53.74</td>\n",
       "      <td>65.68</td>\n",
       "      <td>65.68</td>\n",
       "      <td>61.34</td>\n",
       "      <td>64.81</td>\n",
       "      <td>74.32</td>\n",
       "      <td>62.60</td>\n",
       "      <td>65.68</td>\n",
       "      <td>65.68</td>\n",
       "      <td>66.32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYKR</th>\n",
       "      <td>62.79</td>\n",
       "      <td>52.76</td>\n",
       "      <td>61.01</td>\n",
       "      <td>55.94</td>\n",
       "      <td>72.08</td>\n",
       "      <td>79.54</td>\n",
       "      <td>61.45</td>\n",
       "      <td>69.98</td>\n",
       "      <td>70.39</td>\n",
       "      <td>86.65</td>\n",
       "      <td>...</td>\n",
       "      <td>55.52</td>\n",
       "      <td>69.18</td>\n",
       "      <td>69.18</td>\n",
       "      <td>64.10</td>\n",
       "      <td>66.62</td>\n",
       "      <td>75.20</td>\n",
       "      <td>65.50</td>\n",
       "      <td>69.18</td>\n",
       "      <td>69.18</td>\n",
       "      <td>69.27</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>125593 rows × 280 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "loci                           DRB1                                          \\\n",
       "genotype                  DRB1_0101 DRB1_0102 DRB1_0103 DRB1_0301 DRB1_0302   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT                 31.27     27.39     36.80     28.28     34.18   \n",
       "AAAYYVGYLQPRTF                67.76     69.94     67.74     49.59     56.03   \n",
       "AAAYYVGYLQPRTFL               92.52     95.35     92.74     78.44     84.58   \n",
       "AAAYYVGYLQPRTFLL              91.12     90.22     88.57     67.90     78.45   \n",
       "AAAYYVGYLQPRTFLLK             95.70     94.47     94.79     78.17     84.84   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM         60.90     46.33     56.44     50.53     69.03   \n",
       "YYVWKSYVHVVDGCNSSTCMMC        61.17     47.64     57.82     52.02     69.52   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY       61.55     49.13     58.92     53.27     70.78   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK      62.02     50.81     59.90     54.57     71.32   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR     62.79     52.76     61.01     55.94     72.08   \n",
       "\n",
       "loci                                                                         \\\n",
       "genotype                  DRB1_0401 DRB1_0402 DRB1_0403 DRB1_0404 DRB1_0405   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT                 30.15     42.73     30.35     27.78     40.20   \n",
       "AAAYYVGYLQPRTF                58.28     69.87     57.24     57.25     69.57   \n",
       "AAAYYVGYLQPRTFL               82.62     89.66     81.19     83.07     89.92   \n",
       "AAAYYVGYLQPRTFLL              81.96     81.51     76.86     76.52     88.86   \n",
       "AAAYYVGYLQPRTFLLK             89.30     86.61     84.49     84.41     92.48   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM         76.94     58.06     67.65     67.61     85.89   \n",
       "YYVWKSYVHVVDGCNSSTCMMC        77.59     59.38     68.04     68.39     86.08   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY       78.06     60.10     68.46     69.08     86.32   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK      78.88     60.71     69.14     69.75     86.49   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR     79.54     61.45     69.98     70.39     86.65   \n",
       "\n",
       "loci                       ...                HLA-DQ                        \\\n",
       "genotype                   ... HLA-DQA10505-DQB10302 HLA-DQA10505-DQB10309   \n",
       "Peptide                    ...                                               \n",
       "AAAYYVGYLQPRT              ...                 38.97                 44.30   \n",
       "AAAYYVGYLQPRTF             ...                 74.25                 69.05   \n",
       "AAAYYVGYLQPRTFL            ...                 81.16                 79.14   \n",
       "AAAYYVGYLQPRTFLL           ...                 80.93                 77.50   \n",
       "AAAYYVGYLQPRTFLLK          ...                 71.93                 73.72   \n",
       "...                        ...                   ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      ...                 43.98                 56.32   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     ...                 45.22                 58.26   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    ...                 52.55                 62.05   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   ...                 53.74                 65.68   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  ...                 55.52                 69.18   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10319 HLA-DQA10505-DQB10402   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                             44.30                 51.11   \n",
       "AAAYYVGYLQPRTF                            69.05                 89.15   \n",
       "AAAYYVGYLQPRTFL                           79.14                 97.49   \n",
       "AAAYYVGYLQPRTFLL                          77.50                 95.89   \n",
       "AAAYYVGYLQPRTFLLK                         73.72                 95.21   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                     56.32                 54.63   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                    58.26                 55.79   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                   62.05                 58.27   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                  65.68                 61.34   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                 69.18                 64.10   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10501 HLA-DQA10505-DQB10502   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                             59.32                 66.54   \n",
       "AAAYYVGYLQPRTF                            89.76                 93.88   \n",
       "AAAYYVGYLQPRTFL                           97.50                 98.68   \n",
       "AAAYYVGYLQPRTFLL                          96.21                 98.11   \n",
       "AAAYYVGYLQPRTFLLK                         96.97                 97.66   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                     61.76                 70.44   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                    62.03                 70.70   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                   63.01                 72.65   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                  64.81                 74.32   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                 66.62                 75.20   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10506-DQB10303 HLA-DQA10508-DQB10301   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                             30.60                 44.30   \n",
       "AAAYYVGYLQPRTF                            69.34                 69.05   \n",
       "AAAYYVGYLQPRTFL                           81.28                 79.14   \n",
       "AAAYYVGYLQPRTFLL                          78.73                 77.50   \n",
       "AAAYYVGYLQPRTFLLK                         73.39                 73.72   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                     52.95                 56.32   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                    54.57                 58.26   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                   58.44                 62.05   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                  62.60                 65.68   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                 65.50                 69.18   \n",
       "\n",
       "loci                                                                   \n",
       "genotype                  HLA-DQA10509-DQB10301 HLA-DQA10601-DQB10301  \n",
       "Peptide                                                                \n",
       "AAAYYVGYLQPRT                             44.30                 35.11  \n",
       "AAAYYVGYLQPRTF                            69.05                 62.72  \n",
       "AAAYYVGYLQPRTFL                           79.14                 71.59  \n",
       "AAAYYVGYLQPRTFLL                          77.50                 71.97  \n",
       "AAAYYVGYLQPRTFLLK                         73.72                 66.37  \n",
       "...                                         ...                   ...  \n",
       "YYVWKSYVHVVDGCNSSTCMM                     56.32                 57.15  \n",
       "YYVWKSYVHVVDGCNSSTCMMC                    58.26                 58.64  \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                   62.05                 63.18  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                  65.68                 66.32  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                 69.18                 69.27  \n",
       "\n",
       "[125593 rows x 280 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Save the BA-score rank predictions as a Peptide x (loci, genotype) table.\n",
    "\n",
    "df = netmhc2_data\n",
    "\n",
    "# Flip the scale (100 - rank) so that the threshold reads as\n",
    "# >= 99.5 instead of <= 0.5.\n",
    "data_pivot = 100 - netmhc2_data.pivot_table(\n",
    "    values='Rank_BA',\n",
    "    index='Peptide',\n",
    "    columns=['loci', 'genotype'],\n",
    ")\n",
    "data_pivot.to_pickle('mhc2_haplotype_netmhcii-4.0_ba_rank_pivot_v2.pkl.gz', protocol=2)\n",
    "data_pivot"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load NetMHCIIpan3.2 predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded 280 alleles\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Peptide</th>\n",
       "      <th>1-log50k</th>\n",
       "      <th>nM</th>\n",
       "      <th>Rank</th>\n",
       "      <th>genotype</th>\n",
       "      <th>sequence_length</th>\n",
       "      <th>loci</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>MYSFVSEETGTLI</td>\n",
       "      <td>0.209</td>\n",
       "      <td>5188.97</td>\n",
       "      <td>46.0</td>\n",
       "      <td>HLA-DPA10301-DPB11301</td>\n",
       "      <td>13</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>YSFVSEETGTLIV</td>\n",
       "      <td>0.211</td>\n",
       "      <td>5106.81</td>\n",
       "      <td>45.0</td>\n",
       "      <td>HLA-DPA10301-DPB11301</td>\n",
       "      <td>13</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SFVSEETGTLIVN</td>\n",
       "      <td>0.189</td>\n",
       "      <td>6500.21</td>\n",
       "      <td>55.0</td>\n",
       "      <td>HLA-DPA10301-DPB11301</td>\n",
       "      <td>13</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>FVSEETGTLIVNS</td>\n",
       "      <td>0.167</td>\n",
       "      <td>8166.10</td>\n",
       "      <td>65.0</td>\n",
       "      <td>HLA-DPA10301-DPB11301</td>\n",
       "      <td>13</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>VSEETGTLIVNSV</td>\n",
       "      <td>0.151</td>\n",
       "      <td>9707.22</td>\n",
       "      <td>70.0</td>\n",
       "      <td>HLA-DPA10301-DPB11301</td>\n",
       "      <td>13</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125588</th>\n",
       "      <td>KGCCSCGSCCKFDEDDSEPVLKGVK</td>\n",
       "      <td>0.218</td>\n",
       "      <td>4718.05</td>\n",
       "      <td>95.0</td>\n",
       "      <td>HLA-DPA10103-DPB14101</td>\n",
       "      <td>25</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125589</th>\n",
       "      <td>GCCSCGSCCKFDEDDSEPVLKGVKL</td>\n",
       "      <td>0.232</td>\n",
       "      <td>4066.30</td>\n",
       "      <td>95.0</td>\n",
       "      <td>HLA-DPA10103-DPB14101</td>\n",
       "      <td>25</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125590</th>\n",
       "      <td>CCSCGSCCKFDEDDSEPVLKGVKLH</td>\n",
       "      <td>0.248</td>\n",
       "      <td>3417.66</td>\n",
       "      <td>90.0</td>\n",
       "      <td>HLA-DPA10103-DPB14101</td>\n",
       "      <td>25</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125591</th>\n",
       "      <td>CSCGSCCKFDEDDSEPVLKGVKLHY</td>\n",
       "      <td>0.316</td>\n",
       "      <td>1644.14</td>\n",
       "      <td>75.0</td>\n",
       "      <td>HLA-DPA10103-DPB14101</td>\n",
       "      <td>25</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125592</th>\n",
       "      <td>SCGSCCKFDEDDSEPVLKGVKLHYT</td>\n",
       "      <td>0.355</td>\n",
       "      <td>1068.72</td>\n",
       "      <td>65.0</td>\n",
       "      <td>HLA-DPA10103-DPB14101</td>\n",
       "      <td>25</td>\n",
       "      <td>HLA-DP</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>35166040 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                          Peptide  1-log50k       nM  Rank  \\\n",
       "0                   MYSFVSEETGTLI     0.209  5188.97  46.0   \n",
       "1                   YSFVSEETGTLIV     0.211  5106.81  45.0   \n",
       "2                   SFVSEETGTLIVN     0.189  6500.21  55.0   \n",
       "3                   FVSEETGTLIVNS     0.167  8166.10  65.0   \n",
       "4                   VSEETGTLIVNSV     0.151  9707.22  70.0   \n",
       "...                           ...       ...      ...   ...   \n",
       "125588  KGCCSCGSCCKFDEDDSEPVLKGVK     0.218  4718.05  95.0   \n",
       "125589  GCCSCGSCCKFDEDDSEPVLKGVKL     0.232  4066.30  95.0   \n",
       "125590  CCSCGSCCKFDEDDSEPVLKGVKLH     0.248  3417.66  90.0   \n",
       "125591  CSCGSCCKFDEDDSEPVLKGVKLHY     0.316  1644.14  75.0   \n",
       "125592  SCGSCCKFDEDDSEPVLKGVKLHYT     0.355  1068.72  65.0   \n",
       "\n",
       "                     genotype  sequence_length    loci  \n",
       "0       HLA-DPA10301-DPB11301               13  HLA-DP  \n",
       "1       HLA-DPA10301-DPB11301               13  HLA-DP  \n",
       "2       HLA-DPA10301-DPB11301               13  HLA-DP  \n",
       "3       HLA-DPA10301-DPB11301               13  HLA-DP  \n",
       "4       HLA-DPA10301-DPB11301               13  HLA-DP  \n",
       "...                       ...              ...     ...  \n",
       "125588  HLA-DPA10103-DPB14101               25  HLA-DP  \n",
       "125589  HLA-DPA10103-DPB14101               25  HLA-DP  \n",
       "125590  HLA-DPA10103-DPB14101               25  HLA-DP  \n",
       "125591  HLA-DPA10103-DPB14101               25  HLA-DP  \n",
       "125592  HLA-DPA10103-DPB14101               25  HLA-DP  \n",
       "\n",
       "[35166040 rows x 7 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the per-allele NetMHCII prediction tables and stack them into one\n",
    "# long-format frame, tagging every row with the allele it was predicted for.\n",
    "dfs = []\n",
    "skipped_alleles = []\n",
    "for allele in hla_alleles['allele'].values:\n",
    "    try:\n",
    "        # The files carry an .xls suffix but are parsed as tab-separated text;\n",
    "        # the first line is skipped so that the following line is the header.\n",
    "        df = pd.read_csv(\n",
    "            './netmhcii-3.2_preds/%s_preds.xls' % allele.replace(':', ''),\n",
    "            delimiter='\\t',\n",
    "            skiprows=[0],\n",
    "        )\n",
    "    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):\n",
    "        # Best effort: an allele without a readable prediction file is skipped.\n",
    "        # Only I/O and parse failures are tolerated; a bare `except:` would also\n",
    "        # hide KeyboardInterrupt and genuine programming errors.\n",
    "        skipped_alleles.append(allele)\n",
    "        continue\n",
    "    df['genotype'] = allele\n",
    "    df = df.drop(columns=['Pos', 'ID', 'Ave', 'NB'])\n",
    "    dfs.append(df)\n",
    "print('Loaded %d alleles' % len(dfs))\n",
    "if skipped_alleles:\n",
    "    print('Skipped %d alleles without readable predictions' % len(skipped_alleles))\n",
    "netmhc2_data = pd.concat(dfs)\n",
    "# pd.concat keeps each file's own row index, so the index is not unique;\n",
    "# new columns are assigned positionally (.values) to avoid index alignment.\n",
    "netmhc2_data['sequence_length'] = netmhc2_data['Peptide'].str.len().values\n",
    "# Locus prefix: first 4 characters for 'DRB...' names, first 6 otherwise.\n",
    "# Computed once per distinct genotype rather than once per row.\n",
    "loci_by_genotype = {\n",
    "    genotype: (genotype[:4] if genotype[:3] == 'DRB' else genotype[:6])\n",
    "    for genotype in netmhc2_data['genotype'].unique()\n",
    "}\n",
    "netmhc2_data['loci'] = netmhc2_data['genotype'].map(loci_by_genotype).values\n",
    "# netmhc2_data['1-log50k'] = 1 - np.log(netmhc2_data['nM']) / np.log(50000)\n",
    "netmhc2_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>loci</th>\n",
       "      <th colspan=\"10\" halign=\"left\">DRB1</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"10\" halign=\"left\">HLA-DQ</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>genotype</th>\n",
       "      <th>DRB1_0101</th>\n",
       "      <th>DRB1_0102</th>\n",
       "      <th>DRB1_0103</th>\n",
       "      <th>DRB1_0301</th>\n",
       "      <th>DRB1_0302</th>\n",
       "      <th>DRB1_0401</th>\n",
       "      <th>DRB1_0402</th>\n",
       "      <th>DRB1_0403</th>\n",
       "      <th>DRB1_0404</th>\n",
       "      <th>DRB1_0405</th>\n",
       "      <th>...</th>\n",
       "      <th>HLA-DQA10505-DQB10309</th>\n",
       "      <th>HLA-DQA10505-DQB10319</th>\n",
       "      <th>HLA-DQA10505-DQB10402</th>\n",
       "      <th>HLA-DQA10505-DQB10501</th>\n",
       "      <th>HLA-DQA10505-DQB10502</th>\n",
       "      <th>HLA-DQA10506-DQB10303</th>\n",
       "      <th>HLA-DQA10508-DQB10301</th>\n",
       "      <th>HLA-DQA10509-DQB10301</th>\n",
       "      <th>HLA-DQA10601-DQB10301</th>\n",
       "      <th>unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Peptide</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRT</th>\n",
       "      <td>0.554</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.266</td>\n",
       "      <td>0.199</td>\n",
       "      <td>0.091</td>\n",
       "      <td>0.403</td>\n",
       "      <td>0.295</td>\n",
       "      <td>0.265</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.424</td>\n",
       "      <td>...</td>\n",
       "      <td>0.327</td>\n",
       "      <td>0.327</td>\n",
       "      <td>0.428</td>\n",
       "      <td>0.467</td>\n",
       "      <td>0.367</td>\n",
       "      <td>0.336</td>\n",
       "      <td>0.327</td>\n",
       "      <td>0.327</td>\n",
       "      <td>0.259</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTF</th>\n",
       "      <td>0.642</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.327</td>\n",
       "      <td>0.265</td>\n",
       "      <td>0.108</td>\n",
       "      <td>0.458</td>\n",
       "      <td>0.370</td>\n",
       "      <td>0.327</td>\n",
       "      <td>0.443</td>\n",
       "      <td>0.489</td>\n",
       "      <td>...</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.504</td>\n",
       "      <td>0.524</td>\n",
       "      <td>0.415</td>\n",
       "      <td>0.385</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.292</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFL</th>\n",
       "      <td>0.659</td>\n",
       "      <td>0.486</td>\n",
       "      <td>0.346</td>\n",
       "      <td>0.297</td>\n",
       "      <td>0.112</td>\n",
       "      <td>0.466</td>\n",
       "      <td>0.397</td>\n",
       "      <td>0.357</td>\n",
       "      <td>0.470</td>\n",
       "      <td>0.506</td>\n",
       "      <td>...</td>\n",
       "      <td>0.388</td>\n",
       "      <td>0.388</td>\n",
       "      <td>0.537</td>\n",
       "      <td>0.559</td>\n",
       "      <td>0.438</td>\n",
       "      <td>0.406</td>\n",
       "      <td>0.388</td>\n",
       "      <td>0.388</td>\n",
       "      <td>0.307</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLL</th>\n",
       "      <td>0.647</td>\n",
       "      <td>0.460</td>\n",
       "      <td>0.343</td>\n",
       "      <td>0.285</td>\n",
       "      <td>0.113</td>\n",
       "      <td>0.467</td>\n",
       "      <td>0.387</td>\n",
       "      <td>0.348</td>\n",
       "      <td>0.457</td>\n",
       "      <td>0.500</td>\n",
       "      <td>...</td>\n",
       "      <td>0.396</td>\n",
       "      <td>0.396</td>\n",
       "      <td>0.551</td>\n",
       "      <td>0.566</td>\n",
       "      <td>0.441</td>\n",
       "      <td>0.404</td>\n",
       "      <td>0.396</td>\n",
       "      <td>0.396</td>\n",
       "      <td>0.309</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLLK</th>\n",
       "      <td>0.622</td>\n",
       "      <td>0.432</td>\n",
       "      <td>0.321</td>\n",
       "      <td>0.277</td>\n",
       "      <td>0.109</td>\n",
       "      <td>0.445</td>\n",
       "      <td>0.371</td>\n",
       "      <td>0.332</td>\n",
       "      <td>0.442</td>\n",
       "      <td>0.478</td>\n",
       "      <td>...</td>\n",
       "      <td>0.394</td>\n",
       "      <td>0.394</td>\n",
       "      <td>0.548</td>\n",
       "      <td>0.558</td>\n",
       "      <td>0.430</td>\n",
       "      <td>0.391</td>\n",
       "      <td>0.394</td>\n",
       "      <td>0.394</td>\n",
       "      <td>0.302</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMM</th>\n",
       "      <td>0.396</td>\n",
       "      <td>0.260</td>\n",
       "      <td>0.201</td>\n",
       "      <td>0.243</td>\n",
       "      <td>0.102</td>\n",
       "      <td>0.332</td>\n",
       "      <td>0.259</td>\n",
       "      <td>0.267</td>\n",
       "      <td>0.346</td>\n",
       "      <td>0.372</td>\n",
       "      <td>...</td>\n",
       "      <td>0.392</td>\n",
       "      <td>0.392</td>\n",
       "      <td>0.427</td>\n",
       "      <td>0.472</td>\n",
       "      <td>0.335</td>\n",
       "      <td>0.353</td>\n",
       "      <td>0.392</td>\n",
       "      <td>0.392</td>\n",
       "      <td>0.333</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMC</th>\n",
       "      <td>0.397</td>\n",
       "      <td>0.261</td>\n",
       "      <td>0.202</td>\n",
       "      <td>0.246</td>\n",
       "      <td>0.102</td>\n",
       "      <td>0.334</td>\n",
       "      <td>0.260</td>\n",
       "      <td>0.269</td>\n",
       "      <td>0.348</td>\n",
       "      <td>0.373</td>\n",
       "      <td>...</td>\n",
       "      <td>0.397</td>\n",
       "      <td>0.397</td>\n",
       "      <td>0.429</td>\n",
       "      <td>0.473</td>\n",
       "      <td>0.338</td>\n",
       "      <td>0.355</td>\n",
       "      <td>0.397</td>\n",
       "      <td>0.397</td>\n",
       "      <td>0.339</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCY</th>\n",
       "      <td>0.398</td>\n",
       "      <td>0.264</td>\n",
       "      <td>0.202</td>\n",
       "      <td>0.248</td>\n",
       "      <td>0.103</td>\n",
       "      <td>0.336</td>\n",
       "      <td>0.261</td>\n",
       "      <td>0.271</td>\n",
       "      <td>0.350</td>\n",
       "      <td>0.375</td>\n",
       "      <td>...</td>\n",
       "      <td>0.408</td>\n",
       "      <td>0.408</td>\n",
       "      <td>0.433</td>\n",
       "      <td>0.476</td>\n",
       "      <td>0.339</td>\n",
       "      <td>0.361</td>\n",
       "      <td>0.408</td>\n",
       "      <td>0.408</td>\n",
       "      <td>0.351</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYK</th>\n",
       "      <td>0.399</td>\n",
       "      <td>0.265</td>\n",
       "      <td>0.203</td>\n",
       "      <td>0.251</td>\n",
       "      <td>0.104</td>\n",
       "      <td>0.338</td>\n",
       "      <td>0.263</td>\n",
       "      <td>0.273</td>\n",
       "      <td>0.352</td>\n",
       "      <td>0.376</td>\n",
       "      <td>...</td>\n",
       "      <td>0.413</td>\n",
       "      <td>0.413</td>\n",
       "      <td>0.437</td>\n",
       "      <td>0.478</td>\n",
       "      <td>0.340</td>\n",
       "      <td>0.365</td>\n",
       "      <td>0.413</td>\n",
       "      <td>0.413</td>\n",
       "      <td>0.357</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYKR</th>\n",
       "      <td>0.402</td>\n",
       "      <td>0.270</td>\n",
       "      <td>0.205</td>\n",
       "      <td>0.255</td>\n",
       "      <td>0.105</td>\n",
       "      <td>0.340</td>\n",
       "      <td>0.266</td>\n",
       "      <td>0.278</td>\n",
       "      <td>0.355</td>\n",
       "      <td>0.377</td>\n",
       "      <td>...</td>\n",
       "      <td>0.415</td>\n",
       "      <td>0.415</td>\n",
       "      <td>0.445</td>\n",
       "      <td>0.481</td>\n",
       "      <td>0.342</td>\n",
       "      <td>0.367</td>\n",
       "      <td>0.415</td>\n",
       "      <td>0.415</td>\n",
       "      <td>0.363</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>125593 rows × 283 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "loci                           DRB1                                          \\\n",
       "genotype                  DRB1_0101 DRB1_0102 DRB1_0103 DRB1_0301 DRB1_0302   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT                 0.554     0.364     0.266     0.199     0.091   \n",
       "AAAYYVGYLQPRTF                0.642     0.456     0.327     0.265     0.108   \n",
       "AAAYYVGYLQPRTFL               0.659     0.486     0.346     0.297     0.112   \n",
       "AAAYYVGYLQPRTFLL              0.647     0.460     0.343     0.285     0.113   \n",
       "AAAYYVGYLQPRTFLLK             0.622     0.432     0.321     0.277     0.109   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM         0.396     0.260     0.201     0.243     0.102   \n",
       "YYVWKSYVHVVDGCNSSTCMMC        0.397     0.261     0.202     0.246     0.102   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY       0.398     0.264     0.202     0.248     0.103   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK      0.399     0.265     0.203     0.251     0.104   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR     0.402     0.270     0.205     0.255     0.105   \n",
       "\n",
       "loci                                                                         \\\n",
       "genotype                  DRB1_0401 DRB1_0402 DRB1_0403 DRB1_0404 DRB1_0405   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT                 0.403     0.295     0.265     0.364     0.424   \n",
       "AAAYYVGYLQPRTF                0.458     0.370     0.327     0.443     0.489   \n",
       "AAAYYVGYLQPRTFL               0.466     0.397     0.357     0.470     0.506   \n",
       "AAAYYVGYLQPRTFLL              0.467     0.387     0.348     0.457     0.500   \n",
       "AAAYYVGYLQPRTFLLK             0.445     0.371     0.332     0.442     0.478   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM         0.332     0.259     0.267     0.346     0.372   \n",
       "YYVWKSYVHVVDGCNSSTCMMC        0.334     0.260     0.269     0.348     0.373   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY       0.336     0.261     0.271     0.350     0.375   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK      0.338     0.263     0.273     0.352     0.376   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR     0.340     0.266     0.278     0.355     0.377   \n",
       "\n",
       "loci                       ...                HLA-DQ                        \\\n",
       "genotype                   ... HLA-DQA10505-DQB10309 HLA-DQA10505-DQB10319   \n",
       "Peptide                    ...                                               \n",
       "AAAYYVGYLQPRT              ...                 0.327                 0.327   \n",
       "AAAYYVGYLQPRTF             ...                 0.364                 0.364   \n",
       "AAAYYVGYLQPRTFL            ...                 0.388                 0.388   \n",
       "AAAYYVGYLQPRTFLL           ...                 0.396                 0.396   \n",
       "AAAYYVGYLQPRTFLLK          ...                 0.394                 0.394   \n",
       "...                        ...                   ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      ...                 0.392                 0.392   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     ...                 0.397                 0.397   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    ...                 0.408                 0.408   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   ...                 0.413                 0.413   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  ...                 0.415                 0.415   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10402 HLA-DQA10505-DQB10501   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                             0.428                 0.467   \n",
       "AAAYYVGYLQPRTF                            0.504                 0.524   \n",
       "AAAYYVGYLQPRTFL                           0.537                 0.559   \n",
       "AAAYYVGYLQPRTFLL                          0.551                 0.566   \n",
       "AAAYYVGYLQPRTFLLK                         0.548                 0.558   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                     0.427                 0.472   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                    0.429                 0.473   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                   0.433                 0.476   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                  0.437                 0.478   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                 0.445                 0.481   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10502 HLA-DQA10506-DQB10303   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                             0.367                 0.336   \n",
       "AAAYYVGYLQPRTF                            0.415                 0.385   \n",
       "AAAYYVGYLQPRTFL                           0.438                 0.406   \n",
       "AAAYYVGYLQPRTFLL                          0.441                 0.404   \n",
       "AAAYYVGYLQPRTFLLK                         0.430                 0.391   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                     0.335                 0.353   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                    0.338                 0.355   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                   0.339                 0.361   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                  0.340                 0.365   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                 0.342                 0.367   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10508-DQB10301 HLA-DQA10509-DQB10301   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                             0.327                 0.327   \n",
       "AAAYYVGYLQPRTF                            0.364                 0.364   \n",
       "AAAYYVGYLQPRTFL                           0.388                 0.388   \n",
       "AAAYYVGYLQPRTFLL                          0.396                 0.396   \n",
       "AAAYYVGYLQPRTFLLK                         0.394                 0.394   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                     0.392                 0.392   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                    0.397                 0.397   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                   0.408                 0.408   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                  0.413                 0.413   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                 0.415                 0.415   \n",
       "\n",
       "loci                                                     \n",
       "genotype                  HLA-DQA10601-DQB10301 unknown  \n",
       "Peptide                                                  \n",
       "AAAYYVGYLQPRT                             0.259     0.0  \n",
       "AAAYYVGYLQPRTF                            0.292     0.0  \n",
       "AAAYYVGYLQPRTFL                           0.307     0.0  \n",
       "AAAYYVGYLQPRTFLL                          0.309     0.0  \n",
       "AAAYYVGYLQPRTFLLK                         0.302     0.0  \n",
       "...                                         ...     ...  \n",
       "YYVWKSYVHVVDGCNSSTCMM                     0.333     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMC                    0.339     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                   0.351     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                  0.357     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                 0.363     0.0  \n",
       "\n",
       "[125593 rows x 283 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pivot the predicted binding scores ('1-log50k') into a\n",
    "# Peptide x (loci, genotype) table and write it to disk.\n",
    "\n",
    "df = netmhc2_data\n",
    "\n",
    "# One placeholder row per distinct (Peptide, loci) pair, labelled with the\n",
    "# genotype 'unknown' and all scores set to 0, so that every locus gets an\n",
    "# all-zero 'unknown' column in the pivot.\n",
    "# NOTE(review): presumably a stand-in for untyped alleles -- confirm with the consumer.\n",
    "# drop_duplicates enumerates the pairs directly (no aggregation over the other\n",
    "# columns); the result is sorted by key and given a fresh RangeIndex, which is\n",
    "# the layout a groupby on the same keys followed by reset_index produces.\n",
    "df2 = (\n",
    "    df[['Peptide', 'loci']]\n",
    "    .drop_duplicates()\n",
    "    .sort_values(['Peptide', 'loci'])\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "df2['genotype'] = 'unknown'\n",
    "df2['Rank'] = 0.\n",
    "df2['1-log50k'] = 0.\n",
    "df2['nM'] = 0.\n",
    "\n",
    "# pivot_table aggregates with the mean by default, so repeated\n",
    "# (Peptide, genotype) rows collapse to a single averaged score.\n",
    "data_pivot = pd.concat([df, df2], sort=False).pivot_table(\n",
    "    index='Peptide',\n",
    "    columns=['loci', 'genotype'],\n",
    "    values='1-log50k',\n",
    ")\n",
    "# protocol=2 is kept as in the original call.\n",
    "# NOTE(review): presumably so Python 2 readers can load the pickle -- confirm.\n",
    "data_pivot.to_pickle('mhc2_haplotype_netmhcii-3.2_pred_affinity_pivot_v1v2.pkl.gz', protocol=2)\n",
    "data_pivot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "174296"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of (peptide, genotype) cells whose score reaches the 0.638 cutoff.\n",
    "# NOTE(review): 0.638 ~= 1 - log(50) / log(50000), i.e. presumably a ~50 nM\n",
    "# affinity threshold on the 1-log50k scale -- confirm.\n",
    "(data_pivot >= 0.638).values.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>loci</th>\n",
       "      <th colspan=\"10\" halign=\"left\">DRB1</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"10\" halign=\"left\">HLA-DQ</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>genotype</th>\n",
       "      <th>DRB1_0101</th>\n",
       "      <th>DRB1_0102</th>\n",
       "      <th>DRB1_0103</th>\n",
       "      <th>DRB1_0301</th>\n",
       "      <th>DRB1_0302</th>\n",
       "      <th>DRB1_0401</th>\n",
       "      <th>DRB1_0402</th>\n",
       "      <th>DRB1_0403</th>\n",
       "      <th>DRB1_0404</th>\n",
       "      <th>DRB1_0405</th>\n",
       "      <th>...</th>\n",
       "      <th>HLA-DQA10505-DQB10302</th>\n",
       "      <th>HLA-DQA10505-DQB10309</th>\n",
       "      <th>HLA-DQA10505-DQB10319</th>\n",
       "      <th>HLA-DQA10505-DQB10402</th>\n",
       "      <th>HLA-DQA10505-DQB10501</th>\n",
       "      <th>HLA-DQA10505-DQB10502</th>\n",
       "      <th>HLA-DQA10506-DQB10303</th>\n",
       "      <th>HLA-DQA10508-DQB10301</th>\n",
       "      <th>HLA-DQA10509-DQB10301</th>\n",
       "      <th>HLA-DQA10601-DQB10301</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Peptide</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRT</th>\n",
       "      <td>79.0</td>\n",
       "      <td>77.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>81.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>85.0</td>\n",
       "      <td>...</td>\n",
       "      <td>83.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>98.6</td>\n",
       "      <td>75.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>70.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTF</th>\n",
       "      <td>90.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>88.0</td>\n",
       "      <td>56.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>91.5</td>\n",
       "      <td>...</td>\n",
       "      <td>87.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>96.0</td>\n",
       "      <td>97.5</td>\n",
       "      <td>99.5</td>\n",
       "      <td>80.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>72.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFL</th>\n",
       "      <td>90.5</td>\n",
       "      <td>93.0</td>\n",
       "      <td>89.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>85.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>...</td>\n",
       "      <td>85.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>96.5</td>\n",
       "      <td>98.7</td>\n",
       "      <td>99.6</td>\n",
       "      <td>80.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>69.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLL</th>\n",
       "      <td>90.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>86.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>72.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>90.5</td>\n",
       "      <td>...</td>\n",
       "      <td>81.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>97.0</td>\n",
       "      <td>98.6</td>\n",
       "      <td>99.6</td>\n",
       "      <td>77.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>66.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLLK</th>\n",
       "      <td>87.0</td>\n",
       "      <td>88.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>72.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>...</td>\n",
       "      <td>72.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>96.0</td>\n",
       "      <td>97.0</td>\n",
       "      <td>99.0</td>\n",
       "      <td>71.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>60.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMM</th>\n",
       "      <td>30.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>25.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>54.0</td>\n",
       "      <td>...</td>\n",
       "      <td>40.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>61.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMC</th>\n",
       "      <td>30.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>25.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>54.0</td>\n",
       "      <td>...</td>\n",
       "      <td>45.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>54.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>62.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCY</th>\n",
       "      <td>30.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>55.0</td>\n",
       "      <td>...</td>\n",
       "      <td>51.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>56.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>66.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYK</th>\n",
       "      <td>30.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>56.0</td>\n",
       "      <td>...</td>\n",
       "      <td>52.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>61.0</td>\n",
       "      <td>71.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>67.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYKR</th>\n",
       "      <td>35.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>56.0</td>\n",
       "      <td>...</td>\n",
       "      <td>54.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>72.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>69.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>125593 rows × 280 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "loci                           DRB1                                          \\\n",
       "genotype                  DRB1_0101 DRB1_0102 DRB1_0103 DRB1_0301 DRB1_0302   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT                  79.0      77.0      79.0      40.0      58.0   \n",
       "AAAYYVGYLQPRTF                 90.0      91.0      88.0      56.0      65.0   \n",
       "AAAYYVGYLQPRTFL                90.5      93.0      89.0      58.0      60.0   \n",
       "AAAYYVGYLQPRTFLL               90.0      91.0      90.0      52.0      59.0   \n",
       "AAAYYVGYLQPRTFLLK              87.0      88.0      87.0      45.0      53.0   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM          30.0      30.0      45.0      25.0      40.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMC         30.0      30.0      45.0      25.0      40.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY        30.0      30.0      45.0      30.0      40.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK       30.0      30.0      45.0      30.0      40.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR      35.0      30.0      45.0      30.0      40.0   \n",
       "\n",
       "loci                                                                         \\\n",
       "genotype                  DRB1_0401 DRB1_0402 DRB1_0403 DRB1_0404 DRB1_0405   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT                  81.0      69.0      63.0      66.0      85.0   \n",
       "AAAYYVGYLQPRTF                 87.0      82.0      74.0      78.0      91.5   \n",
       "AAAYYVGYLQPRTFL                85.0      84.0      75.0      79.0      91.0   \n",
       "AAAYYVGYLQPRTFLL               86.0      82.0      72.0      76.0      90.5   \n",
       "AAAYYVGYLQPRTFLLK              82.0      79.0      65.0      72.0      87.0   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM          40.0      35.0      30.0      35.0      54.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMC         45.0      35.0      30.0      35.0      54.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY        45.0      35.0      35.0      35.0      55.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK       45.0      35.0      35.0      35.0      56.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR      45.0      35.0      35.0      40.0      56.0   \n",
       "\n",
       "loci                       ...                HLA-DQ                        \\\n",
       "genotype                   ... HLA-DQA10505-DQB10302 HLA-DQA10505-DQB10309   \n",
       "Peptide                    ...                                               \n",
       "AAAYYVGYLQPRT              ...                  83.0                  76.0   \n",
       "AAAYYVGYLQPRTF             ...                  87.0                  78.0   \n",
       "AAAYYVGYLQPRTFL            ...                  85.0                  76.0   \n",
       "AAAYYVGYLQPRTFLL           ...                  81.0                  75.0   \n",
       "AAAYYVGYLQPRTFLLK          ...                  72.0                  70.0   \n",
       "...                        ...                   ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      ...                  40.0                  62.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     ...                  45.0                  63.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    ...                  51.0                  66.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   ...                  52.0                  67.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  ...                  54.0                  67.0   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10319 HLA-DQA10505-DQB10402   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                              76.0                  90.0   \n",
       "AAAYYVGYLQPRTF                             78.0                  96.0   \n",
       "AAAYYVGYLQPRTFL                            76.0                  96.5   \n",
       "AAAYYVGYLQPRTFLL                           75.0                  97.0   \n",
       "AAAYYVGYLQPRTFLLK                          70.0                  96.0   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                      62.0                  63.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                     63.0                  64.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                    66.0                  65.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                   67.0                  67.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                  67.0                  70.0   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10501 HLA-DQA10505-DQB10502   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                              94.0                  98.6   \n",
       "AAAYYVGYLQPRTF                             97.5                  99.5   \n",
       "AAAYYVGYLQPRTFL                            98.7                  99.6   \n",
       "AAAYYVGYLQPRTFLL                           98.6                  99.6   \n",
       "AAAYYVGYLQPRTFLLK                          97.0                  99.0   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                      58.0                  68.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                     58.0                  69.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                    60.0                  70.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                   61.0                  71.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                  62.0                  72.0   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10506-DQB10303 HLA-DQA10508-DQB10301   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                              75.0                  76.0   \n",
       "AAAYYVGYLQPRTF                             80.0                  78.0   \n",
       "AAAYYVGYLQPRTFL                            80.0                  76.0   \n",
       "AAAYYVGYLQPRTFLL                           77.0                  75.0   \n",
       "AAAYYVGYLQPRTFLLK                          71.0                  70.0   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                      53.0                  62.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                     54.0                  63.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                    56.0                  66.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                   57.0                  67.0   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                  58.0                  67.0   \n",
       "\n",
       "loci                                                                   \n",
       "genotype                  HLA-DQA10509-DQB10301 HLA-DQA10601-DQB10301  \n",
       "Peptide                                                                \n",
       "AAAYYVGYLQPRT                              76.0                  70.0  \n",
       "AAAYYVGYLQPRTF                             78.0                  72.0  \n",
       "AAAYYVGYLQPRTFL                            76.0                  69.0  \n",
       "AAAYYVGYLQPRTFLL                           75.0                  66.0  \n",
       "AAAYYVGYLQPRTFLLK                          70.0                  60.0  \n",
       "...                                         ...                   ...  \n",
       "YYVWKSYVHVVDGCNSSTCMM                      62.0                  61.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMC                     63.0                  62.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                    66.0                  66.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                   67.0                  67.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                  67.0                  69.0  \n",
       "\n",
       "[125593 rows x 280 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pivot the NetMHCII % Rank (BA) column -- not the raw binding affinity --\n",
    "# into a Peptide x (loci, genotype) table, then flip the scale so binders\n",
    "# score >= 98 instead of ranking <= 2. pivot_table's default aggfunc is the\n",
    "# mean, so duplicate (Peptide, loci, genotype) rows would be averaged.\n",
    "data_pivot = 100 - netmhc2_data.pivot_table(\n",
    "    values='Rank',\n",
    "    index='Peptide',\n",
    "    columns=['loci', 'genotype'],\n",
    ")\n",
    "# Persist the inverted table; pickle protocol 2 is requested explicitly.\n",
    "data_pivot.to_pickle('mhc2_haplotype_netmhcii-3.2_ba_rank_pivot_v2.pkl.gz', protocol=2)\n",
    "data_pivot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "626482"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of (peptide, allele) cells whose inverted rank score is >= 98.\n",
    "data_pivot.ge(98).sum().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3463122"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of (peptide, allele) cells whose inverted rank score is >= 90.\n",
    "data_pivot.ge(90).sum().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.781497148953934"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Percentage of all cells in data_pivot (rows x columns, NaN cells included\n",
    "# in the denominator) whose inverted rank score is >= 98. Derived from\n",
    "# data_pivot itself instead of hand-copied counts, so it cannot go stale\n",
    "# when the upstream table changes.\n",
    "float(data_pivot.ge(98).sum().sum() / data_pivot.size * 100.)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PUFFIN MHC Class II"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean_pred</th>\n",
       "      <th>epistemic</th>\n",
       "      <th>aleatoric</th>\n",
       "      <th>binding_likelihood</th>\n",
       "      <th>avg_suff_stat_1</th>\n",
       "      <th>avg_suff_stat_2</th>\n",
       "      <th>genotype</th>\n",
       "      <th>Peptide</th>\n",
       "      <th>sequence_length</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.289376</td>\n",
       "      <td>0.003492</td>\n",
       "      <td>0.035966</td>\n",
       "      <td>0.236246</td>\n",
       "      <td>0.289376</td>\n",
       "      <td>0.189648</td>\n",
       "      <td>HLA-DPA10103-DPB10101</td>\n",
       "      <td>MDLFMRIFTIGTVTLKQGEIK</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.364605</td>\n",
       "      <td>0.006347</td>\n",
       "      <td>0.038386</td>\n",
       "      <td>0.377729</td>\n",
       "      <td>0.364605</td>\n",
       "      <td>0.195923</td>\n",
       "      <td>HLA-DPA10103-DPB10201</td>\n",
       "      <td>MDLFMRIFTIGTVTLKQGEIK</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.337425</td>\n",
       "      <td>0.006072</td>\n",
       "      <td>0.039994</td>\n",
       "      <td>0.329593</td>\n",
       "      <td>0.337425</td>\n",
       "      <td>0.199984</td>\n",
       "      <td>HLA-DPA10103-DPB10202</td>\n",
       "      <td>MDLFMRIFTIGTVTLKQGEIK</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.367785</td>\n",
       "      <td>0.004348</td>\n",
       "      <td>0.056473</td>\n",
       "      <td>0.403850</td>\n",
       "      <td>0.367785</td>\n",
       "      <td>0.237641</td>\n",
       "      <td>HLA-DPA10103-DPB10301</td>\n",
       "      <td>MDLFMRIFTIGTVTLKQGEIK</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.320184</td>\n",
       "      <td>0.005176</td>\n",
       "      <td>0.032980</td>\n",
       "      <td>0.280750</td>\n",
       "      <td>0.320184</td>\n",
       "      <td>0.181603</td>\n",
       "      <td>HLA-DPA10103-DPB10401</td>\n",
       "      <td>MDLFMRIFTIGTVTLKQGEIK</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98967279</th>\n",
       "      <td>0.211966</td>\n",
       "      <td>0.006626</td>\n",
       "      <td>0.028748</td>\n",
       "      <td>0.103811</td>\n",
       "      <td>0.211966</td>\n",
       "      <td>0.169553</td>\n",
       "      <td>HLA-DQA10601-DQB10630</td>\n",
       "      <td>TKLATTEELPDEFVVVTVK</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98967280</th>\n",
       "      <td>0.230003</td>\n",
       "      <td>0.006200</td>\n",
       "      <td>0.028258</td>\n",
       "      <td>0.122268</td>\n",
       "      <td>0.230003</td>\n",
       "      <td>0.168100</td>\n",
       "      <td>HLA-DQA10601-DQB10632</td>\n",
       "      <td>TKLATTEELPDEFVVVTVK</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98967281</th>\n",
       "      <td>0.273412</td>\n",
       "      <td>0.004453</td>\n",
       "      <td>0.029170</td>\n",
       "      <td>0.186404</td>\n",
       "      <td>0.273412</td>\n",
       "      <td>0.170792</td>\n",
       "      <td>HLA-DQA10601-DQB10633</td>\n",
       "      <td>TKLATTEELPDEFVVVTVK</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98967282</th>\n",
       "      <td>0.294119</td>\n",
       "      <td>0.003196</td>\n",
       "      <td>0.028181</td>\n",
       "      <td>0.216706</td>\n",
       "      <td>0.294119</td>\n",
       "      <td>0.167873</td>\n",
       "      <td>HLA-DQA10601-DQB10637</td>\n",
       "      <td>TKLATTEELPDEFVVVTVK</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98967283</th>\n",
       "      <td>0.367120</td>\n",
       "      <td>0.005095</td>\n",
       "      <td>0.031836</td>\n",
       "      <td>0.371495</td>\n",
       "      <td>0.367120</td>\n",
       "      <td>0.178426</td>\n",
       "      <td>HLA-DQA10601-DQB10642</td>\n",
       "      <td>TKLATTEELPDEFVVVTVK</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>98967284 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          mean_pred  epistemic  aleatoric  binding_likelihood  \\\n",
       "0          0.289376   0.003492   0.035966            0.236246   \n",
       "1          0.364605   0.006347   0.038386            0.377729   \n",
       "2          0.337425   0.006072   0.039994            0.329593   \n",
       "3          0.367785   0.004348   0.056473            0.403850   \n",
       "4          0.320184   0.005176   0.032980            0.280750   \n",
       "...             ...        ...        ...                 ...   \n",
       "98967279   0.211966   0.006626   0.028748            0.103811   \n",
       "98967280   0.230003   0.006200   0.028258            0.122268   \n",
       "98967281   0.273412   0.004453   0.029170            0.186404   \n",
       "98967282   0.294119   0.003196   0.028181            0.216706   \n",
       "98967283   0.367120   0.005095   0.031836            0.371495   \n",
       "\n",
       "          avg_suff_stat_1  avg_suff_stat_2               genotype  \\\n",
       "0                0.289376         0.189648  HLA-DPA10103-DPB10101   \n",
       "1                0.364605         0.195923  HLA-DPA10103-DPB10201   \n",
       "2                0.337425         0.199984  HLA-DPA10103-DPB10202   \n",
       "3                0.367785         0.237641  HLA-DPA10103-DPB10301   \n",
       "4                0.320184         0.181603  HLA-DPA10103-DPB10401   \n",
       "...                   ...              ...                    ...   \n",
       "98967279         0.211966         0.169553  HLA-DQA10601-DQB10630   \n",
       "98967280         0.230003         0.168100  HLA-DQA10601-DQB10632   \n",
       "98967281         0.273412         0.170792  HLA-DQA10601-DQB10633   \n",
       "98967282         0.294119         0.167873  HLA-DQA10601-DQB10637   \n",
       "98967283         0.367120         0.178426  HLA-DQA10601-DQB10642   \n",
       "\n",
       "                        Peptide  sequence_length  \n",
       "0         MDLFMRIFTIGTVTLKQGEIK               21  \n",
       "1         MDLFMRIFTIGTVTLKQGEIK               21  \n",
       "2         MDLFMRIFTIGTVTLKQGEIK               21  \n",
       "3         MDLFMRIFTIGTVTLKQGEIK               21  \n",
       "4         MDLFMRIFTIGTVTLKQGEIK               21  \n",
       "...                         ...              ...  \n",
       "98967279    TKLATTEELPDEFVVVTVK               19  \n",
       "98967280    TKLATTEELPDEFVVVTVK               19  \n",
       "98967281    TKLATTEELPDEFVVVTVK               19  \n",
       "98967282    TKLATTEELPDEFVVVTVK               19  \n",
       "98967283    TKLATTEELPDEFVVVTVK               19  \n",
       "\n",
       "[98967284 rows x 9 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the combined PUFFIN MHC class II predictions and rename the raw\n",
    "# 'epitope' / 'mhc' columns to 'Peptide' / 'genotype'. Renaming directly on\n",
    "# the freshly read frame avoids an in-place rename on a filtered slice.\n",
    "puffin_mhc2_preds = (\n",
    "    pd.read_csv('PUFFIN/all_preds_combined_classII.csv.gz')\n",
    "    .rename(columns={'epitope': 'Peptide', 'mhc': 'genotype'})\n",
    ")\n",
    "\n",
    "# Peptide length via the vectorised string accessor: a missing peptide gives\n",
    "# NaN (and is dropped by the filter below) instead of raising TypeError.\n",
    "puffin_mhc2_preds['sequence_length'] = puffin_mhc2_preds['Peptide'].str.len()\n",
    "\n",
    "# Filter dataframe to peptide lengths [13, 25] (both ends inclusive).\n",
    "puffin_mhc2_preds = puffin_mhc2_preds.loc[puffin_mhc2_preds['sequence_length'].between(13, 25)]\n",
    "\n",
    "puffin_mhc2_preds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean_pred</th>\n",
       "      <th>epistemic</th>\n",
       "      <th>aleatoric</th>\n",
       "      <th>binding_likelihood</th>\n",
       "      <th>avg_suff_stat_1</th>\n",
       "      <th>avg_suff_stat_2</th>\n",
       "      <th>genotype</th>\n",
       "      <th>Peptide</th>\n",
       "      <th>sequence_length</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.545357</td>\n",
       "      <td>0.006401</td>\n",
       "      <td>0.046334</td>\n",
       "      <td>0.710976</td>\n",
       "      <td>0.545357</td>\n",
       "      <td>0.215254</td>\n",
       "      <td>DRB1_0101</td>\n",
       "      <td>MDLFMRIFTIGTVT</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.486121</td>\n",
       "      <td>0.004832</td>\n",
       "      <td>0.044232</td>\n",
       "      <td>0.613191</td>\n",
       "      <td>0.486121</td>\n",
       "      <td>0.210314</td>\n",
       "      <td>DRB1_0102</td>\n",
       "      <td>MDLFMRIFTIGTVT</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.449011</td>\n",
       "      <td>0.008343</td>\n",
       "      <td>0.042273</td>\n",
       "      <td>0.545279</td>\n",
       "      <td>0.449011</td>\n",
       "      <td>0.205604</td>\n",
       "      <td>DRB1_0103</td>\n",
       "      <td>MDLFMRIFTIGTVT</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.137176</td>\n",
       "      <td>0.001960</td>\n",
       "      <td>0.018459</td>\n",
       "      <td>0.016875</td>\n",
       "      <td>0.137176</td>\n",
       "      <td>0.135865</td>\n",
       "      <td>DRB1_0301</td>\n",
       "      <td>MDLFMRIFTIGTVT</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.174542</td>\n",
       "      <td>0.001935</td>\n",
       "      <td>0.023181</td>\n",
       "      <td>0.049561</td>\n",
       "      <td>0.174542</td>\n",
       "      <td>0.152252</td>\n",
       "      <td>DRB1_0302</td>\n",
       "      <td>MDLFMRIFTIGTVT</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20597247</th>\n",
       "      <td>0.370838</td>\n",
       "      <td>0.013723</td>\n",
       "      <td>0.037287</td>\n",
       "      <td>0.388309</td>\n",
       "      <td>0.370838</td>\n",
       "      <td>0.193099</td>\n",
       "      <td>HLA-DQA10505-DQB10501</td>\n",
       "      <td>PIAVQMTKLATTEELPDEFVVVTVK</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20597248</th>\n",
       "      <td>0.182281</td>\n",
       "      <td>0.003463</td>\n",
       "      <td>0.028081</td>\n",
       "      <td>0.073230</td>\n",
       "      <td>0.182281</td>\n",
       "      <td>0.167575</td>\n",
       "      <td>HLA-DQA10505-DQB10502</td>\n",
       "      <td>PIAVQMTKLATTEELPDEFVVVTVK</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20597249</th>\n",
       "      <td>0.369109</td>\n",
       "      <td>0.006680</td>\n",
       "      <td>0.022698</td>\n",
       "      <td>0.353784</td>\n",
       "      <td>0.369109</td>\n",
       "      <td>0.150659</td>\n",
       "      <td>HLA-DQA10506-DQB10303</td>\n",
       "      <td>PIAVQMTKLATTEELPDEFVVVTVK</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20597250</th>\n",
       "      <td>0.244158</td>\n",
       "      <td>0.002368</td>\n",
       "      <td>0.031147</td>\n",
       "      <td>0.151921</td>\n",
       "      <td>0.244158</td>\n",
       "      <td>0.176485</td>\n",
       "      <td>HLA-DQA10508-DQB10301</td>\n",
       "      <td>PIAVQMTKLATTEELPDEFVVVTVK</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20597251</th>\n",
       "      <td>0.244158</td>\n",
       "      <td>0.002368</td>\n",
       "      <td>0.031147</td>\n",
       "      <td>0.151921</td>\n",
       "      <td>0.244158</td>\n",
       "      <td>0.176485</td>\n",
       "      <td>HLA-DQA10509-DQB10301</td>\n",
       "      <td>PIAVQMTKLATTEELPDEFVVVTVK</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>20597252 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          mean_pred  epistemic  aleatoric  binding_likelihood  \\\n",
       "0          0.545357   0.006401   0.046334            0.710976   \n",
       "1          0.486121   0.004832   0.044232            0.613191   \n",
       "2          0.449011   0.008343   0.042273            0.545279   \n",
       "3          0.137176   0.001960   0.018459            0.016875   \n",
       "4          0.174542   0.001935   0.023181            0.049561   \n",
       "...             ...        ...        ...                 ...   \n",
       "20597247   0.370838   0.013723   0.037287            0.388309   \n",
       "20597248   0.182281   0.003463   0.028081            0.073230   \n",
       "20597249   0.369109   0.006680   0.022698            0.353784   \n",
       "20597250   0.244158   0.002368   0.031147            0.151921   \n",
       "20597251   0.244158   0.002368   0.031147            0.151921   \n",
       "\n",
       "          avg_suff_stat_1  avg_suff_stat_2               genotype  \\\n",
       "0                0.545357         0.215254              DRB1_0101   \n",
       "1                0.486121         0.210314              DRB1_0102   \n",
       "2                0.449011         0.205604              DRB1_0103   \n",
       "3                0.137176         0.135865              DRB1_0301   \n",
       "4                0.174542         0.152252              DRB1_0302   \n",
       "...                   ...              ...                    ...   \n",
       "20597247         0.370838         0.193099  HLA-DQA10505-DQB10501   \n",
       "20597248         0.182281         0.167575  HLA-DQA10505-DQB10502   \n",
       "20597249         0.369109         0.150659  HLA-DQA10506-DQB10303   \n",
       "20597250         0.244158         0.176485  HLA-DQA10508-DQB10301   \n",
       "20597251         0.244158         0.176485  HLA-DQA10509-DQB10301   \n",
       "\n",
       "                            Peptide  sequence_length  \n",
       "0                    MDLFMRIFTIGTVT               14  \n",
       "1                    MDLFMRIFTIGTVT               14  \n",
       "2                    MDLFMRIFTIGTVT               14  \n",
       "3                    MDLFMRIFTIGTVT               14  \n",
       "4                    MDLFMRIFTIGTVT               14  \n",
       "...                             ...              ...  \n",
       "20597247  PIAVQMTKLATTEELPDEFVVVTVK               25  \n",
       "20597248  PIAVQMTKLATTEELPDEFVVVTVK               25  \n",
       "20597249  PIAVQMTKLATTEELPDEFVVVTVK               25  \n",
       "20597250  PIAVQMTKLATTEELPDEFVVVTVK               25  \n",
       "20597251  PIAVQMTKLATTEELPDEFVVVTVK               25  \n",
       "\n",
       "[20597252 rows x 9 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the extra PUFFIN MHC-II predictions, keep peptides of length 13-25\n",
    "# (inclusive), and rename the columns to `Peptide` / `genotype`.\n",
    "puffin_mhc2_new_preds = pd.read_csv('PUFFIN/extra_MHC2_predictions.csv')\n",
    "\n",
    "# Chained so every step returns a new frame; an in-place rename on the\n",
    "# filtered `.loc` slice can trigger pandas' SettingWithCopyWarning.\n",
    "puffin_mhc2_new_preds = (\n",
    "    puffin_mhc2_new_preds\n",
    "    .assign(sequence_length=puffin_mhc2_new_preds['epitope'].str.len())\n",
    "    .loc[lambda preds: preds['sequence_length'].between(13, 25)]\n",
    "    .rename(columns={'epitope': 'Peptide', 'mhc': 'genotype'})\n",
    ")\n",
    "\n",
    "puffin_mhc2_new_preds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean_pred</th>\n",
       "      <th>epistemic</th>\n",
       "      <th>aleatoric</th>\n",
       "      <th>binding_likelihood</th>\n",
       "      <th>avg_suff_stat_1</th>\n",
       "      <th>avg_suff_stat_2</th>\n",
       "      <th>genotype</th>\n",
       "      <th>Peptide</th>\n",
       "      <th>sequence_length</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.289376</td>\n",
       "      <td>0.003492</td>\n",
       "      <td>0.035966</td>\n",
       "      <td>0.236246</td>\n",
       "      <td>0.289376</td>\n",
       "      <td>0.189648</td>\n",
       "      <td>HLA-DPA10103-DPB10101</td>\n",
       "      <td>MDLFMRIFTIGTVTLKQGEIK</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.364605</td>\n",
       "      <td>0.006347</td>\n",
       "      <td>0.038386</td>\n",
       "      <td>0.377729</td>\n",
       "      <td>0.364605</td>\n",
       "      <td>0.195923</td>\n",
       "      <td>HLA-DPA10103-DPB10201</td>\n",
       "      <td>MDLFMRIFTIGTVTLKQGEIK</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.337425</td>\n",
       "      <td>0.006072</td>\n",
       "      <td>0.039994</td>\n",
       "      <td>0.329593</td>\n",
       "      <td>0.337425</td>\n",
       "      <td>0.199984</td>\n",
       "      <td>HLA-DPA10103-DPB10202</td>\n",
       "      <td>MDLFMRIFTIGTVTLKQGEIK</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.367785</td>\n",
       "      <td>0.004348</td>\n",
       "      <td>0.056473</td>\n",
       "      <td>0.403850</td>\n",
       "      <td>0.367785</td>\n",
       "      <td>0.237641</td>\n",
       "      <td>HLA-DPA10103-DPB10301</td>\n",
       "      <td>MDLFMRIFTIGTVTLKQGEIK</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.320184</td>\n",
       "      <td>0.005176</td>\n",
       "      <td>0.032980</td>\n",
       "      <td>0.280750</td>\n",
       "      <td>0.320184</td>\n",
       "      <td>0.181603</td>\n",
       "      <td>HLA-DPA10103-DPB10401</td>\n",
       "      <td>MDLFMRIFTIGTVTLKQGEIK</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20597247</th>\n",
       "      <td>0.370838</td>\n",
       "      <td>0.013723</td>\n",
       "      <td>0.037287</td>\n",
       "      <td>0.388309</td>\n",
       "      <td>0.370838</td>\n",
       "      <td>0.193099</td>\n",
       "      <td>HLA-DQA10505-DQB10501</td>\n",
       "      <td>PIAVQMTKLATTEELPDEFVVVTVK</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20597248</th>\n",
       "      <td>0.182281</td>\n",
       "      <td>0.003463</td>\n",
       "      <td>0.028081</td>\n",
       "      <td>0.073230</td>\n",
       "      <td>0.182281</td>\n",
       "      <td>0.167575</td>\n",
       "      <td>HLA-DQA10505-DQB10502</td>\n",
       "      <td>PIAVQMTKLATTEELPDEFVVVTVK</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20597249</th>\n",
       "      <td>0.369109</td>\n",
       "      <td>0.006680</td>\n",
       "      <td>0.022698</td>\n",
       "      <td>0.353784</td>\n",
       "      <td>0.369109</td>\n",
       "      <td>0.150659</td>\n",
       "      <td>HLA-DQA10506-DQB10303</td>\n",
       "      <td>PIAVQMTKLATTEELPDEFVVVTVK</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20597250</th>\n",
       "      <td>0.244158</td>\n",
       "      <td>0.002368</td>\n",
       "      <td>0.031147</td>\n",
       "      <td>0.151921</td>\n",
       "      <td>0.244158</td>\n",
       "      <td>0.176485</td>\n",
       "      <td>HLA-DQA10508-DQB10301</td>\n",
       "      <td>PIAVQMTKLATTEELPDEFVVVTVK</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20597251</th>\n",
       "      <td>0.244158</td>\n",
       "      <td>0.002368</td>\n",
       "      <td>0.031147</td>\n",
       "      <td>0.151921</td>\n",
       "      <td>0.244158</td>\n",
       "      <td>0.176485</td>\n",
       "      <td>HLA-DQA10509-DQB10301</td>\n",
       "      <td>PIAVQMTKLATTEELPDEFVVVTVK</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>119564536 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          mean_pred  epistemic  aleatoric  binding_likelihood  \\\n",
       "0          0.289376   0.003492   0.035966            0.236246   \n",
       "1          0.364605   0.006347   0.038386            0.377729   \n",
       "2          0.337425   0.006072   0.039994            0.329593   \n",
       "3          0.367785   0.004348   0.056473            0.403850   \n",
       "4          0.320184   0.005176   0.032980            0.280750   \n",
       "...             ...        ...        ...                 ...   \n",
       "20597247   0.370838   0.013723   0.037287            0.388309   \n",
       "20597248   0.182281   0.003463   0.028081            0.073230   \n",
       "20597249   0.369109   0.006680   0.022698            0.353784   \n",
       "20597250   0.244158   0.002368   0.031147            0.151921   \n",
       "20597251   0.244158   0.002368   0.031147            0.151921   \n",
       "\n",
       "          avg_suff_stat_1  avg_suff_stat_2               genotype  \\\n",
       "0                0.289376         0.189648  HLA-DPA10103-DPB10101   \n",
       "1                0.364605         0.195923  HLA-DPA10103-DPB10201   \n",
       "2                0.337425         0.199984  HLA-DPA10103-DPB10202   \n",
       "3                0.367785         0.237641  HLA-DPA10103-DPB10301   \n",
       "4                0.320184         0.181603  HLA-DPA10103-DPB10401   \n",
       "...                   ...              ...                    ...   \n",
       "20597247         0.370838         0.193099  HLA-DQA10505-DQB10501   \n",
       "20597248         0.182281         0.167575  HLA-DQA10505-DQB10502   \n",
       "20597249         0.369109         0.150659  HLA-DQA10506-DQB10303   \n",
       "20597250         0.244158         0.176485  HLA-DQA10508-DQB10301   \n",
       "20597251         0.244158         0.176485  HLA-DQA10509-DQB10301   \n",
       "\n",
       "                            Peptide  sequence_length  \n",
       "0             MDLFMRIFTIGTVTLKQGEIK               21  \n",
       "1             MDLFMRIFTIGTVTLKQGEIK               21  \n",
       "2             MDLFMRIFTIGTVTLKQGEIK               21  \n",
       "3             MDLFMRIFTIGTVTLKQGEIK               21  \n",
       "4             MDLFMRIFTIGTVTLKQGEIK               21  \n",
       "...                             ...              ...  \n",
       "20597247  PIAVQMTKLATTEELPDEFVVVTVK               25  \n",
       "20597248  PIAVQMTKLATTEELPDEFVVVTVK               25  \n",
       "20597249  PIAVQMTKLATTEELPDEFVVVTVK               25  \n",
       "20597250  PIAVQMTKLATTEELPDEFVVVTVK               25  \n",
       "20597251  PIAVQMTKLATTEELPDEFVVVTVK               25  \n",
       "\n",
       "[119564536 rows x 9 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stack the original and the extra PUFFIN MHC-II predictions into one frame.\n",
    "# ignore_index=True gives the result a fresh 0..n-1 index; otherwise both inputs\n",
    "# keep their own row labels and the combined index contains duplicates.\n",
    "puffin_mhc2_all = pd.concat(\n",
    "    [puffin_mhc2_preds, puffin_mhc2_new_preds],\n",
    "    sort=False,\n",
    "    ignore_index=True,\n",
    ")\n",
    "puffin_mhc2_all"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only predictions for alleles listed in `hla_alleles`, then tag each row\n",
    "# with its locus: the first 4 characters for DRB alleles (e.g. 'DRB1'),\n",
    "# otherwise the first 6 (e.g. 'HLA-DQ').\n",
    "# `.assign` builds a new frame, so the column is never written into a slice of\n",
    "# `puffin_mhc2_all` (which would trigger pandas' SettingWithCopyWarning).\n",
    "puffin_mhc2_filtered = (\n",
    "    puffin_mhc2_all\n",
    "    .loc[puffin_mhc2_all['genotype'].isin(set(hla_alleles['allele'].values))]\n",
    "    .assign(loci=lambda preds: [\n",
    "        allele[:4] if allele[:3] == 'DRB' else allele[:6]\n",
    "        for allele in preds['genotype'].values\n",
    "    ])\n",
    ")\n",
    "puffin_mhc2_filtered"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>loci</th>\n",
       "      <th colspan=\"10\" halign=\"left\">DRB1</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"10\" halign=\"left\">HLA-DQ</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>genotype</th>\n",
       "      <th>DRB1_0101</th>\n",
       "      <th>DRB1_0102</th>\n",
       "      <th>DRB1_0103</th>\n",
       "      <th>DRB1_0301</th>\n",
       "      <th>DRB1_0302</th>\n",
       "      <th>DRB1_0401</th>\n",
       "      <th>DRB1_0402</th>\n",
       "      <th>DRB1_0403</th>\n",
       "      <th>DRB1_0404</th>\n",
       "      <th>DRB1_0405</th>\n",
       "      <th>...</th>\n",
       "      <th>HLA-DQA10505-DQB10309</th>\n",
       "      <th>HLA-DQA10505-DQB10319</th>\n",
       "      <th>HLA-DQA10505-DQB10402</th>\n",
       "      <th>HLA-DQA10505-DQB10501</th>\n",
       "      <th>HLA-DQA10505-DQB10502</th>\n",
       "      <th>HLA-DQA10506-DQB10303</th>\n",
       "      <th>HLA-DQA10508-DQB10301</th>\n",
       "      <th>HLA-DQA10509-DQB10301</th>\n",
       "      <th>HLA-DQA10601-DQB10301</th>\n",
       "      <th>unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Peptide</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRT</th>\n",
       "      <td>0.400601</td>\n",
       "      <td>0.312500</td>\n",
       "      <td>0.305490</td>\n",
       "      <td>0.091306</td>\n",
       "      <td>0.135300</td>\n",
       "      <td>0.289606</td>\n",
       "      <td>0.310504</td>\n",
       "      <td>0.227737</td>\n",
       "      <td>0.257072</td>\n",
       "      <td>0.384428</td>\n",
       "      <td>...</td>\n",
       "      <td>0.406164</td>\n",
       "      <td>0.406164</td>\n",
       "      <td>0.584529</td>\n",
       "      <td>0.456067</td>\n",
       "      <td>0.495886</td>\n",
       "      <td>0.386837</td>\n",
       "      <td>0.406164</td>\n",
       "      <td>0.406164</td>\n",
       "      <td>0.315752</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTF</th>\n",
       "      <td>0.580028</td>\n",
       "      <td>0.513865</td>\n",
       "      <td>0.536929</td>\n",
       "      <td>0.160847</td>\n",
       "      <td>0.209960</td>\n",
       "      <td>0.403355</td>\n",
       "      <td>0.465695</td>\n",
       "      <td>0.351060</td>\n",
       "      <td>0.383100</td>\n",
       "      <td>0.463590</td>\n",
       "      <td>...</td>\n",
       "      <td>0.437686</td>\n",
       "      <td>0.437686</td>\n",
       "      <td>0.655237</td>\n",
       "      <td>0.542353</td>\n",
       "      <td>0.569029</td>\n",
       "      <td>0.442531</td>\n",
       "      <td>0.437686</td>\n",
       "      <td>0.437686</td>\n",
       "      <td>0.350472</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFL</th>\n",
       "      <td>0.648609</td>\n",
       "      <td>0.569664</td>\n",
       "      <td>0.614583</td>\n",
       "      <td>0.189893</td>\n",
       "      <td>0.233327</td>\n",
       "      <td>0.406949</td>\n",
       "      <td>0.483555</td>\n",
       "      <td>0.369354</td>\n",
       "      <td>0.404608</td>\n",
       "      <td>0.482794</td>\n",
       "      <td>...</td>\n",
       "      <td>0.419085</td>\n",
       "      <td>0.419085</td>\n",
       "      <td>0.672601</td>\n",
       "      <td>0.551296</td>\n",
       "      <td>0.566071</td>\n",
       "      <td>0.435325</td>\n",
       "      <td>0.419085</td>\n",
       "      <td>0.419085</td>\n",
       "      <td>0.347583</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLL</th>\n",
       "      <td>0.691830</td>\n",
       "      <td>0.570180</td>\n",
       "      <td>0.659630</td>\n",
       "      <td>0.214472</td>\n",
       "      <td>0.273981</td>\n",
       "      <td>0.443015</td>\n",
       "      <td>0.508008</td>\n",
       "      <td>0.388828</td>\n",
       "      <td>0.416008</td>\n",
       "      <td>0.506840</td>\n",
       "      <td>...</td>\n",
       "      <td>0.397634</td>\n",
       "      <td>0.397634</td>\n",
       "      <td>0.675484</td>\n",
       "      <td>0.540213</td>\n",
       "      <td>0.549265</td>\n",
       "      <td>0.425213</td>\n",
       "      <td>0.397634</td>\n",
       "      <td>0.397634</td>\n",
       "      <td>0.331075</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLLK</th>\n",
       "      <td>0.728757</td>\n",
       "      <td>0.602119</td>\n",
       "      <td>0.679677</td>\n",
       "      <td>0.255770</td>\n",
       "      <td>0.328044</td>\n",
       "      <td>0.472579</td>\n",
       "      <td>0.542195</td>\n",
       "      <td>0.418011</td>\n",
       "      <td>0.444002</td>\n",
       "      <td>0.515598</td>\n",
       "      <td>...</td>\n",
       "      <td>0.389671</td>\n",
       "      <td>0.389671</td>\n",
       "      <td>0.729996</td>\n",
       "      <td>0.531725</td>\n",
       "      <td>0.520856</td>\n",
       "      <td>0.410215</td>\n",
       "      <td>0.389671</td>\n",
       "      <td>0.389671</td>\n",
       "      <td>0.321716</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMM</th>\n",
       "      <td>0.486435</td>\n",
       "      <td>0.325522</td>\n",
       "      <td>0.467445</td>\n",
       "      <td>0.178505</td>\n",
       "      <td>0.250803</td>\n",
       "      <td>0.409197</td>\n",
       "      <td>0.355279</td>\n",
       "      <td>0.390921</td>\n",
       "      <td>0.414456</td>\n",
       "      <td>0.483765</td>\n",
       "      <td>...</td>\n",
       "      <td>0.374938</td>\n",
       "      <td>0.374938</td>\n",
       "      <td>0.562576</td>\n",
       "      <td>0.472952</td>\n",
       "      <td>0.351609</td>\n",
       "      <td>0.460095</td>\n",
       "      <td>0.374938</td>\n",
       "      <td>0.374938</td>\n",
       "      <td>0.348333</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMC</th>\n",
       "      <td>0.483641</td>\n",
       "      <td>0.320667</td>\n",
       "      <td>0.466266</td>\n",
       "      <td>0.179284</td>\n",
       "      <td>0.253753</td>\n",
       "      <td>0.399223</td>\n",
       "      <td>0.350970</td>\n",
       "      <td>0.385805</td>\n",
       "      <td>0.406049</td>\n",
       "      <td>0.472025</td>\n",
       "      <td>...</td>\n",
       "      <td>0.373139</td>\n",
       "      <td>0.373139</td>\n",
       "      <td>0.563148</td>\n",
       "      <td>0.467725</td>\n",
       "      <td>0.341265</td>\n",
       "      <td>0.461041</td>\n",
       "      <td>0.373139</td>\n",
       "      <td>0.373139</td>\n",
       "      <td>0.361561</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCY</th>\n",
       "      <td>0.468505</td>\n",
       "      <td>0.317547</td>\n",
       "      <td>0.456332</td>\n",
       "      <td>0.174601</td>\n",
       "      <td>0.253481</td>\n",
       "      <td>0.381635</td>\n",
       "      <td>0.344578</td>\n",
       "      <td>0.378381</td>\n",
       "      <td>0.396111</td>\n",
       "      <td>0.440015</td>\n",
       "      <td>...</td>\n",
       "      <td>0.350295</td>\n",
       "      <td>0.350295</td>\n",
       "      <td>0.550179</td>\n",
       "      <td>0.457353</td>\n",
       "      <td>0.319373</td>\n",
       "      <td>0.450740</td>\n",
       "      <td>0.350295</td>\n",
       "      <td>0.350295</td>\n",
       "      <td>0.355496</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYK</th>\n",
       "      <td>0.460330</td>\n",
       "      <td>0.309825</td>\n",
       "      <td>0.443855</td>\n",
       "      <td>0.183948</td>\n",
       "      <td>0.260090</td>\n",
       "      <td>0.378605</td>\n",
       "      <td>0.343438</td>\n",
       "      <td>0.376068</td>\n",
       "      <td>0.395613</td>\n",
       "      <td>0.442305</td>\n",
       "      <td>...</td>\n",
       "      <td>0.338088</td>\n",
       "      <td>0.338088</td>\n",
       "      <td>0.533762</td>\n",
       "      <td>0.441113</td>\n",
       "      <td>0.279456</td>\n",
       "      <td>0.419088</td>\n",
       "      <td>0.338088</td>\n",
       "      <td>0.338088</td>\n",
       "      <td>0.340112</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYKR</th>\n",
       "      <td>0.459879</td>\n",
       "      <td>0.313295</td>\n",
       "      <td>0.437861</td>\n",
       "      <td>0.198431</td>\n",
       "      <td>0.270559</td>\n",
       "      <td>0.385990</td>\n",
       "      <td>0.355865</td>\n",
       "      <td>0.382627</td>\n",
       "      <td>0.403123</td>\n",
       "      <td>0.450534</td>\n",
       "      <td>...</td>\n",
       "      <td>0.326963</td>\n",
       "      <td>0.326963</td>\n",
       "      <td>0.531836</td>\n",
       "      <td>0.437247</td>\n",
       "      <td>0.267803</td>\n",
       "      <td>0.404829</td>\n",
       "      <td>0.326963</td>\n",
       "      <td>0.326963</td>\n",
       "      <td>0.326261</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>125593 rows × 283 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "loci                           DRB1                                          \\\n",
       "genotype                  DRB1_0101 DRB1_0102 DRB1_0103 DRB1_0301 DRB1_0302   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT              0.400601  0.312500  0.305490  0.091306  0.135300   \n",
       "AAAYYVGYLQPRTF             0.580028  0.513865  0.536929  0.160847  0.209960   \n",
       "AAAYYVGYLQPRTFL            0.648609  0.569664  0.614583  0.189893  0.233327   \n",
       "AAAYYVGYLQPRTFLL           0.691830  0.570180  0.659630  0.214472  0.273981   \n",
       "AAAYYVGYLQPRTFLLK          0.728757  0.602119  0.679677  0.255770  0.328044   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      0.486435  0.325522  0.467445  0.178505  0.250803   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     0.483641  0.320667  0.466266  0.179284  0.253753   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    0.468505  0.317547  0.456332  0.174601  0.253481   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   0.460330  0.309825  0.443855  0.183948  0.260090   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  0.459879  0.313295  0.437861  0.198431  0.270559   \n",
       "\n",
       "loci                                                                         \\\n",
       "genotype                  DRB1_0401 DRB1_0402 DRB1_0403 DRB1_0404 DRB1_0405   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT              0.289606  0.310504  0.227737  0.257072  0.384428   \n",
       "AAAYYVGYLQPRTF             0.403355  0.465695  0.351060  0.383100  0.463590   \n",
       "AAAYYVGYLQPRTFL            0.406949  0.483555  0.369354  0.404608  0.482794   \n",
       "AAAYYVGYLQPRTFLL           0.443015  0.508008  0.388828  0.416008  0.506840   \n",
       "AAAYYVGYLQPRTFLLK          0.472579  0.542195  0.418011  0.444002  0.515598   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      0.409197  0.355279  0.390921  0.414456  0.483765   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     0.399223  0.350970  0.385805  0.406049  0.472025   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    0.381635  0.344578  0.378381  0.396111  0.440015   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   0.378605  0.343438  0.376068  0.395613  0.442305   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  0.385990  0.355865  0.382627  0.403123  0.450534   \n",
       "\n",
       "loci                       ...                HLA-DQ                        \\\n",
       "genotype                   ... HLA-DQA10505-DQB10309 HLA-DQA10505-DQB10319   \n",
       "Peptide                    ...                                               \n",
       "AAAYYVGYLQPRT              ...              0.406164              0.406164   \n",
       "AAAYYVGYLQPRTF             ...              0.437686              0.437686   \n",
       "AAAYYVGYLQPRTFL            ...              0.419085              0.419085   \n",
       "AAAYYVGYLQPRTFLL           ...              0.397634              0.397634   \n",
       "AAAYYVGYLQPRTFLLK          ...              0.389671              0.389671   \n",
       "...                        ...                   ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      ...              0.374938              0.374938   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     ...              0.373139              0.373139   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    ...              0.350295              0.350295   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   ...              0.338088              0.338088   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  ...              0.326963              0.326963   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10402 HLA-DQA10505-DQB10501   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                          0.584529              0.456067   \n",
       "AAAYYVGYLQPRTF                         0.655237              0.542353   \n",
       "AAAYYVGYLQPRTFL                        0.672601              0.551296   \n",
       "AAAYYVGYLQPRTFLL                       0.675484              0.540213   \n",
       "AAAYYVGYLQPRTFLLK                      0.729996              0.531725   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.562576              0.472952   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.563148              0.467725   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.550179              0.457353   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.533762              0.441113   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.531836              0.437247   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10502 HLA-DQA10506-DQB10303   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                          0.495886              0.386837   \n",
       "AAAYYVGYLQPRTF                         0.569029              0.442531   \n",
       "AAAYYVGYLQPRTFL                        0.566071              0.435325   \n",
       "AAAYYVGYLQPRTFLL                       0.549265              0.425213   \n",
       "AAAYYVGYLQPRTFLLK                      0.520856              0.410215   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.351609              0.460095   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.341265              0.461041   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.319373              0.450740   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.279456              0.419088   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.267803              0.404829   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10508-DQB10301 HLA-DQA10509-DQB10301   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                          0.406164              0.406164   \n",
       "AAAYYVGYLQPRTF                         0.437686              0.437686   \n",
       "AAAYYVGYLQPRTFL                        0.419085              0.419085   \n",
       "AAAYYVGYLQPRTFLL                       0.397634              0.397634   \n",
       "AAAYYVGYLQPRTFLLK                      0.389671              0.389671   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.374938              0.374938   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.373139              0.373139   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.350295              0.350295   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.338088              0.338088   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.326963              0.326963   \n",
       "\n",
       "loci                                                     \n",
       "genotype                  HLA-DQA10601-DQB10301 unknown  \n",
       "Peptide                                                  \n",
       "AAAYYVGYLQPRT                          0.315752     0.0  \n",
       "AAAYYVGYLQPRTF                         0.350472     0.0  \n",
       "AAAYYVGYLQPRTFL                        0.347583     0.0  \n",
       "AAAYYVGYLQPRTFLL                       0.331075     0.0  \n",
       "AAAYYVGYLQPRTFLLK                      0.321716     0.0  \n",
       "...                                         ...     ...  \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.348333     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.361561     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.355496     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.340112     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.326261     0.0  \n",
       "\n",
       "[125593 rows x 283 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = puffin_mhc2_filtered\n",
    "\n",
    "# One placeholder row per (Peptide, loci) pair, with genotype 'unknown' and a\n",
    "# score of 0, so every locus gains an 'unknown' column in the pivot.\n",
    "# .size() lists the distinct pairs without counting every other column, and\n",
    "# .assign() adds the constants on a new frame rather than on a column subset.\n",
    "df2 = (\n",
    "    df.groupby(['Peptide', 'loci'])\n",
    "    .size()\n",
    "    .reset_index()[['Peptide', 'loci']]\n",
    "    .assign(genotype='unknown', mean_pred=0., binding_likelihood=0.)\n",
    ")\n",
    "\n",
    "# Peptide x (loci, genotype) matrix of mean_pred. pivot_table aggregates with\n",
    "# its default aggfunc (mean) if a (Peptide, loci, genotype) cell has several rows.\n",
    "data_pivot = pd.concat([df, df2], sort=False).pivot_table(\n",
    "    index='Peptide',\n",
    "    columns=['loci', 'genotype'],\n",
    "    values='mean_pred',\n",
    ")\n",
    "# Compression is inferred from the '.gz' suffix. protocol=2 is presumably for\n",
    "# compatibility with older Python readers -- TODO confirm.\n",
    "data_pivot.to_pickle('mhc2_puffin_pred_affinity_pivot_v1v2.pkl.gz', protocol=2)\n",
    "data_pivot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>loci</th>\n",
       "      <th colspan=\"10\" halign=\"left\">DRB1</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"10\" halign=\"left\">HLA-DQ</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>genotype</th>\n",
       "      <th>DRB1_0101</th>\n",
       "      <th>DRB1_0102</th>\n",
       "      <th>DRB1_0103</th>\n",
       "      <th>DRB1_0301</th>\n",
       "      <th>DRB1_0302</th>\n",
       "      <th>DRB1_0401</th>\n",
       "      <th>DRB1_0402</th>\n",
       "      <th>DRB1_0403</th>\n",
       "      <th>DRB1_0404</th>\n",
       "      <th>DRB1_0405</th>\n",
       "      <th>...</th>\n",
       "      <th>HLA-DQA10505-DQB10309</th>\n",
       "      <th>HLA-DQA10505-DQB10319</th>\n",
       "      <th>HLA-DQA10505-DQB10402</th>\n",
       "      <th>HLA-DQA10505-DQB10501</th>\n",
       "      <th>HLA-DQA10505-DQB10502</th>\n",
       "      <th>HLA-DQA10506-DQB10303</th>\n",
       "      <th>HLA-DQA10508-DQB10301</th>\n",
       "      <th>HLA-DQA10509-DQB10301</th>\n",
       "      <th>HLA-DQA10601-DQB10301</th>\n",
       "      <th>unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Peptide</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRT</th>\n",
       "      <td>0.449154</td>\n",
       "      <td>0.275837</td>\n",
       "      <td>0.251057</td>\n",
       "      <td>0.004402</td>\n",
       "      <td>0.019182</td>\n",
       "      <td>0.209804</td>\n",
       "      <td>0.273397</td>\n",
       "      <td>0.121780</td>\n",
       "      <td>0.160569</td>\n",
       "      <td>0.397723</td>\n",
       "      <td>...</td>\n",
       "      <td>0.456711</td>\n",
       "      <td>0.456711</td>\n",
       "      <td>0.911465</td>\n",
       "      <td>0.568704</td>\n",
       "      <td>0.675733</td>\n",
       "      <td>0.380722</td>\n",
       "      <td>0.456711</td>\n",
       "      <td>0.456711</td>\n",
       "      <td>0.286662</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTF</th>\n",
       "      <td>0.763526</td>\n",
       "      <td>0.662420</td>\n",
       "      <td>0.696835</td>\n",
       "      <td>0.043372</td>\n",
       "      <td>0.097140</td>\n",
       "      <td>0.453153</td>\n",
       "      <td>0.579785</td>\n",
       "      <td>0.343221</td>\n",
       "      <td>0.408718</td>\n",
       "      <td>0.589835</td>\n",
       "      <td>...</td>\n",
       "      <td>0.526885</td>\n",
       "      <td>0.526885</td>\n",
       "      <td>0.977296</td>\n",
       "      <td>0.753945</td>\n",
       "      <td>0.826413</td>\n",
       "      <td>0.553813</td>\n",
       "      <td>0.526885</td>\n",
       "      <td>0.526885</td>\n",
       "      <td>0.353530</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFL</th>\n",
       "      <td>0.830495</td>\n",
       "      <td>0.739159</td>\n",
       "      <td>0.790134</td>\n",
       "      <td>0.095286</td>\n",
       "      <td>0.153303</td>\n",
       "      <td>0.463464</td>\n",
       "      <td>0.607626</td>\n",
       "      <td>0.387753</td>\n",
       "      <td>0.457538</td>\n",
       "      <td>0.624485</td>\n",
       "      <td>...</td>\n",
       "      <td>0.485483</td>\n",
       "      <td>0.485483</td>\n",
       "      <td>0.983811</td>\n",
       "      <td>0.765804</td>\n",
       "      <td>0.816345</td>\n",
       "      <td>0.529432</td>\n",
       "      <td>0.485483</td>\n",
       "      <td>0.485483</td>\n",
       "      <td>0.350396</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLL</th>\n",
       "      <td>0.866045</td>\n",
       "      <td>0.723269</td>\n",
       "      <td>0.830479</td>\n",
       "      <td>0.162656</td>\n",
       "      <td>0.245546</td>\n",
       "      <td>0.529735</td>\n",
       "      <td>0.641095</td>\n",
       "      <td>0.433302</td>\n",
       "      <td>0.482508</td>\n",
       "      <td>0.657231</td>\n",
       "      <td>...</td>\n",
       "      <td>0.439450</td>\n",
       "      <td>0.439450</td>\n",
       "      <td>0.984019</td>\n",
       "      <td>0.734214</td>\n",
       "      <td>0.772907</td>\n",
       "      <td>0.498833</td>\n",
       "      <td>0.439450</td>\n",
       "      <td>0.439450</td>\n",
       "      <td>0.325055</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLLK</th>\n",
       "      <td>0.899770</td>\n",
       "      <td>0.759244</td>\n",
       "      <td>0.852191</td>\n",
       "      <td>0.232364</td>\n",
       "      <td>0.338678</td>\n",
       "      <td>0.576275</td>\n",
       "      <td>0.690256</td>\n",
       "      <td>0.486627</td>\n",
       "      <td>0.532303</td>\n",
       "      <td>0.670455</td>\n",
       "      <td>...</td>\n",
       "      <td>0.423755</td>\n",
       "      <td>0.423755</td>\n",
       "      <td>0.997090</td>\n",
       "      <td>0.715366</td>\n",
       "      <td>0.712002</td>\n",
       "      <td>0.457734</td>\n",
       "      <td>0.423755</td>\n",
       "      <td>0.423755</td>\n",
       "      <td>0.312253</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMM</th>\n",
       "      <td>0.589478</td>\n",
       "      <td>0.342606</td>\n",
       "      <td>0.561539</td>\n",
       "      <td>0.087292</td>\n",
       "      <td>0.197640</td>\n",
       "      <td>0.473397</td>\n",
       "      <td>0.381563</td>\n",
       "      <td>0.440111</td>\n",
       "      <td>0.480615</td>\n",
       "      <td>0.612662</td>\n",
       "      <td>...</td>\n",
       "      <td>0.398580</td>\n",
       "      <td>0.398580</td>\n",
       "      <td>0.842378</td>\n",
       "      <td>0.604732</td>\n",
       "      <td>0.335097</td>\n",
       "      <td>0.612148</td>\n",
       "      <td>0.398580</td>\n",
       "      <td>0.398580</td>\n",
       "      <td>0.364732</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMC</th>\n",
       "      <td>0.584971</td>\n",
       "      <td>0.336226</td>\n",
       "      <td>0.559315</td>\n",
       "      <td>0.093154</td>\n",
       "      <td>0.206702</td>\n",
       "      <td>0.457544</td>\n",
       "      <td>0.376589</td>\n",
       "      <td>0.432581</td>\n",
       "      <td>0.466811</td>\n",
       "      <td>0.588546</td>\n",
       "      <td>...</td>\n",
       "      <td>0.397415</td>\n",
       "      <td>0.397415</td>\n",
       "      <td>0.838706</td>\n",
       "      <td>0.592264</td>\n",
       "      <td>0.315956</td>\n",
       "      <td>0.613799</td>\n",
       "      <td>0.397415</td>\n",
       "      <td>0.397415</td>\n",
       "      <td>0.389500</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCY</th>\n",
       "      <td>0.562452</td>\n",
       "      <td>0.333869</td>\n",
       "      <td>0.544634</td>\n",
       "      <td>0.096750</td>\n",
       "      <td>0.213904</td>\n",
       "      <td>0.430278</td>\n",
       "      <td>0.370461</td>\n",
       "      <td>0.422776</td>\n",
       "      <td>0.451707</td>\n",
       "      <td>0.526737</td>\n",
       "      <td>...</td>\n",
       "      <td>0.357532</td>\n",
       "      <td>0.357532</td>\n",
       "      <td>0.811466</td>\n",
       "      <td>0.567773</td>\n",
       "      <td>0.276164</td>\n",
       "      <td>0.579128</td>\n",
       "      <td>0.357532</td>\n",
       "      <td>0.357532</td>\n",
       "      <td>0.382995</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYK</th>\n",
       "      <td>0.550762</td>\n",
       "      <td>0.322279</td>\n",
       "      <td>0.526595</td>\n",
       "      <td>0.115894</td>\n",
       "      <td>0.223661</td>\n",
       "      <td>0.424311</td>\n",
       "      <td>0.368985</td>\n",
       "      <td>0.417780</td>\n",
       "      <td>0.450152</td>\n",
       "      <td>0.532259</td>\n",
       "      <td>...</td>\n",
       "      <td>0.336787</td>\n",
       "      <td>0.336787</td>\n",
       "      <td>0.772785</td>\n",
       "      <td>0.532789</td>\n",
       "      <td>0.208163</td>\n",
       "      <td>0.479738</td>\n",
       "      <td>0.336787</td>\n",
       "      <td>0.336787</td>\n",
       "      <td>0.359745</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYKR</th>\n",
       "      <td>0.549986</td>\n",
       "      <td>0.328773</td>\n",
       "      <td>0.517844</td>\n",
       "      <td>0.140979</td>\n",
       "      <td>0.242483</td>\n",
       "      <td>0.435545</td>\n",
       "      <td>0.389707</td>\n",
       "      <td>0.428690</td>\n",
       "      <td>0.462583</td>\n",
       "      <td>0.548630</td>\n",
       "      <td>...</td>\n",
       "      <td>0.318870</td>\n",
       "      <td>0.318870</td>\n",
       "      <td>0.763559</td>\n",
       "      <td>0.524521</td>\n",
       "      <td>0.190678</td>\n",
       "      <td>0.437137</td>\n",
       "      <td>0.318870</td>\n",
       "      <td>0.318870</td>\n",
       "      <td>0.338998</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>125593 rows × 283 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "loci                           DRB1                                          \\\n",
       "genotype                  DRB1_0101 DRB1_0102 DRB1_0103 DRB1_0301 DRB1_0302   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT              0.449154  0.275837  0.251057  0.004402  0.019182   \n",
       "AAAYYVGYLQPRTF             0.763526  0.662420  0.696835  0.043372  0.097140   \n",
       "AAAYYVGYLQPRTFL            0.830495  0.739159  0.790134  0.095286  0.153303   \n",
       "AAAYYVGYLQPRTFLL           0.866045  0.723269  0.830479  0.162656  0.245546   \n",
       "AAAYYVGYLQPRTFLLK          0.899770  0.759244  0.852191  0.232364  0.338678   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      0.589478  0.342606  0.561539  0.087292  0.197640   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     0.584971  0.336226  0.559315  0.093154  0.206702   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    0.562452  0.333869  0.544634  0.096750  0.213904   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   0.550762  0.322279  0.526595  0.115894  0.223661   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  0.549986  0.328773  0.517844  0.140979  0.242483   \n",
       "\n",
       "loci                                                                         \\\n",
       "genotype                  DRB1_0401 DRB1_0402 DRB1_0403 DRB1_0404 DRB1_0405   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT              0.209804  0.273397  0.121780  0.160569  0.397723   \n",
       "AAAYYVGYLQPRTF             0.453153  0.579785  0.343221  0.408718  0.589835   \n",
       "AAAYYVGYLQPRTFL            0.463464  0.607626  0.387753  0.457538  0.624485   \n",
       "AAAYYVGYLQPRTFLL           0.529735  0.641095  0.433302  0.482508  0.657231   \n",
       "AAAYYVGYLQPRTFLLK          0.576275  0.690256  0.486627  0.532303  0.670455   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      0.473397  0.381563  0.440111  0.480615  0.612662   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     0.457544  0.376589  0.432581  0.466811  0.588546   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    0.430278  0.370461  0.422776  0.451707  0.526737   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   0.424311  0.368985  0.417780  0.450152  0.532259   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  0.435545  0.389707  0.428690  0.462583  0.548630   \n",
       "\n",
       "loci                       ...                HLA-DQ                        \\\n",
       "genotype                   ... HLA-DQA10505-DQB10309 HLA-DQA10505-DQB10319   \n",
       "Peptide                    ...                                               \n",
       "AAAYYVGYLQPRT              ...              0.456711              0.456711   \n",
       "AAAYYVGYLQPRTF             ...              0.526885              0.526885   \n",
       "AAAYYVGYLQPRTFL            ...              0.485483              0.485483   \n",
       "AAAYYVGYLQPRTFLL           ...              0.439450              0.439450   \n",
       "AAAYYVGYLQPRTFLLK          ...              0.423755              0.423755   \n",
       "...                        ...                   ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      ...              0.398580              0.398580   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     ...              0.397415              0.397415   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    ...              0.357532              0.357532   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   ...              0.336787              0.336787   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  ...              0.318870              0.318870   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10402 HLA-DQA10505-DQB10501   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                          0.911465              0.568704   \n",
       "AAAYYVGYLQPRTF                         0.977296              0.753945   \n",
       "AAAYYVGYLQPRTFL                        0.983811              0.765804   \n",
       "AAAYYVGYLQPRTFLL                       0.984019              0.734214   \n",
       "AAAYYVGYLQPRTFLLK                      0.997090              0.715366   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.842378              0.604732   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.838706              0.592264   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.811466              0.567773   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.772785              0.532789   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.763559              0.524521   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10502 HLA-DQA10506-DQB10303   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                          0.675733              0.380722   \n",
       "AAAYYVGYLQPRTF                         0.826413              0.553813   \n",
       "AAAYYVGYLQPRTFL                        0.816345              0.529432   \n",
       "AAAYYVGYLQPRTFLL                       0.772907              0.498833   \n",
       "AAAYYVGYLQPRTFLLK                      0.712002              0.457734   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.335097              0.612148   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.315956              0.613799   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.276164              0.579128   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.208163              0.479738   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.190678              0.437137   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10508-DQB10301 HLA-DQA10509-DQB10301   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                          0.456711              0.456711   \n",
       "AAAYYVGYLQPRTF                         0.526885              0.526885   \n",
       "AAAYYVGYLQPRTFL                        0.485483              0.485483   \n",
       "AAAYYVGYLQPRTFLL                       0.439450              0.439450   \n",
       "AAAYYVGYLQPRTFLLK                      0.423755              0.423755   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.398580              0.398580   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.397415              0.397415   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.357532              0.357532   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.336787              0.336787   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.318870              0.318870   \n",
       "\n",
       "loci                                                     \n",
       "genotype                  HLA-DQA10601-DQB10301 unknown  \n",
       "Peptide                                                  \n",
       "AAAYYVGYLQPRT                          0.286662     0.0  \n",
       "AAAYYVGYLQPRTF                         0.353530     0.0  \n",
       "AAAYYVGYLQPRTFL                        0.350396     0.0  \n",
       "AAAYYVGYLQPRTFLL                       0.325055     0.0  \n",
       "AAAYYVGYLQPRTFLLK                      0.312253     0.0  \n",
       "...                                         ...     ...  \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.364732     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.389500     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.382995     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.359745     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.338998     0.0  \n",
       "\n",
       "[125593 rows x 283 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peptide x (loci, genotype) table of binding_likelihood.\n",
    "# `df` is the filtered prediction frame and `df2` the zero-valued 'unknown'-genotype\n",
    "# placeholder rows; both come from the previous cell.\n",
    "data_pivot = pd.pivot_table(\n",
    "    pd.concat([df, df2], sort=False),\n",
    "    values='binding_likelihood',\n",
    "    index='Peptide',\n",
    "    columns=['loci', 'genotype'],\n",
    ")\n",
    "# NOTE(review): protocol=2 presumably keeps the pickle loadable from Python 2 - confirm.\n",
    "data_pivot.to_pickle('mhc2_puffin_binding_likelihood_pivot_v1v2.pkl.gz', protocol=2)\n",
    "data_pivot"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compute ensembles of NetMHCIIpan-3.2 and NetMHCIIpan-4.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load previously pickled pivot tables, one per predictor version and score type.\n",
    "# NOTE(review): the affinity files carry a `_v1v2` suffix while the rank files use `_v2`;\n",
    "# confirm that mixing the two is intended.\n",
    "\n",
    "# Predicted-affinity pivots.\n",
    "netmhc32_aff_pivot = pd.read_pickle('mhc2_haplotype_netmhcii-3.2_pred_affinity_pivot_v1v2.pkl.gz')\n",
    "netmhc40_aff_pivot = pd.read_pickle('mhc2_haplotype_netmhcii-4.0_pred_affinity_pivot_v1v2.pkl.gz')\n",
    "\n",
    "# Rank pivots (file names suggest BA = binding affinity, EL = eluted ligand - TODO confirm).\n",
    "netmhc32_ba_rank_pivot = pd.read_pickle('mhc2_haplotype_netmhcii-3.2_ba_rank_pivot_v2.pkl.gz')\n",
    "netmhc40_el_rank_pivot = pd.read_pickle('mhc2_haplotype_netmhcii-4.0_el_rank_pivot_v2.pkl.gz')\n",
    "netmhc40_ba_rank_pivot = pd.read_pickle('mhc2_haplotype_netmhcii-4.0_ba_rank_pivot_v2.pkl.gz')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>loci</th>\n",
       "      <th colspan=\"10\" halign=\"left\">DRB1</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"10\" halign=\"left\">HLA-DQ</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>genotype</th>\n",
       "      <th>DRB1_0101</th>\n",
       "      <th>DRB1_0102</th>\n",
       "      <th>DRB1_0103</th>\n",
       "      <th>DRB1_0301</th>\n",
       "      <th>DRB1_0302</th>\n",
       "      <th>DRB1_0401</th>\n",
       "      <th>DRB1_0402</th>\n",
       "      <th>DRB1_0403</th>\n",
       "      <th>DRB1_0404</th>\n",
       "      <th>DRB1_0405</th>\n",
       "      <th>...</th>\n",
       "      <th>HLA-DQA10505-DQB10302</th>\n",
       "      <th>HLA-DQA10505-DQB10309</th>\n",
       "      <th>HLA-DQA10505-DQB10319</th>\n",
       "      <th>HLA-DQA10505-DQB10402</th>\n",
       "      <th>HLA-DQA10505-DQB10501</th>\n",
       "      <th>HLA-DQA10505-DQB10502</th>\n",
       "      <th>HLA-DQA10506-DQB10303</th>\n",
       "      <th>HLA-DQA10508-DQB10301</th>\n",
       "      <th>HLA-DQA10509-DQB10301</th>\n",
       "      <th>HLA-DQA10601-DQB10301</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Peptide</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRT</th>\n",
       "      <td>64.275</td>\n",
       "      <td>59.650</td>\n",
       "      <td>68.835</td>\n",
       "      <td>51.565</td>\n",
       "      <td>65.980</td>\n",
       "      <td>64.140</td>\n",
       "      <td>61.215</td>\n",
       "      <td>55.335</td>\n",
       "      <td>48.245</td>\n",
       "      <td>65.205</td>\n",
       "      <td>...</td>\n",
       "      <td>61.845</td>\n",
       "      <td>80.165</td>\n",
       "      <td>80.165</td>\n",
       "      <td>81.165</td>\n",
       "      <td>74.465</td>\n",
       "      <td>90.010</td>\n",
       "      <td>66.750</td>\n",
       "      <td>80.165</td>\n",
       "      <td>80.165</td>\n",
       "      <td>74.310</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTF</th>\n",
       "      <td>73.455</td>\n",
       "      <td>75.120</td>\n",
       "      <td>78.685</td>\n",
       "      <td>59.470</td>\n",
       "      <td>70.130</td>\n",
       "      <td>67.860</td>\n",
       "      <td>75.370</td>\n",
       "      <td>62.330</td>\n",
       "      <td>58.140</td>\n",
       "      <td>73.335</td>\n",
       "      <td>...</td>\n",
       "      <td>68.745</td>\n",
       "      <td>80.400</td>\n",
       "      <td>80.400</td>\n",
       "      <td>87.275</td>\n",
       "      <td>80.335</td>\n",
       "      <td>91.220</td>\n",
       "      <td>71.350</td>\n",
       "      <td>80.400</td>\n",
       "      <td>80.400</td>\n",
       "      <td>75.200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFL</th>\n",
       "      <td>74.710</td>\n",
       "      <td>83.220</td>\n",
       "      <td>80.270</td>\n",
       "      <td>58.335</td>\n",
       "      <td>66.605</td>\n",
       "      <td>62.605</td>\n",
       "      <td>74.825</td>\n",
       "      <td>58.555</td>\n",
       "      <td>54.250</td>\n",
       "      <td>69.085</td>\n",
       "      <td>...</td>\n",
       "      <td>63.005</td>\n",
       "      <td>75.580</td>\n",
       "      <td>75.580</td>\n",
       "      <td>82.425</td>\n",
       "      <td>80.640</td>\n",
       "      <td>86.730</td>\n",
       "      <td>65.180</td>\n",
       "      <td>75.580</td>\n",
       "      <td>75.580</td>\n",
       "      <td>69.775</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLL</th>\n",
       "      <td>70.505</td>\n",
       "      <td>79.030</td>\n",
       "      <td>77.195</td>\n",
       "      <td>51.385</td>\n",
       "      <td>59.000</td>\n",
       "      <td>56.910</td>\n",
       "      <td>67.635</td>\n",
       "      <td>51.110</td>\n",
       "      <td>48.885</td>\n",
       "      <td>64.000</td>\n",
       "      <td>...</td>\n",
       "      <td>58.000</td>\n",
       "      <td>69.810</td>\n",
       "      <td>69.810</td>\n",
       "      <td>74.615</td>\n",
       "      <td>72.185</td>\n",
       "      <td>76.630</td>\n",
       "      <td>58.270</td>\n",
       "      <td>69.810</td>\n",
       "      <td>69.810</td>\n",
       "      <td>62.040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLLK</th>\n",
       "      <td>78.045</td>\n",
       "      <td>81.340</td>\n",
       "      <td>80.755</td>\n",
       "      <td>43.050</td>\n",
       "      <td>51.195</td>\n",
       "      <td>62.300</td>\n",
       "      <td>62.605</td>\n",
       "      <td>47.870</td>\n",
       "      <td>47.880</td>\n",
       "      <td>60.040</td>\n",
       "      <td>...</td>\n",
       "      <td>51.745</td>\n",
       "      <td>61.085</td>\n",
       "      <td>61.085</td>\n",
       "      <td>67.705</td>\n",
       "      <td>69.475</td>\n",
       "      <td>67.915</td>\n",
       "      <td>52.360</td>\n",
       "      <td>61.085</td>\n",
       "      <td>61.085</td>\n",
       "      <td>54.115</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMM</th>\n",
       "      <td>17.675</td>\n",
       "      <td>17.500</td>\n",
       "      <td>27.870</td>\n",
       "      <td>15.120</td>\n",
       "      <td>24.100</td>\n",
       "      <td>40.645</td>\n",
       "      <td>26.405</td>\n",
       "      <td>27.935</td>\n",
       "      <td>29.590</td>\n",
       "      <td>55.095</td>\n",
       "      <td>...</td>\n",
       "      <td>23.240</td>\n",
       "      <td>33.535</td>\n",
       "      <td>33.535</td>\n",
       "      <td>34.000</td>\n",
       "      <td>31.500</td>\n",
       "      <td>36.905</td>\n",
       "      <td>31.520</td>\n",
       "      <td>33.535</td>\n",
       "      <td>33.535</td>\n",
       "      <td>33.080</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMC</th>\n",
       "      <td>17.890</td>\n",
       "      <td>17.500</td>\n",
       "      <td>28.265</td>\n",
       "      <td>15.065</td>\n",
       "      <td>24.250</td>\n",
       "      <td>43.865</td>\n",
       "      <td>26.715</td>\n",
       "      <td>28.455</td>\n",
       "      <td>30.230</td>\n",
       "      <td>55.515</td>\n",
       "      <td>...</td>\n",
       "      <td>25.690</td>\n",
       "      <td>34.015</td>\n",
       "      <td>34.015</td>\n",
       "      <td>34.500</td>\n",
       "      <td>31.500</td>\n",
       "      <td>37.090</td>\n",
       "      <td>31.645</td>\n",
       "      <td>34.015</td>\n",
       "      <td>34.015</td>\n",
       "      <td>33.510</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCY</th>\n",
       "      <td>18.135</td>\n",
       "      <td>17.710</td>\n",
       "      <td>28.690</td>\n",
       "      <td>17.710</td>\n",
       "      <td>24.400</td>\n",
       "      <td>44.470</td>\n",
       "      <td>27.145</td>\n",
       "      <td>31.410</td>\n",
       "      <td>30.680</td>\n",
       "      <td>56.340</td>\n",
       "      <td>...</td>\n",
       "      <td>28.915</td>\n",
       "      <td>35.615</td>\n",
       "      <td>35.615</td>\n",
       "      <td>35.000</td>\n",
       "      <td>32.500</td>\n",
       "      <td>37.675</td>\n",
       "      <td>32.745</td>\n",
       "      <td>35.615</td>\n",
       "      <td>35.615</td>\n",
       "      <td>35.675</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYK</th>\n",
       "      <td>18.365</td>\n",
       "      <td>17.910</td>\n",
       "      <td>29.115</td>\n",
       "      <td>17.840</td>\n",
       "      <td>24.500</td>\n",
       "      <td>44.980</td>\n",
       "      <td>27.505</td>\n",
       "      <td>31.800</td>\n",
       "      <td>31.060</td>\n",
       "      <td>57.095</td>\n",
       "      <td>...</td>\n",
       "      <td>29.555</td>\n",
       "      <td>36.445</td>\n",
       "      <td>36.445</td>\n",
       "      <td>36.000</td>\n",
       "      <td>33.000</td>\n",
       "      <td>38.255</td>\n",
       "      <td>33.390</td>\n",
       "      <td>36.445</td>\n",
       "      <td>36.445</td>\n",
       "      <td>36.380</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYKR</th>\n",
       "      <td>21.085</td>\n",
       "      <td>18.135</td>\n",
       "      <td>29.395</td>\n",
       "      <td>17.935</td>\n",
       "      <td>24.650</td>\n",
       "      <td>45.310</td>\n",
       "      <td>27.755</td>\n",
       "      <td>32.130</td>\n",
       "      <td>33.945</td>\n",
       "      <td>57.305</td>\n",
       "      <td>...</td>\n",
       "      <td>30.730</td>\n",
       "      <td>38.105</td>\n",
       "      <td>38.105</td>\n",
       "      <td>37.500</td>\n",
       "      <td>33.500</td>\n",
       "      <td>38.825</td>\n",
       "      <td>34.285</td>\n",
       "      <td>38.105</td>\n",
       "      <td>38.105</td>\n",
       "      <td>37.895</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>125593 rows × 280 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "loci                           DRB1                                          \\\n",
       "genotype                  DRB1_0101 DRB1_0102 DRB1_0103 DRB1_0301 DRB1_0302   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT                64.275    59.650    68.835    51.565    65.980   \n",
       "AAAYYVGYLQPRTF               73.455    75.120    78.685    59.470    70.130   \n",
       "AAAYYVGYLQPRTFL              74.710    83.220    80.270    58.335    66.605   \n",
       "AAAYYVGYLQPRTFLL             70.505    79.030    77.195    51.385    59.000   \n",
       "AAAYYVGYLQPRTFLLK            78.045    81.340    80.755    43.050    51.195   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM        17.675    17.500    27.870    15.120    24.100   \n",
       "YYVWKSYVHVVDGCNSSTCMMC       17.890    17.500    28.265    15.065    24.250   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY      18.135    17.710    28.690    17.710    24.400   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK     18.365    17.910    29.115    17.840    24.500   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR    21.085    18.135    29.395    17.935    24.650   \n",
       "\n",
       "loci                                                                         \\\n",
       "genotype                  DRB1_0401 DRB1_0402 DRB1_0403 DRB1_0404 DRB1_0405   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT                64.140    61.215    55.335    48.245    65.205   \n",
       "AAAYYVGYLQPRTF               67.860    75.370    62.330    58.140    73.335   \n",
       "AAAYYVGYLQPRTFL              62.605    74.825    58.555    54.250    69.085   \n",
       "AAAYYVGYLQPRTFLL             56.910    67.635    51.110    48.885    64.000   \n",
       "AAAYYVGYLQPRTFLLK            62.300    62.605    47.870    47.880    60.040   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM        40.645    26.405    27.935    29.590    55.095   \n",
       "YYVWKSYVHVVDGCNSSTCMMC       43.865    26.715    28.455    30.230    55.515   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY      44.470    27.145    31.410    30.680    56.340   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK     44.980    27.505    31.800    31.060    57.095   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR    45.310    27.755    32.130    33.945    57.305   \n",
       "\n",
       "loci                       ...                HLA-DQ                        \\\n",
       "genotype                   ... HLA-DQA10505-DQB10302 HLA-DQA10505-DQB10309   \n",
       "Peptide                    ...                                               \n",
       "AAAYYVGYLQPRT              ...                61.845                80.165   \n",
       "AAAYYVGYLQPRTF             ...                68.745                80.400   \n",
       "AAAYYVGYLQPRTFL            ...                63.005                75.580   \n",
       "AAAYYVGYLQPRTFLL           ...                58.000                69.810   \n",
       "AAAYYVGYLQPRTFLLK          ...                51.745                61.085   \n",
       "...                        ...                   ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      ...                23.240                33.535   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     ...                25.690                34.015   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    ...                28.915                35.615   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   ...                29.555                36.445   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  ...                30.730                38.105   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10319 HLA-DQA10505-DQB10402   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                            80.165                81.165   \n",
       "AAAYYVGYLQPRTF                           80.400                87.275   \n",
       "AAAYYVGYLQPRTFL                          75.580                82.425   \n",
       "AAAYYVGYLQPRTFLL                         69.810                74.615   \n",
       "AAAYYVGYLQPRTFLLK                        61.085                67.705   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                    33.535                34.000   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                   34.015                34.500   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                  35.615                35.000   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                 36.445                36.000   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                38.105                37.500   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10501 HLA-DQA10505-DQB10502   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                            74.465                90.010   \n",
       "AAAYYVGYLQPRTF                           80.335                91.220   \n",
       "AAAYYVGYLQPRTFL                          80.640                86.730   \n",
       "AAAYYVGYLQPRTFLL                         72.185                76.630   \n",
       "AAAYYVGYLQPRTFLLK                        69.475                67.915   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                    31.500                36.905   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                   31.500                37.090   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                  32.500                37.675   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                 33.000                38.255   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                33.500                38.825   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10506-DQB10303 HLA-DQA10508-DQB10301   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                            66.750                80.165   \n",
       "AAAYYVGYLQPRTF                           71.350                80.400   \n",
       "AAAYYVGYLQPRTFL                          65.180                75.580   \n",
       "AAAYYVGYLQPRTFLL                         58.270                69.810   \n",
       "AAAYYVGYLQPRTFLLK                        52.360                61.085   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                    31.520                33.535   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                   31.645                34.015   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                  32.745                35.615   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                 33.390                36.445   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                34.285                38.105   \n",
       "\n",
       "loci                                                                   \n",
       "genotype                  HLA-DQA10509-DQB10301 HLA-DQA10601-DQB10301  \n",
       "Peptide                                                                \n",
       "AAAYYVGYLQPRT                            80.165                74.310  \n",
       "AAAYYVGYLQPRTF                           80.400                75.200  \n",
       "AAAYYVGYLQPRTFL                          75.580                69.775  \n",
       "AAAYYVGYLQPRTFLL                         69.810                62.040  \n",
       "AAAYYVGYLQPRTFLLK                        61.085                54.115  \n",
       "...                                         ...                   ...  \n",
       "YYVWKSYVHVVDGCNSSTCMM                    33.535                33.080  \n",
       "YYVWKSYVHVVDGCNSSTCMMC                   34.015                33.510  \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                  35.615                35.675  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                 36.445                36.380  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                38.105                37.895  \n",
       "\n",
       "[125593 rows x 280 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Ensemble of the 3.2 BA %rank and 4.0 EL %rank pivots:\n",
    "# label-aligned, element-wise mean of the two tables.\n",
    "ens_32ba_40el = netmhc32_ba_rank_pivot.add(netmhc40_el_rank_pivot).div(2)\n",
    "\n",
    "# Save the ensemble to disk, then show it as the cell output.\n",
    "ens_32ba_40el.to_pickle(\n",
    "    path='mhc2_haplotype_ens_netmhcii-3.2_ba_rank_netmhcii-4.0_el_rank.pkl.gz'\n",
    ")\n",
    "ens_32ba_40el"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>loci</th>\n",
       "      <th colspan=\"10\" halign=\"left\">DRB1</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"10\" halign=\"left\">HLA-DQ</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>genotype</th>\n",
       "      <th>DRB1_0101</th>\n",
       "      <th>DRB1_0102</th>\n",
       "      <th>DRB1_0103</th>\n",
       "      <th>DRB1_0301</th>\n",
       "      <th>DRB1_0302</th>\n",
       "      <th>DRB1_0401</th>\n",
       "      <th>DRB1_0402</th>\n",
       "      <th>DRB1_0403</th>\n",
       "      <th>DRB1_0404</th>\n",
       "      <th>DRB1_0405</th>\n",
       "      <th>...</th>\n",
       "      <th>HLA-DQA10505-DQB10302</th>\n",
       "      <th>HLA-DQA10505-DQB10309</th>\n",
       "      <th>HLA-DQA10505-DQB10319</th>\n",
       "      <th>HLA-DQA10505-DQB10402</th>\n",
       "      <th>HLA-DQA10505-DQB10501</th>\n",
       "      <th>HLA-DQA10505-DQB10502</th>\n",
       "      <th>HLA-DQA10506-DQB10303</th>\n",
       "      <th>HLA-DQA10508-DQB10301</th>\n",
       "      <th>HLA-DQA10509-DQB10301</th>\n",
       "      <th>HLA-DQA10601-DQB10301</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Peptide</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRT</th>\n",
       "      <td>40.410</td>\n",
       "      <td>34.845</td>\n",
       "      <td>47.735</td>\n",
       "      <td>45.705</td>\n",
       "      <td>54.070</td>\n",
       "      <td>38.715</td>\n",
       "      <td>48.080</td>\n",
       "      <td>39.010</td>\n",
       "      <td>29.135</td>\n",
       "      <td>42.805</td>\n",
       "      <td>...</td>\n",
       "      <td>39.830</td>\n",
       "      <td>64.315</td>\n",
       "      <td>64.315</td>\n",
       "      <td>61.720</td>\n",
       "      <td>57.125</td>\n",
       "      <td>73.980</td>\n",
       "      <td>44.550</td>\n",
       "      <td>64.315</td>\n",
       "      <td>64.315</td>\n",
       "      <td>56.865</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTF</th>\n",
       "      <td>62.335</td>\n",
       "      <td>64.590</td>\n",
       "      <td>68.555</td>\n",
       "      <td>56.265</td>\n",
       "      <td>65.645</td>\n",
       "      <td>53.500</td>\n",
       "      <td>69.305</td>\n",
       "      <td>53.950</td>\n",
       "      <td>47.765</td>\n",
       "      <td>62.370</td>\n",
       "      <td>...</td>\n",
       "      <td>62.370</td>\n",
       "      <td>75.925</td>\n",
       "      <td>75.925</td>\n",
       "      <td>83.850</td>\n",
       "      <td>76.465</td>\n",
       "      <td>88.410</td>\n",
       "      <td>66.020</td>\n",
       "      <td>75.925</td>\n",
       "      <td>75.925</td>\n",
       "      <td>70.560</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFL</th>\n",
       "      <td>75.720</td>\n",
       "      <td>84.395</td>\n",
       "      <td>82.140</td>\n",
       "      <td>68.555</td>\n",
       "      <td>78.895</td>\n",
       "      <td>61.415</td>\n",
       "      <td>77.655</td>\n",
       "      <td>61.650</td>\n",
       "      <td>56.285</td>\n",
       "      <td>68.545</td>\n",
       "      <td>...</td>\n",
       "      <td>61.085</td>\n",
       "      <td>77.150</td>\n",
       "      <td>77.150</td>\n",
       "      <td>82.920</td>\n",
       "      <td>80.040</td>\n",
       "      <td>86.270</td>\n",
       "      <td>65.820</td>\n",
       "      <td>77.150</td>\n",
       "      <td>77.150</td>\n",
       "      <td>71.070</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLL</th>\n",
       "      <td>71.065</td>\n",
       "      <td>78.640</td>\n",
       "      <td>76.480</td>\n",
       "      <td>59.335</td>\n",
       "      <td>68.725</td>\n",
       "      <td>54.890</td>\n",
       "      <td>67.390</td>\n",
       "      <td>53.540</td>\n",
       "      <td>49.145</td>\n",
       "      <td>63.180</td>\n",
       "      <td>...</td>\n",
       "      <td>57.965</td>\n",
       "      <td>71.060</td>\n",
       "      <td>71.060</td>\n",
       "      <td>74.060</td>\n",
       "      <td>70.990</td>\n",
       "      <td>75.885</td>\n",
       "      <td>59.135</td>\n",
       "      <td>71.060</td>\n",
       "      <td>71.060</td>\n",
       "      <td>65.025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLLK</th>\n",
       "      <td>82.395</td>\n",
       "      <td>84.575</td>\n",
       "      <td>84.650</td>\n",
       "      <td>59.635</td>\n",
       "      <td>67.115</td>\n",
       "      <td>65.950</td>\n",
       "      <td>66.410</td>\n",
       "      <td>57.615</td>\n",
       "      <td>54.085</td>\n",
       "      <td>62.780</td>\n",
       "      <td>...</td>\n",
       "      <td>51.710</td>\n",
       "      <td>62.945</td>\n",
       "      <td>62.945</td>\n",
       "      <td>67.310</td>\n",
       "      <td>69.460</td>\n",
       "      <td>67.245</td>\n",
       "      <td>53.555</td>\n",
       "      <td>62.945</td>\n",
       "      <td>62.945</td>\n",
       "      <td>57.300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMM</th>\n",
       "      <td>33.125</td>\n",
       "      <td>25.665</td>\n",
       "      <td>33.590</td>\n",
       "      <td>27.885</td>\n",
       "      <td>38.615</td>\n",
       "      <td>59.115</td>\n",
       "      <td>37.935</td>\n",
       "      <td>46.760</td>\n",
       "      <td>45.895</td>\n",
       "      <td>71.040</td>\n",
       "      <td>...</td>\n",
       "      <td>25.230</td>\n",
       "      <td>30.695</td>\n",
       "      <td>30.695</td>\n",
       "      <td>29.815</td>\n",
       "      <td>33.380</td>\n",
       "      <td>38.125</td>\n",
       "      <td>31.495</td>\n",
       "      <td>30.695</td>\n",
       "      <td>30.695</td>\n",
       "      <td>31.155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMC</th>\n",
       "      <td>33.475</td>\n",
       "      <td>26.320</td>\n",
       "      <td>34.675</td>\n",
       "      <td>28.575</td>\n",
       "      <td>39.010</td>\n",
       "      <td>60.160</td>\n",
       "      <td>38.905</td>\n",
       "      <td>47.475</td>\n",
       "      <td>46.925</td>\n",
       "      <td>71.555</td>\n",
       "      <td>...</td>\n",
       "      <td>25.800</td>\n",
       "      <td>31.645</td>\n",
       "      <td>31.645</td>\n",
       "      <td>30.395</td>\n",
       "      <td>33.515</td>\n",
       "      <td>37.940</td>\n",
       "      <td>31.930</td>\n",
       "      <td>31.645</td>\n",
       "      <td>31.645</td>\n",
       "      <td>31.830</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCY</th>\n",
       "      <td>33.910</td>\n",
       "      <td>27.275</td>\n",
       "      <td>35.650</td>\n",
       "      <td>29.345</td>\n",
       "      <td>39.790</td>\n",
       "      <td>61.000</td>\n",
       "      <td>39.695</td>\n",
       "      <td>48.140</td>\n",
       "      <td>47.720</td>\n",
       "      <td>72.000</td>\n",
       "      <td>...</td>\n",
       "      <td>29.690</td>\n",
       "      <td>33.640</td>\n",
       "      <td>33.640</td>\n",
       "      <td>31.635</td>\n",
       "      <td>34.005</td>\n",
       "      <td>39.000</td>\n",
       "      <td>33.965</td>\n",
       "      <td>33.640</td>\n",
       "      <td>33.640</td>\n",
       "      <td>34.265</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYK</th>\n",
       "      <td>34.375</td>\n",
       "      <td>28.315</td>\n",
       "      <td>36.565</td>\n",
       "      <td>30.125</td>\n",
       "      <td>40.160</td>\n",
       "      <td>61.920</td>\n",
       "      <td>40.360</td>\n",
       "      <td>48.870</td>\n",
       "      <td>48.435</td>\n",
       "      <td>72.340</td>\n",
       "      <td>...</td>\n",
       "      <td>30.425</td>\n",
       "      <td>35.785</td>\n",
       "      <td>35.785</td>\n",
       "      <td>33.170</td>\n",
       "      <td>34.905</td>\n",
       "      <td>39.915</td>\n",
       "      <td>36.190</td>\n",
       "      <td>35.785</td>\n",
       "      <td>35.785</td>\n",
       "      <td>36.040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYKR</th>\n",
       "      <td>34.980</td>\n",
       "      <td>29.515</td>\n",
       "      <td>37.400</td>\n",
       "      <td>30.905</td>\n",
       "      <td>40.690</td>\n",
       "      <td>62.580</td>\n",
       "      <td>40.980</td>\n",
       "      <td>49.620</td>\n",
       "      <td>49.140</td>\n",
       "      <td>72.630</td>\n",
       "      <td>...</td>\n",
       "      <td>31.490</td>\n",
       "      <td>39.195</td>\n",
       "      <td>39.195</td>\n",
       "      <td>34.550</td>\n",
       "      <td>35.810</td>\n",
       "      <td>40.425</td>\n",
       "      <td>38.035</td>\n",
       "      <td>39.195</td>\n",
       "      <td>39.195</td>\n",
       "      <td>38.030</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>125593 rows × 280 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "loci                           DRB1                                          \\\n",
       "genotype                  DRB1_0101 DRB1_0102 DRB1_0103 DRB1_0301 DRB1_0302   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT                40.410    34.845    47.735    45.705    54.070   \n",
       "AAAYYVGYLQPRTF               62.335    64.590    68.555    56.265    65.645   \n",
       "AAAYYVGYLQPRTFL              75.720    84.395    82.140    68.555    78.895   \n",
       "AAAYYVGYLQPRTFLL             71.065    78.640    76.480    59.335    68.725   \n",
       "AAAYYVGYLQPRTFLLK            82.395    84.575    84.650    59.635    67.115   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM        33.125    25.665    33.590    27.885    38.615   \n",
       "YYVWKSYVHVVDGCNSSTCMMC       33.475    26.320    34.675    28.575    39.010   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY      33.910    27.275    35.650    29.345    39.790   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK     34.375    28.315    36.565    30.125    40.160   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR    34.980    29.515    37.400    30.905    40.690   \n",
       "\n",
       "loci                                                                         \\\n",
       "genotype                  DRB1_0401 DRB1_0402 DRB1_0403 DRB1_0404 DRB1_0405   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT                38.715    48.080    39.010    29.135    42.805   \n",
       "AAAYYVGYLQPRTF               53.500    69.305    53.950    47.765    62.370   \n",
       "AAAYYVGYLQPRTFL              61.415    77.655    61.650    56.285    68.545   \n",
       "AAAYYVGYLQPRTFLL             54.890    67.390    53.540    49.145    63.180   \n",
       "AAAYYVGYLQPRTFLLK            65.950    66.410    57.615    54.085    62.780   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM        59.115    37.935    46.760    45.895    71.040   \n",
       "YYVWKSYVHVVDGCNSSTCMMC       60.160    38.905    47.475    46.925    71.555   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY      61.000    39.695    48.140    47.720    72.000   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK     61.920    40.360    48.870    48.435    72.340   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR    62.580    40.980    49.620    49.140    72.630   \n",
       "\n",
       "loci                       ...                HLA-DQ                        \\\n",
       "genotype                   ... HLA-DQA10505-DQB10302 HLA-DQA10505-DQB10309   \n",
       "Peptide                    ...                                               \n",
       "AAAYYVGYLQPRT              ...                39.830                64.315   \n",
       "AAAYYVGYLQPRTF             ...                62.370                75.925   \n",
       "AAAYYVGYLQPRTFL            ...                61.085                77.150   \n",
       "AAAYYVGYLQPRTFLL           ...                57.965                71.060   \n",
       "AAAYYVGYLQPRTFLLK          ...                51.710                62.945   \n",
       "...                        ...                   ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      ...                25.230                30.695   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     ...                25.800                31.645   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    ...                29.690                33.640   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   ...                30.425                35.785   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  ...                31.490                39.195   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10319 HLA-DQA10505-DQB10402   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                            64.315                61.720   \n",
       "AAAYYVGYLQPRTF                           75.925                83.850   \n",
       "AAAYYVGYLQPRTFL                          77.150                82.920   \n",
       "AAAYYVGYLQPRTFLL                         71.060                74.060   \n",
       "AAAYYVGYLQPRTFLLK                        62.945                67.310   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                    30.695                29.815   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                   31.645                30.395   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                  33.640                31.635   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                 35.785                33.170   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                39.195                34.550   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10501 HLA-DQA10505-DQB10502   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                            57.125                73.980   \n",
       "AAAYYVGYLQPRTF                           76.465                88.410   \n",
       "AAAYYVGYLQPRTFL                          80.040                86.270   \n",
       "AAAYYVGYLQPRTFLL                         70.990                75.885   \n",
       "AAAYYVGYLQPRTFLLK                        69.460                67.245   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                    33.380                38.125   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                   33.515                37.940   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                  34.005                39.000   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                 34.905                39.915   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                35.810                40.425   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10506-DQB10303 HLA-DQA10508-DQB10301   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                            44.550                64.315   \n",
       "AAAYYVGYLQPRTF                           66.020                75.925   \n",
       "AAAYYVGYLQPRTFL                          65.820                77.150   \n",
       "AAAYYVGYLQPRTFLL                         59.135                71.060   \n",
       "AAAYYVGYLQPRTFLLK                        53.555                62.945   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                    31.495                30.695   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                   31.930                31.645   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                  33.965                33.640   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                 36.190                35.785   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                38.035                39.195   \n",
       "\n",
       "loci                                                                   \n",
       "genotype                  HLA-DQA10509-DQB10301 HLA-DQA10601-DQB10301  \n",
       "Peptide                                                                \n",
       "AAAYYVGYLQPRT                            64.315                56.865  \n",
       "AAAYYVGYLQPRTF                           75.925                70.560  \n",
       "AAAYYVGYLQPRTFL                          77.150                71.070  \n",
       "AAAYYVGYLQPRTFLL                         71.060                65.025  \n",
       "AAAYYVGYLQPRTFLLK                        62.945                57.300  \n",
       "...                                         ...                   ...  \n",
       "YYVWKSYVHVVDGCNSSTCMM                    30.695                31.155  \n",
       "YYVWKSYVHVVDGCNSSTCMMC                   31.645                31.830  \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                  33.640                34.265  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                 35.785                36.040  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                39.195                38.030  \n",
       "\n",
       "[125593 rows x 280 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Ensemble of the 4.0 BA %rank and 4.0 EL %rank pivots:\n",
    "# label-aligned, element-wise mean of the two tables.\n",
    "ens_40ba_40el = netmhc40_ba_rank_pivot.add(netmhc40_el_rank_pivot).div(2)\n",
    "\n",
    "# Save the ensemble to disk, then show it as the cell output.\n",
    "ens_40ba_40el.to_pickle(\n",
    "    path='mhc2_haplotype_ens_netmhcii-4.0_ba_rank_netmhcii-4.0_el_rank.pkl.gz'\n",
    ")\n",
    "ens_40ba_40el"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>loci</th>\n",
       "      <th colspan=\"10\" halign=\"left\">DRB1</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"10\" halign=\"left\">HLA-DQ</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>genotype</th>\n",
       "      <th>DRB1_0101</th>\n",
       "      <th>DRB1_0102</th>\n",
       "      <th>DRB1_0103</th>\n",
       "      <th>DRB1_0301</th>\n",
       "      <th>DRB1_0302</th>\n",
       "      <th>DRB1_0401</th>\n",
       "      <th>DRB1_0402</th>\n",
       "      <th>DRB1_0403</th>\n",
       "      <th>DRB1_0404</th>\n",
       "      <th>DRB1_0405</th>\n",
       "      <th>...</th>\n",
       "      <th>HLA-DQA10505-DQB10309</th>\n",
       "      <th>HLA-DQA10505-DQB10319</th>\n",
       "      <th>HLA-DQA10505-DQB10402</th>\n",
       "      <th>HLA-DQA10505-DQB10501</th>\n",
       "      <th>HLA-DQA10505-DQB10502</th>\n",
       "      <th>HLA-DQA10506-DQB10303</th>\n",
       "      <th>HLA-DQA10508-DQB10301</th>\n",
       "      <th>HLA-DQA10509-DQB10301</th>\n",
       "      <th>HLA-DQA10601-DQB10301</th>\n",
       "      <th>unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Peptide</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRT</th>\n",
       "      <td>0.322484</td>\n",
       "      <td>0.258498</td>\n",
       "      <td>0.200212</td>\n",
       "      <td>0.163577</td>\n",
       "      <td>0.076705</td>\n",
       "      <td>0.254549</td>\n",
       "      <td>0.222623</td>\n",
       "      <td>0.192972</td>\n",
       "      <td>0.246155</td>\n",
       "      <td>0.289692</td>\n",
       "      <td>...</td>\n",
       "      <td>0.280495</td>\n",
       "      <td>0.280495</td>\n",
       "      <td>0.341747</td>\n",
       "      <td>0.319118</td>\n",
       "      <td>0.254751</td>\n",
       "      <td>0.266447</td>\n",
       "      <td>0.280495</td>\n",
       "      <td>0.280495</td>\n",
       "      <td>0.237389</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTF</th>\n",
       "      <td>0.490004</td>\n",
       "      <td>0.394190</td>\n",
       "      <td>0.267980</td>\n",
       "      <td>0.219368</td>\n",
       "      <td>0.093999</td>\n",
       "      <td>0.339910</td>\n",
       "      <td>0.290397</td>\n",
       "      <td>0.250138</td>\n",
       "      <td>0.348641</td>\n",
       "      <td>0.388828</td>\n",
       "      <td>...</td>\n",
       "      <td>0.347394</td>\n",
       "      <td>0.347394</td>\n",
       "      <td>0.463792</td>\n",
       "      <td>0.391967</td>\n",
       "      <td>0.315552</td>\n",
       "      <td>0.360864</td>\n",
       "      <td>0.347394</td>\n",
       "      <td>0.347394</td>\n",
       "      <td>0.293370</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFL</th>\n",
       "      <td>0.637906</td>\n",
       "      <td>0.506652</td>\n",
       "      <td>0.348317</td>\n",
       "      <td>0.288839</td>\n",
       "      <td>0.115880</td>\n",
       "      <td>0.419452</td>\n",
       "      <td>0.355387</td>\n",
       "      <td>0.305167</td>\n",
       "      <td>0.441926</td>\n",
       "      <td>0.473647</td>\n",
       "      <td>...</td>\n",
       "      <td>0.385707</td>\n",
       "      <td>0.385707</td>\n",
       "      <td>0.524090</td>\n",
       "      <td>0.435281</td>\n",
       "      <td>0.352456</td>\n",
       "      <td>0.396362</td>\n",
       "      <td>0.385707</td>\n",
       "      <td>0.385707</td>\n",
       "      <td>0.316031</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLL</th>\n",
       "      <td>0.622981</td>\n",
       "      <td>0.465672</td>\n",
       "      <td>0.329097</td>\n",
       "      <td>0.259959</td>\n",
       "      <td>0.110437</td>\n",
       "      <td>0.417310</td>\n",
       "      <td>0.324221</td>\n",
       "      <td>0.292519</td>\n",
       "      <td>0.413951</td>\n",
       "      <td>0.466066</td>\n",
       "      <td>...</td>\n",
       "      <td>0.384422</td>\n",
       "      <td>0.384422</td>\n",
       "      <td>0.515780</td>\n",
       "      <td>0.426203</td>\n",
       "      <td>0.345868</td>\n",
       "      <td>0.389271</td>\n",
       "      <td>0.384422</td>\n",
       "      <td>0.384422</td>\n",
       "      <td>0.317764</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLLK</th>\n",
       "      <td>0.642850</td>\n",
       "      <td>0.465804</td>\n",
       "      <td>0.344080</td>\n",
       "      <td>0.278532</td>\n",
       "      <td>0.114585</td>\n",
       "      <td>0.439120</td>\n",
       "      <td>0.335532</td>\n",
       "      <td>0.305352</td>\n",
       "      <td>0.435093</td>\n",
       "      <td>0.475456</td>\n",
       "      <td>...</td>\n",
       "      <td>0.372327</td>\n",
       "      <td>0.372327</td>\n",
       "      <td>0.509578</td>\n",
       "      <td>0.430653</td>\n",
       "      <td>0.339827</td>\n",
       "      <td>0.371787</td>\n",
       "      <td>0.372327</td>\n",
       "      <td>0.372327</td>\n",
       "      <td>0.304479</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMM</th>\n",
       "      <td>0.397788</td>\n",
       "      <td>0.266286</td>\n",
       "      <td>0.202335</td>\n",
       "      <td>0.213601</td>\n",
       "      <td>0.098355</td>\n",
       "      <td>0.345216</td>\n",
       "      <td>0.235960</td>\n",
       "      <td>0.248369</td>\n",
       "      <td>0.345126</td>\n",
       "      <td>0.393548</td>\n",
       "      <td>...</td>\n",
       "      <td>0.324329</td>\n",
       "      <td>0.324329</td>\n",
       "      <td>0.349465</td>\n",
       "      <td>0.323933</td>\n",
       "      <td>0.254110</td>\n",
       "      <td>0.316918</td>\n",
       "      <td>0.324329</td>\n",
       "      <td>0.324329</td>\n",
       "      <td>0.300982</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMC</th>\n",
       "      <td>0.398960</td>\n",
       "      <td>0.268777</td>\n",
       "      <td>0.204457</td>\n",
       "      <td>0.217067</td>\n",
       "      <td>0.098649</td>\n",
       "      <td>0.347618</td>\n",
       "      <td>0.238340</td>\n",
       "      <td>0.249769</td>\n",
       "      <td>0.347843</td>\n",
       "      <td>0.394617</td>\n",
       "      <td>...</td>\n",
       "      <td>0.330587</td>\n",
       "      <td>0.330587</td>\n",
       "      <td>0.352533</td>\n",
       "      <td>0.324522</td>\n",
       "      <td>0.255056</td>\n",
       "      <td>0.320681</td>\n",
       "      <td>0.330587</td>\n",
       "      <td>0.330587</td>\n",
       "      <td>0.306045</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCY</th>\n",
       "      <td>0.400411</td>\n",
       "      <td>0.272610</td>\n",
       "      <td>0.205723</td>\n",
       "      <td>0.219780</td>\n",
       "      <td>0.099916</td>\n",
       "      <td>0.349694</td>\n",
       "      <td>0.239857</td>\n",
       "      <td>0.251226</td>\n",
       "      <td>0.350417</td>\n",
       "      <td>0.396423</td>\n",
       "      <td>...</td>\n",
       "      <td>0.343593</td>\n",
       "      <td>0.343593</td>\n",
       "      <td>0.359359</td>\n",
       "      <td>0.326625</td>\n",
       "      <td>0.257858</td>\n",
       "      <td>0.330206</td>\n",
       "      <td>0.343593</td>\n",
       "      <td>0.343593</td>\n",
       "      <td>0.319666</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYK</th>\n",
       "      <td>0.402106</td>\n",
       "      <td>0.275694</td>\n",
       "      <td>0.207381</td>\n",
       "      <td>0.223090</td>\n",
       "      <td>0.100753</td>\n",
       "      <td>0.352444</td>\n",
       "      <td>0.241563</td>\n",
       "      <td>0.253076</td>\n",
       "      <td>0.352767</td>\n",
       "      <td>0.397452</td>\n",
       "      <td>...</td>\n",
       "      <td>0.354990</td>\n",
       "      <td>0.354990</td>\n",
       "      <td>0.367243</td>\n",
       "      <td>0.330172</td>\n",
       "      <td>0.260298</td>\n",
       "      <td>0.339698</td>\n",
       "      <td>0.354990</td>\n",
       "      <td>0.354990</td>\n",
       "      <td>0.328567</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYKR</th>\n",
       "      <td>0.405549</td>\n",
       "      <td>0.281402</td>\n",
       "      <td>0.209714</td>\n",
       "      <td>0.226775</td>\n",
       "      <td>0.101743</td>\n",
       "      <td>0.354909</td>\n",
       "      <td>0.243884</td>\n",
       "      <td>0.256366</td>\n",
       "      <td>0.355690</td>\n",
       "      <td>0.398470</td>\n",
       "      <td>...</td>\n",
       "      <td>0.365810</td>\n",
       "      <td>0.365810</td>\n",
       "      <td>0.375528</td>\n",
       "      <td>0.334020</td>\n",
       "      <td>0.261940</td>\n",
       "      <td>0.345987</td>\n",
       "      <td>0.365810</td>\n",
       "      <td>0.365810</td>\n",
       "      <td>0.337403</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>125593 rows × 283 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "loci                           DRB1                                          \\\n",
       "genotype                  DRB1_0101 DRB1_0102 DRB1_0103 DRB1_0301 DRB1_0302   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT              0.322484  0.258498  0.200212  0.163577  0.076705   \n",
       "AAAYYVGYLQPRTF             0.490004  0.394190  0.267980  0.219368  0.093999   \n",
       "AAAYYVGYLQPRTFL            0.637906  0.506652  0.348317  0.288839  0.115880   \n",
       "AAAYYVGYLQPRTFLL           0.622981  0.465672  0.329097  0.259959  0.110437   \n",
       "AAAYYVGYLQPRTFLLK          0.642850  0.465804  0.344080  0.278532  0.114585   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      0.397788  0.266286  0.202335  0.213601  0.098355   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     0.398960  0.268777  0.204457  0.217067  0.098649   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    0.400411  0.272610  0.205723  0.219780  0.099916   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   0.402106  0.275694  0.207381  0.223090  0.100753   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  0.405549  0.281402  0.209714  0.226775  0.101743   \n",
       "\n",
       "loci                                                                         \\\n",
       "genotype                  DRB1_0401 DRB1_0402 DRB1_0403 DRB1_0404 DRB1_0405   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT              0.254549  0.222623  0.192972  0.246155  0.289692   \n",
       "AAAYYVGYLQPRTF             0.339910  0.290397  0.250138  0.348641  0.388828   \n",
       "AAAYYVGYLQPRTFL            0.419452  0.355387  0.305167  0.441926  0.473647   \n",
       "AAAYYVGYLQPRTFLL           0.417310  0.324221  0.292519  0.413951  0.466066   \n",
       "AAAYYVGYLQPRTFLLK          0.439120  0.335532  0.305352  0.435093  0.475456   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      0.345216  0.235960  0.248369  0.345126  0.393548   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     0.347618  0.238340  0.249769  0.347843  0.394617   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    0.349694  0.239857  0.251226  0.350417  0.396423   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   0.352444  0.241563  0.253076  0.352767  0.397452   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  0.354909  0.243884  0.256366  0.355690  0.398470   \n",
       "\n",
       "loci                       ...                HLA-DQ                        \\\n",
       "genotype                   ... HLA-DQA10505-DQB10309 HLA-DQA10505-DQB10319   \n",
       "Peptide                    ...                                               \n",
       "AAAYYVGYLQPRT              ...              0.280495              0.280495   \n",
       "AAAYYVGYLQPRTF             ...              0.347394              0.347394   \n",
       "AAAYYVGYLQPRTFL            ...              0.385707              0.385707   \n",
       "AAAYYVGYLQPRTFLL           ...              0.384422              0.384422   \n",
       "AAAYYVGYLQPRTFLLK          ...              0.372327              0.372327   \n",
       "...                        ...                   ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      ...              0.324329              0.324329   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     ...              0.330587              0.330587   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    ...              0.343593              0.343593   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   ...              0.354990              0.354990   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  ...              0.365810              0.365810   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10402 HLA-DQA10505-DQB10501   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                          0.341747              0.319118   \n",
       "AAAYYVGYLQPRTF                         0.463792              0.391967   \n",
       "AAAYYVGYLQPRTFL                        0.524090              0.435281   \n",
       "AAAYYVGYLQPRTFLL                       0.515780              0.426203   \n",
       "AAAYYVGYLQPRTFLLK                      0.509578              0.430653   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.349465              0.323933   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.352533              0.324522   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.359359              0.326625   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.367243              0.330172   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.375528              0.334020   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10502 HLA-DQA10506-DQB10303   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                          0.254751              0.266447   \n",
       "AAAYYVGYLQPRTF                         0.315552              0.360864   \n",
       "AAAYYVGYLQPRTFL                        0.352456              0.396362   \n",
       "AAAYYVGYLQPRTFLL                       0.345868              0.389271   \n",
       "AAAYYVGYLQPRTFLLK                      0.339827              0.371787   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.254110              0.316918   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.255056              0.320681   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.257858              0.330206   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.260298              0.339698   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.261940              0.345987   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10508-DQB10301 HLA-DQA10509-DQB10301   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                          0.280495              0.280495   \n",
       "AAAYYVGYLQPRTF                         0.347394              0.347394   \n",
       "AAAYYVGYLQPRTFL                        0.385707              0.385707   \n",
       "AAAYYVGYLQPRTFLL                       0.384422              0.384422   \n",
       "AAAYYVGYLQPRTFLLK                      0.372327              0.372327   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.324329              0.324329   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.330587              0.330587   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.343593              0.343593   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.354990              0.354990   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.365810              0.365810   \n",
       "\n",
       "loci                                                     \n",
       "genotype                  HLA-DQA10601-DQB10301 unknown  \n",
       "Peptide                                                  \n",
       "AAAYYVGYLQPRT                          0.237389     0.0  \n",
       "AAAYYVGYLQPRTF                         0.293370     0.0  \n",
       "AAAYYVGYLQPRTFL                        0.316031     0.0  \n",
       "AAAYYVGYLQPRTFLL                       0.317764     0.0  \n",
       "AAAYYVGYLQPRTFLLK                      0.304479     0.0  \n",
       "...                                         ...     ...  \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.300982     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.306045     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.319666     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.328567     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.337403     0.0  \n",
       "\n",
       "[125593 rows x 283 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Ensemble of the NetMHCII 3.2 and 4.0 predicted-affinity scores.\n",
    "# Each score s is mapped to 50000 ** (1 - s), the two mapped values are\n",
    "# averaged, and the mean is mapped back with 1 - log(mean) / log(50000).\n",
    "# NOTE(review): presumably this undoes a 1 - log50k(affinity) score encoding so\n",
    "# that the averaging happens on the affinity scale - confirm against the predictor docs.\n",
    "ens_32aff_40aff = 1 - (\n",
    "    np.log(\n",
    "        (50000 ** (1 - netmhc32_aff_pivot) + 50000 ** (1 - netmhc40_aff_pivot)) / 2\n",
    "    )\n",
    "    / np.log(50000)\n",
    ")\n",
    "# Persist the ensemble table, then show it as the cell output.\n",
    "ens_32aff_40aff.to_pickle('mhc2_haplotype_ens_netmhcii-3.2_aff_netmhcii-4.0_aff.pkl.gz')\n",
    "ens_32aff_40aff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>loci</th>\n",
       "      <th colspan=\"10\" halign=\"left\">DRB1</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"10\" halign=\"left\">HLA-DQ</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>genotype</th>\n",
       "      <th>DRB1_0101</th>\n",
       "      <th>DRB1_0102</th>\n",
       "      <th>DRB1_0103</th>\n",
       "      <th>DRB1_0301</th>\n",
       "      <th>DRB1_0302</th>\n",
       "      <th>DRB1_0401</th>\n",
       "      <th>DRB1_0402</th>\n",
       "      <th>DRB1_0403</th>\n",
       "      <th>DRB1_0404</th>\n",
       "      <th>DRB1_0405</th>\n",
       "      <th>...</th>\n",
       "      <th>HLA-DQA10505-DQB10309</th>\n",
       "      <th>HLA-DQA10505-DQB10319</th>\n",
       "      <th>HLA-DQA10505-DQB10402</th>\n",
       "      <th>HLA-DQA10505-DQB10501</th>\n",
       "      <th>HLA-DQA10505-DQB10502</th>\n",
       "      <th>HLA-DQA10506-DQB10303</th>\n",
       "      <th>HLA-DQA10508-DQB10301</th>\n",
       "      <th>HLA-DQA10509-DQB10301</th>\n",
       "      <th>HLA-DQA10601-DQB10301</th>\n",
       "      <th>unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Peptide</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRT</th>\n",
       "      <td>0.554</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.266</td>\n",
       "      <td>0.199</td>\n",
       "      <td>0.091</td>\n",
       "      <td>0.403</td>\n",
       "      <td>0.295</td>\n",
       "      <td>0.265</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.424</td>\n",
       "      <td>...</td>\n",
       "      <td>0.327</td>\n",
       "      <td>0.327</td>\n",
       "      <td>0.428</td>\n",
       "      <td>0.467</td>\n",
       "      <td>0.367</td>\n",
       "      <td>0.336</td>\n",
       "      <td>0.327</td>\n",
       "      <td>0.327</td>\n",
       "      <td>0.259</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTF</th>\n",
       "      <td>0.642</td>\n",
       "      <td>0.456</td>\n",
       "      <td>0.327</td>\n",
       "      <td>0.265</td>\n",
       "      <td>0.108</td>\n",
       "      <td>0.458</td>\n",
       "      <td>0.370</td>\n",
       "      <td>0.327</td>\n",
       "      <td>0.443</td>\n",
       "      <td>0.489</td>\n",
       "      <td>...</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.504</td>\n",
       "      <td>0.524</td>\n",
       "      <td>0.415</td>\n",
       "      <td>0.385</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.292</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFL</th>\n",
       "      <td>0.659</td>\n",
       "      <td>0.486</td>\n",
       "      <td>0.346</td>\n",
       "      <td>0.297</td>\n",
       "      <td>0.112</td>\n",
       "      <td>0.466</td>\n",
       "      <td>0.397</td>\n",
       "      <td>0.357</td>\n",
       "      <td>0.470</td>\n",
       "      <td>0.506</td>\n",
       "      <td>...</td>\n",
       "      <td>0.388</td>\n",
       "      <td>0.388</td>\n",
       "      <td>0.537</td>\n",
       "      <td>0.559</td>\n",
       "      <td>0.438</td>\n",
       "      <td>0.406</td>\n",
       "      <td>0.388</td>\n",
       "      <td>0.388</td>\n",
       "      <td>0.307</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLL</th>\n",
       "      <td>0.647</td>\n",
       "      <td>0.460</td>\n",
       "      <td>0.343</td>\n",
       "      <td>0.285</td>\n",
       "      <td>0.113</td>\n",
       "      <td>0.467</td>\n",
       "      <td>0.387</td>\n",
       "      <td>0.348</td>\n",
       "      <td>0.457</td>\n",
       "      <td>0.500</td>\n",
       "      <td>...</td>\n",
       "      <td>0.396</td>\n",
       "      <td>0.396</td>\n",
       "      <td>0.551</td>\n",
       "      <td>0.566</td>\n",
       "      <td>0.441</td>\n",
       "      <td>0.404</td>\n",
       "      <td>0.396</td>\n",
       "      <td>0.396</td>\n",
       "      <td>0.309</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLLK</th>\n",
       "      <td>0.622</td>\n",
       "      <td>0.432</td>\n",
       "      <td>0.321</td>\n",
       "      <td>0.277</td>\n",
       "      <td>0.109</td>\n",
       "      <td>0.445</td>\n",
       "      <td>0.371</td>\n",
       "      <td>0.332</td>\n",
       "      <td>0.442</td>\n",
       "      <td>0.478</td>\n",
       "      <td>...</td>\n",
       "      <td>0.394</td>\n",
       "      <td>0.394</td>\n",
       "      <td>0.548</td>\n",
       "      <td>0.558</td>\n",
       "      <td>0.430</td>\n",
       "      <td>0.391</td>\n",
       "      <td>0.394</td>\n",
       "      <td>0.394</td>\n",
       "      <td>0.302</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMM</th>\n",
       "      <td>0.396</td>\n",
       "      <td>0.260</td>\n",
       "      <td>0.201</td>\n",
       "      <td>0.243</td>\n",
       "      <td>0.102</td>\n",
       "      <td>0.332</td>\n",
       "      <td>0.259</td>\n",
       "      <td>0.267</td>\n",
       "      <td>0.346</td>\n",
       "      <td>0.372</td>\n",
       "      <td>...</td>\n",
       "      <td>0.392</td>\n",
       "      <td>0.392</td>\n",
       "      <td>0.427</td>\n",
       "      <td>0.472</td>\n",
       "      <td>0.335</td>\n",
       "      <td>0.353</td>\n",
       "      <td>0.392</td>\n",
       "      <td>0.392</td>\n",
       "      <td>0.333</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMC</th>\n",
       "      <td>0.397</td>\n",
       "      <td>0.261</td>\n",
       "      <td>0.202</td>\n",
       "      <td>0.246</td>\n",
       "      <td>0.102</td>\n",
       "      <td>0.334</td>\n",
       "      <td>0.260</td>\n",
       "      <td>0.269</td>\n",
       "      <td>0.348</td>\n",
       "      <td>0.373</td>\n",
       "      <td>...</td>\n",
       "      <td>0.397</td>\n",
       "      <td>0.397</td>\n",
       "      <td>0.429</td>\n",
       "      <td>0.473</td>\n",
       "      <td>0.338</td>\n",
       "      <td>0.355</td>\n",
       "      <td>0.397</td>\n",
       "      <td>0.397</td>\n",
       "      <td>0.339</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCY</th>\n",
       "      <td>0.398</td>\n",
       "      <td>0.264</td>\n",
       "      <td>0.202</td>\n",
       "      <td>0.248</td>\n",
       "      <td>0.103</td>\n",
       "      <td>0.336</td>\n",
       "      <td>0.261</td>\n",
       "      <td>0.271</td>\n",
       "      <td>0.350</td>\n",
       "      <td>0.375</td>\n",
       "      <td>...</td>\n",
       "      <td>0.408</td>\n",
       "      <td>0.408</td>\n",
       "      <td>0.433</td>\n",
       "      <td>0.476</td>\n",
       "      <td>0.339</td>\n",
       "      <td>0.361</td>\n",
       "      <td>0.408</td>\n",
       "      <td>0.408</td>\n",
       "      <td>0.351</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYK</th>\n",
       "      <td>0.399</td>\n",
       "      <td>0.265</td>\n",
       "      <td>0.203</td>\n",
       "      <td>0.251</td>\n",
       "      <td>0.104</td>\n",
       "      <td>0.338</td>\n",
       "      <td>0.263</td>\n",
       "      <td>0.273</td>\n",
       "      <td>0.352</td>\n",
       "      <td>0.376</td>\n",
       "      <td>...</td>\n",
       "      <td>0.413</td>\n",
       "      <td>0.413</td>\n",
       "      <td>0.437</td>\n",
       "      <td>0.478</td>\n",
       "      <td>0.340</td>\n",
       "      <td>0.365</td>\n",
       "      <td>0.413</td>\n",
       "      <td>0.413</td>\n",
       "      <td>0.357</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYKR</th>\n",
       "      <td>0.402</td>\n",
       "      <td>0.270</td>\n",
       "      <td>0.205</td>\n",
       "      <td>0.255</td>\n",
       "      <td>0.105</td>\n",
       "      <td>0.340</td>\n",
       "      <td>0.266</td>\n",
       "      <td>0.278</td>\n",
       "      <td>0.355</td>\n",
       "      <td>0.377</td>\n",
       "      <td>...</td>\n",
       "      <td>0.415</td>\n",
       "      <td>0.415</td>\n",
       "      <td>0.445</td>\n",
       "      <td>0.481</td>\n",
       "      <td>0.342</td>\n",
       "      <td>0.367</td>\n",
       "      <td>0.415</td>\n",
       "      <td>0.415</td>\n",
       "      <td>0.363</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>125593 rows × 283 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "loci                           DRB1                                          \\\n",
       "genotype                  DRB1_0101 DRB1_0102 DRB1_0103 DRB1_0301 DRB1_0302   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT                 0.554     0.364     0.266     0.199     0.091   \n",
       "AAAYYVGYLQPRTF                0.642     0.456     0.327     0.265     0.108   \n",
       "AAAYYVGYLQPRTFL               0.659     0.486     0.346     0.297     0.112   \n",
       "AAAYYVGYLQPRTFLL              0.647     0.460     0.343     0.285     0.113   \n",
       "AAAYYVGYLQPRTFLLK             0.622     0.432     0.321     0.277     0.109   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM         0.396     0.260     0.201     0.243     0.102   \n",
       "YYVWKSYVHVVDGCNSSTCMMC        0.397     0.261     0.202     0.246     0.102   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY       0.398     0.264     0.202     0.248     0.103   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK      0.399     0.265     0.203     0.251     0.104   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR     0.402     0.270     0.205     0.255     0.105   \n",
       "\n",
       "loci                                                                         \\\n",
       "genotype                  DRB1_0401 DRB1_0402 DRB1_0403 DRB1_0404 DRB1_0405   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT                 0.403     0.295     0.265     0.364     0.424   \n",
       "AAAYYVGYLQPRTF                0.458     0.370     0.327     0.443     0.489   \n",
       "AAAYYVGYLQPRTFL               0.466     0.397     0.357     0.470     0.506   \n",
       "AAAYYVGYLQPRTFLL              0.467     0.387     0.348     0.457     0.500   \n",
       "AAAYYVGYLQPRTFLLK             0.445     0.371     0.332     0.442     0.478   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM         0.332     0.259     0.267     0.346     0.372   \n",
       "YYVWKSYVHVVDGCNSSTCMMC        0.334     0.260     0.269     0.348     0.373   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY       0.336     0.261     0.271     0.350     0.375   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK      0.338     0.263     0.273     0.352     0.376   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR     0.340     0.266     0.278     0.355     0.377   \n",
       "\n",
       "loci                       ...                HLA-DQ                        \\\n",
       "genotype                   ... HLA-DQA10505-DQB10309 HLA-DQA10505-DQB10319   \n",
       "Peptide                    ...                                               \n",
       "AAAYYVGYLQPRT              ...                 0.327                 0.327   \n",
       "AAAYYVGYLQPRTF             ...                 0.364                 0.364   \n",
       "AAAYYVGYLQPRTFL            ...                 0.388                 0.388   \n",
       "AAAYYVGYLQPRTFLL           ...                 0.396                 0.396   \n",
       "AAAYYVGYLQPRTFLLK          ...                 0.394                 0.394   \n",
       "...                        ...                   ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      ...                 0.392                 0.392   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     ...                 0.397                 0.397   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    ...                 0.408                 0.408   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   ...                 0.413                 0.413   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  ...                 0.415                 0.415   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10402 HLA-DQA10505-DQB10501   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                             0.428                 0.467   \n",
       "AAAYYVGYLQPRTF                            0.504                 0.524   \n",
       "AAAYYVGYLQPRTFL                           0.537                 0.559   \n",
       "AAAYYVGYLQPRTFLL                          0.551                 0.566   \n",
       "AAAYYVGYLQPRTFLLK                         0.548                 0.558   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                     0.427                 0.472   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                    0.429                 0.473   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                   0.433                 0.476   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                  0.437                 0.478   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                 0.445                 0.481   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10502 HLA-DQA10506-DQB10303   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                             0.367                 0.336   \n",
       "AAAYYVGYLQPRTF                            0.415                 0.385   \n",
       "AAAYYVGYLQPRTFL                           0.438                 0.406   \n",
       "AAAYYVGYLQPRTFLL                          0.441                 0.404   \n",
       "AAAYYVGYLQPRTFLLK                         0.430                 0.391   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                     0.335                 0.353   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                    0.338                 0.355   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                   0.339                 0.361   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                  0.340                 0.365   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                 0.342                 0.367   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10508-DQB10301 HLA-DQA10509-DQB10301   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                             0.327                 0.327   \n",
       "AAAYYVGYLQPRTF                            0.364                 0.364   \n",
       "AAAYYVGYLQPRTFL                           0.388                 0.388   \n",
       "AAAYYVGYLQPRTFLL                          0.396                 0.396   \n",
       "AAAYYVGYLQPRTFLLK                         0.394                 0.394   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                     0.392                 0.392   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                    0.397                 0.397   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                   0.408                 0.408   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                  0.413                 0.413   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                 0.415                 0.415   \n",
       "\n",
       "loci                                                     \n",
       "genotype                  HLA-DQA10601-DQB10301 unknown  \n",
       "Peptide                                                  \n",
       "AAAYYVGYLQPRT                             0.259     0.0  \n",
       "AAAYYVGYLQPRTF                            0.292     0.0  \n",
       "AAAYYVGYLQPRTFL                           0.307     0.0  \n",
       "AAAYYVGYLQPRTFLL                          0.309     0.0  \n",
       "AAAYYVGYLQPRTFLLK                         0.302     0.0  \n",
       "...                                         ...     ...  \n",
       "YYVWKSYVHVVDGCNSSTCMM                     0.333     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMC                    0.339     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                   0.351     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK                  0.357     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR                 0.363     0.0  \n",
       "\n",
       "[125593 rows x 283 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "netmhc32_aff_pivot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>loci</th>\n",
       "      <th colspan=\"10\" halign=\"left\">DRB1</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"10\" halign=\"left\">HLA-DQ</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>genotype</th>\n",
       "      <th>DRB1_0101</th>\n",
       "      <th>DRB1_0102</th>\n",
       "      <th>DRB1_0103</th>\n",
       "      <th>DRB1_0301</th>\n",
       "      <th>DRB1_0302</th>\n",
       "      <th>DRB1_0401</th>\n",
       "      <th>DRB1_0402</th>\n",
       "      <th>DRB1_0403</th>\n",
       "      <th>DRB1_0404</th>\n",
       "      <th>DRB1_0405</th>\n",
       "      <th>...</th>\n",
       "      <th>HLA-DQA10505-DQB10309</th>\n",
       "      <th>HLA-DQA10505-DQB10319</th>\n",
       "      <th>HLA-DQA10505-DQB10402</th>\n",
       "      <th>HLA-DQA10505-DQB10501</th>\n",
       "      <th>HLA-DQA10505-DQB10502</th>\n",
       "      <th>HLA-DQA10506-DQB10303</th>\n",
       "      <th>HLA-DQA10508-DQB10301</th>\n",
       "      <th>HLA-DQA10509-DQB10301</th>\n",
       "      <th>HLA-DQA10601-DQB10301</th>\n",
       "      <th>unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Peptide</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRT</th>\n",
       "      <td>0.262275</td>\n",
       "      <td>0.210513</td>\n",
       "      <td>0.162170</td>\n",
       "      <td>0.138032</td>\n",
       "      <td>0.064328</td>\n",
       "      <td>0.200257</td>\n",
       "      <td>0.182535</td>\n",
       "      <td>0.152988</td>\n",
       "      <td>0.196000</td>\n",
       "      <td>0.237120</td>\n",
       "      <td>...</td>\n",
       "      <td>0.249702</td>\n",
       "      <td>0.249702</td>\n",
       "      <td>0.297920</td>\n",
       "      <td>0.264890</td>\n",
       "      <td>0.205538</td>\n",
       "      <td>0.227213</td>\n",
       "      <td>0.249702</td>\n",
       "      <td>0.249702</td>\n",
       "      <td>0.219885</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTF</th>\n",
       "      <td>0.435325</td>\n",
       "      <td>0.357479</td>\n",
       "      <td>0.232249</td>\n",
       "      <td>0.188956</td>\n",
       "      <td>0.081843</td>\n",
       "      <td>0.289715</td>\n",
       "      <td>0.248274</td>\n",
       "      <td>0.208763</td>\n",
       "      <td>0.302934</td>\n",
       "      <td>0.341891</td>\n",
       "      <td>...</td>\n",
       "      <td>0.333322</td>\n",
       "      <td>0.333322</td>\n",
       "      <td>0.435867</td>\n",
       "      <td>0.339701</td>\n",
       "      <td>0.268764</td>\n",
       "      <td>0.341743</td>\n",
       "      <td>0.333322</td>\n",
       "      <td>0.333322</td>\n",
       "      <td>0.294761</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFL</th>\n",
       "      <td>0.620743</td>\n",
       "      <td>0.533288</td>\n",
       "      <td>0.350694</td>\n",
       "      <td>0.281340</td>\n",
       "      <td>0.119930</td>\n",
       "      <td>0.388640</td>\n",
       "      <td>0.326797</td>\n",
       "      <td>0.272157</td>\n",
       "      <td>0.420422</td>\n",
       "      <td>0.449729</td>\n",
       "      <td>...</td>\n",
       "      <td>0.383470</td>\n",
       "      <td>0.383470</td>\n",
       "      <td>0.512765</td>\n",
       "      <td>0.384206</td>\n",
       "      <td>0.308803</td>\n",
       "      <td>0.387634</td>\n",
       "      <td>0.383470</td>\n",
       "      <td>0.383470</td>\n",
       "      <td>0.326042</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLL</th>\n",
       "      <td>0.603934</td>\n",
       "      <td>0.471715</td>\n",
       "      <td>0.317015</td>\n",
       "      <td>0.240276</td>\n",
       "      <td>0.107944</td>\n",
       "      <td>0.385170</td>\n",
       "      <td>0.287178</td>\n",
       "      <td>0.258092</td>\n",
       "      <td>0.384697</td>\n",
       "      <td>0.441300</td>\n",
       "      <td>...</td>\n",
       "      <td>0.374134</td>\n",
       "      <td>0.374134</td>\n",
       "      <td>0.490339</td>\n",
       "      <td>0.372928</td>\n",
       "      <td>0.299992</td>\n",
       "      <td>0.376569</td>\n",
       "      <td>0.374134</td>\n",
       "      <td>0.374134</td>\n",
       "      <td>0.327447</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAYYVGYLQPRTFLLK</th>\n",
       "      <td>0.669816</td>\n",
       "      <td>0.519657</td>\n",
       "      <td>0.374913</td>\n",
       "      <td>0.280090</td>\n",
       "      <td>0.120530</td>\n",
       "      <td>0.433591</td>\n",
       "      <td>0.309963</td>\n",
       "      <td>0.284692</td>\n",
       "      <td>0.428667</td>\n",
       "      <td>0.472981</td>\n",
       "      <td>...</td>\n",
       "      <td>0.354784</td>\n",
       "      <td>0.354784</td>\n",
       "      <td>0.482520</td>\n",
       "      <td>0.379043</td>\n",
       "      <td>0.295066</td>\n",
       "      <td>0.355889</td>\n",
       "      <td>0.354784</td>\n",
       "      <td>0.354784</td>\n",
       "      <td>0.307027</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMM</th>\n",
       "      <td>0.399611</td>\n",
       "      <td>0.273032</td>\n",
       "      <td>0.203689</td>\n",
       "      <td>0.191331</td>\n",
       "      <td>0.094848</td>\n",
       "      <td>0.360642</td>\n",
       "      <td>0.217532</td>\n",
       "      <td>0.232871</td>\n",
       "      <td>0.344261</td>\n",
       "      <td>0.421699</td>\n",
       "      <td>...</td>\n",
       "      <td>0.285683</td>\n",
       "      <td>0.285683</td>\n",
       "      <td>0.307903</td>\n",
       "      <td>0.269684</td>\n",
       "      <td>0.211645</td>\n",
       "      <td>0.291034</td>\n",
       "      <td>0.285683</td>\n",
       "      <td>0.285683</td>\n",
       "      <td>0.277248</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMC</th>\n",
       "      <td>0.400962</td>\n",
       "      <td>0.277270</td>\n",
       "      <td>0.206981</td>\n",
       "      <td>0.195065</td>\n",
       "      <td>0.095416</td>\n",
       "      <td>0.363595</td>\n",
       "      <td>0.220804</td>\n",
       "      <td>0.233858</td>\n",
       "      <td>0.347686</td>\n",
       "      <td>0.422885</td>\n",
       "      <td>...</td>\n",
       "      <td>0.292342</td>\n",
       "      <td>0.292342</td>\n",
       "      <td>0.311268</td>\n",
       "      <td>0.270227</td>\n",
       "      <td>0.212058</td>\n",
       "      <td>0.295712</td>\n",
       "      <td>0.292342</td>\n",
       "      <td>0.292342</td>\n",
       "      <td>0.281802</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCY</th>\n",
       "      <td>0.402886</td>\n",
       "      <td>0.282106</td>\n",
       "      <td>0.209602</td>\n",
       "      <td>0.198191</td>\n",
       "      <td>0.096931</td>\n",
       "      <td>0.365776</td>\n",
       "      <td>0.222661</td>\n",
       "      <td>0.234946</td>\n",
       "      <td>0.350836</td>\n",
       "      <td>0.424360</td>\n",
       "      <td>...</td>\n",
       "      <td>0.306004</td>\n",
       "      <td>0.306004</td>\n",
       "      <td>0.318901</td>\n",
       "      <td>0.272231</td>\n",
       "      <td>0.215326</td>\n",
       "      <td>0.307148</td>\n",
       "      <td>0.306004</td>\n",
       "      <td>0.306004</td>\n",
       "      <td>0.296308</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYK</th>\n",
       "      <td>0.405321</td>\n",
       "      <td>0.287790</td>\n",
       "      <td>0.211981</td>\n",
       "      <td>0.201682</td>\n",
       "      <td>0.097617</td>\n",
       "      <td>0.369570</td>\n",
       "      <td>0.224174</td>\n",
       "      <td>0.236694</td>\n",
       "      <td>0.353541</td>\n",
       "      <td>0.425438</td>\n",
       "      <td>...</td>\n",
       "      <td>0.319624</td>\n",
       "      <td>0.319624</td>\n",
       "      <td>0.327946</td>\n",
       "      <td>0.275950</td>\n",
       "      <td>0.218148</td>\n",
       "      <td>0.319855</td>\n",
       "      <td>0.319624</td>\n",
       "      <td>0.319624</td>\n",
       "      <td>0.306854</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YYVWKSYVHVVDGCNSSTCMMCYKR</th>\n",
       "      <td>0.409240</td>\n",
       "      <td>0.294411</td>\n",
       "      <td>0.214682</td>\n",
       "      <td>0.205183</td>\n",
       "      <td>0.098597</td>\n",
       "      <td>0.372695</td>\n",
       "      <td>0.226051</td>\n",
       "      <td>0.238848</td>\n",
       "      <td>0.356386</td>\n",
       "      <td>0.426486</td>\n",
       "      <td>...</td>\n",
       "      <td>0.333877</td>\n",
       "      <td>0.333877</td>\n",
       "      <td>0.336319</td>\n",
       "      <td>0.279893</td>\n",
       "      <td>0.219695</td>\n",
       "      <td>0.328877</td>\n",
       "      <td>0.333877</td>\n",
       "      <td>0.333877</td>\n",
       "      <td>0.317379</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>125593 rows × 283 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "loci                           DRB1                                          \\\n",
       "genotype                  DRB1_0101 DRB1_0102 DRB1_0103 DRB1_0301 DRB1_0302   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT              0.262275  0.210513  0.162170  0.138032  0.064328   \n",
       "AAAYYVGYLQPRTF             0.435325  0.357479  0.232249  0.188956  0.081843   \n",
       "AAAYYVGYLQPRTFL            0.620743  0.533288  0.350694  0.281340  0.119930   \n",
       "AAAYYVGYLQPRTFLL           0.603934  0.471715  0.317015  0.240276  0.107944   \n",
       "AAAYYVGYLQPRTFLLK          0.669816  0.519657  0.374913  0.280090  0.120530   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      0.399611  0.273032  0.203689  0.191331  0.094848   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     0.400962  0.277270  0.206981  0.195065  0.095416   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    0.402886  0.282106  0.209602  0.198191  0.096931   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   0.405321  0.287790  0.211981  0.201682  0.097617   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  0.409240  0.294411  0.214682  0.205183  0.098597   \n",
       "\n",
       "loci                                                                         \\\n",
       "genotype                  DRB1_0401 DRB1_0402 DRB1_0403 DRB1_0404 DRB1_0405   \n",
       "Peptide                                                                       \n",
       "AAAYYVGYLQPRT              0.200257  0.182535  0.152988  0.196000  0.237120   \n",
       "AAAYYVGYLQPRTF             0.289715  0.248274  0.208763  0.302934  0.341891   \n",
       "AAAYYVGYLQPRTFL            0.388640  0.326797  0.272157  0.420422  0.449729   \n",
       "AAAYYVGYLQPRTFLL           0.385170  0.287178  0.258092  0.384697  0.441300   \n",
       "AAAYYVGYLQPRTFLLK          0.433591  0.309963  0.284692  0.428667  0.472981   \n",
       "...                             ...       ...       ...       ...       ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      0.360642  0.217532  0.232871  0.344261  0.421699   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     0.363595  0.220804  0.233858  0.347686  0.422885   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    0.365776  0.222661  0.234946  0.350836  0.424360   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   0.369570  0.224174  0.236694  0.353541  0.425438   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  0.372695  0.226051  0.238848  0.356386  0.426486   \n",
       "\n",
       "loci                       ...                HLA-DQ                        \\\n",
       "genotype                   ... HLA-DQA10505-DQB10309 HLA-DQA10505-DQB10319   \n",
       "Peptide                    ...                                               \n",
       "AAAYYVGYLQPRT              ...              0.249702              0.249702   \n",
       "AAAYYVGYLQPRTF             ...              0.333322              0.333322   \n",
       "AAAYYVGYLQPRTFL            ...              0.383470              0.383470   \n",
       "AAAYYVGYLQPRTFLL           ...              0.374134              0.374134   \n",
       "AAAYYVGYLQPRTFLLK          ...              0.354784              0.354784   \n",
       "...                        ...                   ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM      ...              0.285683              0.285683   \n",
       "YYVWKSYVHVVDGCNSSTCMMC     ...              0.292342              0.292342   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY    ...              0.306004              0.306004   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK   ...              0.319624              0.319624   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR  ...              0.333877              0.333877   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10402 HLA-DQA10505-DQB10501   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                          0.297920              0.264890   \n",
       "AAAYYVGYLQPRTF                         0.435867              0.339701   \n",
       "AAAYYVGYLQPRTFL                        0.512765              0.384206   \n",
       "AAAYYVGYLQPRTFLL                       0.490339              0.372928   \n",
       "AAAYYVGYLQPRTFLLK                      0.482520              0.379043   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.307903              0.269684   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.311268              0.270227   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.318901              0.272231   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.327946              0.275950   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.336319              0.279893   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10505-DQB10502 HLA-DQA10506-DQB10303   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                          0.205538              0.227213   \n",
       "AAAYYVGYLQPRTF                         0.268764              0.341743   \n",
       "AAAYYVGYLQPRTFL                        0.308803              0.387634   \n",
       "AAAYYVGYLQPRTFLL                       0.299992              0.376569   \n",
       "AAAYYVGYLQPRTFLLK                      0.295066              0.355889   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.211645              0.291034   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.212058              0.295712   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.215326              0.307148   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.218148              0.319855   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.219695              0.328877   \n",
       "\n",
       "loci                                                                   \\\n",
       "genotype                  HLA-DQA10508-DQB10301 HLA-DQA10509-DQB10301   \n",
       "Peptide                                                                 \n",
       "AAAYYVGYLQPRT                          0.249702              0.249702   \n",
       "AAAYYVGYLQPRTF                         0.333322              0.333322   \n",
       "AAAYYVGYLQPRTFL                        0.383470              0.383470   \n",
       "AAAYYVGYLQPRTFLL                       0.374134              0.374134   \n",
       "AAAYYVGYLQPRTFLLK                      0.354784              0.354784   \n",
       "...                                         ...                   ...   \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.285683              0.285683   \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.292342              0.292342   \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.306004              0.306004   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.319624              0.319624   \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.333877              0.333877   \n",
       "\n",
       "loci                                                     \n",
       "genotype                  HLA-DQA10601-DQB10301 unknown  \n",
       "Peptide                                                  \n",
       "AAAYYVGYLQPRT                          0.219885     0.0  \n",
       "AAAYYVGYLQPRTF                         0.294761     0.0  \n",
       "AAAYYVGYLQPRTFL                        0.326042     0.0  \n",
       "AAAYYVGYLQPRTFLL                       0.327447     0.0  \n",
       "AAAYYVGYLQPRTFLLK                      0.307027     0.0  \n",
       "...                                         ...     ...  \n",
       "YYVWKSYVHVVDGCNSSTCMM                  0.277248     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMC                 0.281802     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCY                0.296308     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYK               0.306854     0.0  \n",
       "YYVWKSYVHVVDGCNSSTCMMCYKR              0.317379     0.0  \n",
       "\n",
       "[125593 rows x 283 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "netmhc40_aff_pivot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
