{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext jupyter_spaces"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from deeprobust.graph.data import Dataset\n",
    "from hrdataset import CustomDataset\n",
    "import pandas\n",
    "import numpy as np\n",
    "import scipy.sparse\n",
    "import networkx as nx\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "from collections import defaultdict\n",
    "import signac\n",
    "import pickle\n",
    "import itertools\n",
    "import copy\n",
    "\n",
    "from jupyter_spaces import get_spaces\n",
    "import jupyter_spaces\n",
    "from scipy.special import softmax\n",
    "import warnings\n",
    "import itertools\n",
    "import plotly\n",
    "import plotly.graph_objects as go\n",
    "import plotly.express as px\n",
    "from plotly.subplots import make_subplots"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open the signac project through a relative path, so the project that is found\n",
    "# depends on the kernel's working directory when this cell runs.\n",
    "project = signac.get_project(\"../../\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the experiment-run table. Default NaN parsing is disabled and only empty\n",
    "# fields count as missing, so the literal string \"N/A\" survives as a value.\n",
    "df_expRun = pandas.read_csv(\"metattack-adj-only.csv\", index_col=0, keep_default_na=False, na_values=[\"\"])\n",
    "\n",
    "# Reshape wide -> long: the evasionJobID / poisonJobID columns become one\n",
    "# (attackIDType, attackID) pair per row; every other column is kept as an id column.\n",
    "df_expRun = df_expRun.melt(id_vars=[col for col in df_expRun.columns if col not in {\"evasionJobID\", \"poisonJobID\"}],\n",
    "                           var_name='attackIDType',\n",
    "                           value_name='attackID')\n",
    "# Use the `columns=` keyword: passing the axis positionally (`drop(label, 1)`) is\n",
    "# deprecated since pandas 1.3 and rejected by pandas 2.x.\n",
    "df_expRun = df_expRun.drop(columns='Attack Phase')\n",
    "\n",
    "# Discard rows whose attack ID is the literal \"N/A\" string.\n",
    "na_mask = (df_expRun['attackID'] == 'N/A')\n",
    "df_expRun = df_expRun[~na_mask]\n",
    "\n",
    "# Rows missing either the attack job ID or the clean job ID are incomplete: warn,\n",
    "# keep an unfiltered snapshot in df_expRun_Original, then filter them out.\n",
    "incomplete_mask = (df_expRun.attackID.isnull() | df_expRun.cleanJobID.isnull())\n",
    "if incomplete_mask.any():\n",
    "    warnings.warn(f\"{incomplete_mask.sum()} experiments are incomplete!\")\n",
    "df_expRun_Original = df_expRun.copy()\n",
    "df_expRun = df_expRun.loc[~incomplete_mask]\n",
    "\n",
    "# Split the remaining rows by attack type.\n",
    "df_expRun_evasion = df_expRun[df_expRun['attackIDType'] == 'evasionJobID']\n",
    "df_expRun_poison = df_expRun[df_expRun['attackIDType'] == 'poisonJobID']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def check_correctness(prediction: np.ndarray, mask=None, data=None):\n",
    "    \"\"\"Return a boolean array marking which predictions match ``data.labels``.\n",
    "\n",
    "    Args:\n",
    "        prediction: Array of per-class scores; the predicted class of each row is\n",
    "            its argmax along axis 1 (presumably shaped (n_samples, n_classes)).\n",
    "        mask: Optional indexer applied to both the predicted classes and\n",
    "            ``data.labels``. When ``None`` every row is compared.\n",
    "        data: Object exposing a ``labels`` array. It is required; the ``None``\n",
    "            default is kept only so the signature stays unchanged.\n",
    "\n",
    "    Returns:\n",
    "        Boolean array with one entry per compared row.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``data`` is not provided.\n",
    "    \"\"\"\n",
    "    if data is None:\n",
    "        # Fail with an explicit message instead of an AttributeError on None.labels.\n",
    "        raise ValueError(\"check_correctness requires `data` (an object with a `labels` attribute)\")\n",
    "    pred_class = prediction.argmax(1)\n",
    "    if mask is None:\n",
    "        return pred_class == data.labels\n",
    "    return pred_class[mask] == data.labels[mask]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset names whose pickled data the loading cells read from\n",
    "# ../../datasets/data/<name>.pkl; any other dataset is read from the job's data.pkl.\n",
    "HETERO_DATASETS = ['fb100', 'twitch-tw', 'snap-patent-downsampled']\n",
    "# Not referenced by the other cells visible in this notebook.\n",
    "# NOTE(review): the name is spelled \"DATASETES\" - confirm no external user before renaming.\n",
    "HOMO_DATASETES = ['citeseer', 'cora']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Evasion (Post-training Attack)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%space `metattack-evasion`\n",
    "# Build one result record per evasion experiment: the first row of the attack\n",
    "# job's resultTable.csv merged with the clean model's accuracy on each split.\n",
    "df_subtask = df_expRun_evasion\n",
    "perturbDataDict = {}\n",
    "defenseTableDict = {}\n",
    "lst_dict_result = []\n",
    "\n",
    "for tid, tdata in df_subtask.iterrows():\n",
    "\n",
    "    if tdata.perturbJobID in perturbDataDict:\n",
    "        # This perturbation job was loaded on an earlier iteration: reuse it.\n",
    "        cached_entry = perturbDataDict[tdata.perturbJobID]\n",
    "        dict_pertubation = cached_entry[\"dict_pertubation\"]\n",
    "        dataset = cached_entry[\"dataset\"]\n",
    "    else:\n",
    "        perturbJob = project.open_job(id=tdata.perturbJobID)\n",
    "        # The relative paths below are opened while the job context is active\n",
    "        # (signac documents that this context switches into the job workspace).\n",
    "        # NOTE: pickle.load can execute arbitrary code - only load trusted project files.\n",
    "        with perturbJob:\n",
    "            with open(\"perturbDict.pkl\", \"rb\") as dataFile:\n",
    "                dict_pertubation = pickle.load(dataFile)\n",
    "            datasetName_ = perturbJob.sp['datasetName']\n",
    "            if datasetName_ in HETERO_DATASETS:\n",
    "                dataset_path = f\"../../datasets/data/{datasetName_}.pkl\"\n",
    "            else:\n",
    "                dataset_path = \"data.pkl\"\n",
    "            with open(dataset_path, \"rb\") as dataFile:\n",
    "                dataset = pickle.load(dataFile)\n",
    "                print(dataset)\n",
    "        perturbDataDict[tdata.perturbJobID] = {\n",
    "            \"dict_pertubation\": dict_pertubation,\n",
    "            \"dataset\": dataset,\n",
    "        }\n",
    "\n",
    "    # Attack-side metrics come from the attack job's result table.\n",
    "    job = project.open_job(id=tdata.attackID)\n",
    "    assert job.sp.use_runner\n",
    "    with job, open(\"resultTable.csv\", \"r\") as f:\n",
    "        resultTableAttack = pandas.read_csv(f, index_col=0)\n",
    "    perturb_name = tdata.perturb_prefix\n",
    "    defenseModelType = tdata.model\n",
    "    DEFENSE_MODEL = f\"{defenseModelType}_p\"\n",
    "\n",
    "    # Clean-side predictions are read from the clean job's data store.\n",
    "    clean_job = project.open_job(id=tdata.cleanJobID)\n",
    "    with clean_job.data.open(mode=\"r\"):\n",
    "        dict_prediction_clean = {key: np.array(val) for key, val in clean_job.data.predictionDict.items()}\n",
    "    clean_scores = np.array(dict_prediction_clean[f\"f:{DEFENSE_MODEL}@clean\"])\n",
    "    prediction_result_clean = softmax(clean_scores, axis=1)\n",
    "\n",
    "    # Experiment metadata first, so result-table columns win on a name clash.\n",
    "    dict_cur_result = {**tdata.to_dict(), **resultTableAttack.iloc[0].to_dict()}\n",
    "    for split_name in (\"train\", \"val\", \"test\"):\n",
    "        split_idx = getattr(dataset, f\"idx_{split_name}\")\n",
    "        dict_cur_result[f\"{split_name}_acc_clean\"] = check_correctness(prediction_result_clean, split_idx, dataset).mean()\n",
    "    lst_dict_result.append(dict_cur_result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%space `metattack-evasion`\n",
    "# Summarise clean vs. attacked test accuracy for every dataset / perturbation-ratio\n",
    "# pair: per model configuration, the mean and std across perturbation jobs.\n",
    "metric_cols = ['test_acc_clean', 'test_acc_attack', 'test_acc_abs_delta', 'test_acc_rel_delta']\n",
    "pd_result_full = pandas.DataFrame.from_dict(lst_dict_result).drop(columns=['AttackSession',\n",
    "                                                                           'perturb_prefix',\n",
    "                                                                           'cleanJobID',\n",
    "                                                                           'attackIDType',\n",
    "                                                                           'attackID',\n",
    "                                                                           'attack_type'])\n",
    "pd_result_full['model_with_arg'] = pd_result_full[\"model\"] + \":\" + pd_result_full[\"model_arg\"].fillna(\"\")\n",
    "pivot_index = [\"model_with_arg\", \"perturbJobID\"]\n",
    "defensePivotDict = dict()\n",
    "for cur_dataset in np.unique(pd_result_full['DATASET'].values):\n",
    "    for cur_ptb_ratio in np.unique(pd_result_full['ptb_ratio'].values):\n",
    "        h = f\"{cur_dataset}@{cur_ptb_ratio}\"\n",
    "        cur_mask = (pd_result_full['DATASET'] == cur_dataset) & (pd_result_full['ptb_ratio'] == cur_ptb_ratio)\n",
    "        if not cur_mask.any():\n",
    "            # The two unique-value loops form a full cross product; skip the\n",
    "            # combinations that have no rows, since there is nothing to pivot.\n",
    "            continue\n",
    "\n",
    "        # Work on an explicit copy so the renamed / added columns never write into\n",
    "        # a slice of pd_result_full (this is what raised SettingWithCopyWarning).\n",
    "        pd_cur_masked = pd_result_full.loc[cur_mask, ~pd_result_full.columns.isin(['model', 'model_arg', 'gcnsvd_k'])].copy()\n",
    "        pd_cur_masked = pd_cur_masked.rename(columns={\"train_acc\": \"train_acc_attack\",\n",
    "                                                      \"val_acc\": \"val_acc_attack\",\n",
    "                                                      \"test_acc\": \"test_acc_attack\"})\n",
    "\n",
    "        pd_cur_masked['test_acc_abs_delta'] = pd_cur_masked['test_acc_attack'] - pd_cur_masked['test_acc_clean']\n",
    "        pd_cur_masked['test_acc_rel_delta'] = pd_cur_masked['test_acc_abs_delta'] / pd_cur_masked['test_acc_clean']\n",
    "        # The string alias 'mean' is used instead of np.mean, which newer pandas\n",
    "        # versions warn about when passed as an aggregation function.\n",
    "        defensePivot = pd_cur_masked.pivot_table(values=metric_cols,\n",
    "                                                 index=pivot_index,\n",
    "                                                 aggfunc={'test_acc_clean': ['mean'],\n",
    "                                                          'test_acc_attack': ['mean'],\n",
    "                                                          'test_acc_abs_delta': ['mean'],\n",
    "                                                          'test_acc_rel_delta': ['mean']})\n",
    "        for key2 in defensePivot.index.levels[0]:\n",
    "            # Compute both statistics from the per-job rows BEFORE inserting either\n",
    "            # subtotal row; inserting the mean first would make it part of the std\n",
    "            # input. With a single perturbation job the sample std is NaN.\n",
    "            per_job_rows = defensePivot.loc[key2]\n",
    "            subtotal_mean = per_job_rows.mean(axis=0)\n",
    "            subtotal_std = per_job_rows.std(axis=0)\n",
    "            defensePivot.loc[(key2, 'subtotal_mean'), :] = subtotal_mean\n",
    "            defensePivot.loc[(key2, 'subtotal_std'), :] = subtotal_std\n",
    "\n",
    "        defensePivot = defensePivot.sort_index()\n",
    "        # Keep only the two subtotal rows per model configuration.\n",
    "        defensePivotDict[h] = defensePivot.loc[pandas.IndexSlice[:, [\"subtotal_mean\", \"subtotal_std\"]],\n",
    "                                               metric_cols]\n",
    "        # Drop the aggregation-name column level ('mean') left by pivot_table.\n",
    "        defensePivotDict[h].columns = defensePivotDict[h].columns.droplevel(1)\n",
    "        print(h)\n",
    "        # NOTE(review): this bare expression is only rendered if the %%space magic\n",
    "        # displays expression statements inside loops - confirm, else wrap in display().\n",
    "        defensePivotDict[h].style.format(dict(test_acc_clean=\"{:.2%}\",\n",
    "                                              test_acc_attack=\"{:.2%}\",\n",
    "                                              test_acc_abs_delta=\"{:.2%}\",\n",
    "                                              test_acc_rel_delta=\"{:.2%}\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Poison (Pre-training Attack) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%space `metattack-poison`\n",
    "# Build one result record per poisoning experiment: the first row of the attack\n",
    "# job's resultTable.csv merged with the clean model's accuracy on each split.\n",
    "df_subtask = df_expRun_poison\n",
    "perturbDataDict = {}\n",
    "defenseTableDict = {}\n",
    "lst_dict_result = []\n",
    "\n",
    "for tid, tdata in df_subtask.iterrows():\n",
    "\n",
    "    if tdata.perturbJobID in perturbDataDict:\n",
    "        # This perturbation job was loaded on an earlier iteration: reuse it.\n",
    "        cached_entry = perturbDataDict[tdata.perturbJobID]\n",
    "        dict_pertubation = cached_entry[\"dict_pertubation\"]\n",
    "        dataset = cached_entry[\"dataset\"]\n",
    "    else:\n",
    "        perturbJob = project.open_job(id=tdata.perturbJobID)\n",
    "        # The relative paths below are opened while the job context is active\n",
    "        # (signac documents that this context switches into the job workspace).\n",
    "        # NOTE: pickle.load can execute arbitrary code - only load trusted project files.\n",
    "        with perturbJob:\n",
    "            with open(\"perturbDict.pkl\", \"rb\") as dataFile:\n",
    "                dict_pertubation = pickle.load(dataFile)\n",
    "            datasetName_ = perturbJob.sp['datasetName']\n",
    "            if datasetName_ in HETERO_DATASETS:\n",
    "                dataset_path = f\"../../datasets/data/{datasetName_}.pkl\"\n",
    "            else:\n",
    "                dataset_path = \"data.pkl\"\n",
    "            with open(dataset_path, \"rb\") as dataFile:\n",
    "                dataset = pickle.load(dataFile)\n",
    "                print(dataset)\n",
    "        perturbDataDict[tdata.perturbJobID] = {\n",
    "            \"dict_pertubation\": dict_pertubation,\n",
    "            \"dataset\": dataset,\n",
    "        }\n",
    "\n",
    "    # Attack-side metrics come from the attack job's result table.\n",
    "    job = project.open_job(id=tdata.attackID)\n",
    "    assert job.sp.use_runner\n",
    "    with job, open(\"resultTable.csv\", \"r\") as f:\n",
    "        resultTableAttack = pandas.read_csv(f, index_col=0)\n",
    "    perturb_name = tdata.perturb_prefix\n",
    "    defenseModelType = tdata.model\n",
    "    DEFENSE_MODEL = f\"{defenseModelType}_p\"\n",
    "\n",
    "    # Clean-side predictions are read from the clean job's data store.\n",
    "    clean_job = project.open_job(id=tdata.cleanJobID)\n",
    "    with clean_job.data.open(mode=\"r\"):\n",
    "        dict_prediction_clean = {key: np.array(val) for key, val in clean_job.data.predictionDict.items()}\n",
    "    clean_scores = np.array(dict_prediction_clean[f\"f:{DEFENSE_MODEL}@clean\"])\n",
    "    prediction_result_clean = softmax(clean_scores, axis=1)\n",
    "\n",
    "    # Experiment metadata first, so result-table columns win on a name clash.\n",
    "    dict_cur_result = {**tdata.to_dict(), **resultTableAttack.iloc[0].to_dict()}\n",
    "    for split_name in (\"train\", \"val\", \"test\"):\n",
    "        split_idx = getattr(dataset, f\"idx_{split_name}\")\n",
    "        dict_cur_result[f\"{split_name}_acc_clean\"] = check_correctness(prediction_result_clean, split_idx, dataset).mean()\n",
    "    lst_dict_result.append(dict_cur_result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%space `metattack-poison`\n",
    "# Summarise clean vs. attacked test accuracy for every dataset / perturbation-ratio\n",
    "# pair: per model configuration, the mean and std across perturbation jobs.\n",
    "metric_cols = ['test_acc_clean', 'test_acc_attack', 'test_acc_abs_delta', 'test_acc_rel_delta']\n",
    "pd_result_full = pandas.DataFrame.from_dict(lst_dict_result).drop(columns=['AttackSession',\n",
    "                                                                           'perturb_prefix',\n",
    "                                                                           'cleanJobID',\n",
    "                                                                           'attackIDType',\n",
    "                                                                           'attackID',\n",
    "                                                                           'attack_type'])\n",
    "pd_result_full['model_with_arg'] = pd_result_full[\"model\"] + \":\" + pd_result_full[\"model_arg\"].fillna(\"\")\n",
    "pivot_index = [\"model_with_arg\", \"perturbJobID\"]\n",
    "defensePivotDict = dict()\n",
    "for cur_dataset in np.unique(pd_result_full['DATASET'].values):\n",
    "    for cur_ptb_ratio in np.unique(pd_result_full['ptb_ratio'].values):\n",
    "        h = f\"{cur_dataset}@{cur_ptb_ratio}\"\n",
    "        cur_mask = (pd_result_full['DATASET'] == cur_dataset) & (pd_result_full['ptb_ratio'] == cur_ptb_ratio)\n",
    "        if not cur_mask.any():\n",
    "            # The two unique-value loops form a full cross product; skip the\n",
    "            # combinations that have no rows, since there is nothing to pivot.\n",
    "            continue\n",
    "\n",
    "        # Work on an explicit copy so the renamed / added columns never write into\n",
    "        # a slice of pd_result_full (this is what raised SettingWithCopyWarning).\n",
    "        pd_cur_masked = pd_result_full.loc[cur_mask, ~pd_result_full.columns.isin(['model', 'model_arg', 'gcnsvd_k'])].copy()\n",
    "        pd_cur_masked = pd_cur_masked.rename(columns={\"train_acc\": \"train_acc_attack\",\n",
    "                                                      \"val_acc\": \"val_acc_attack\",\n",
    "                                                      \"test_acc\": \"test_acc_attack\"})\n",
    "\n",
    "        pd_cur_masked['test_acc_abs_delta'] = pd_cur_masked['test_acc_attack'] - pd_cur_masked['test_acc_clean']\n",
    "        pd_cur_masked['test_acc_rel_delta'] = pd_cur_masked['test_acc_abs_delta'] / pd_cur_masked['test_acc_clean']\n",
    "        # The string alias 'mean' is used instead of np.mean, which newer pandas\n",
    "        # versions warn about when passed as an aggregation function.\n",
    "        defensePivot = pd_cur_masked.pivot_table(values=metric_cols,\n",
    "                                                 index=pivot_index,\n",
    "                                                 aggfunc={'test_acc_clean': ['mean'],\n",
    "                                                          'test_acc_attack': ['mean'],\n",
    "                                                          'test_acc_abs_delta': ['mean'],\n",
    "                                                          'test_acc_rel_delta': ['mean']})\n",
    "        for key2 in defensePivot.index.levels[0]:\n",
    "            # Compute both statistics from the per-job rows BEFORE inserting either\n",
    "            # subtotal row; inserting the mean first would make it part of the std\n",
    "            # input. With a single perturbation job the sample std is NaN.\n",
    "            per_job_rows = defensePivot.loc[key2]\n",
    "            subtotal_mean = per_job_rows.mean(axis=0)\n",
    "            subtotal_std = per_job_rows.std(axis=0)\n",
    "            defensePivot.loc[(key2, 'subtotal_mean'), :] = subtotal_mean\n",
    "            defensePivot.loc[(key2, 'subtotal_std'), :] = subtotal_std\n",
    "\n",
    "        defensePivot = defensePivot.sort_index()\n",
    "        # Keep only the two subtotal rows per model configuration.\n",
    "        defensePivotDict[h] = defensePivot.loc[pandas.IndexSlice[:, [\"subtotal_mean\", \"subtotal_std\"]],\n",
    "                                               metric_cols]\n",
    "        # Drop the aggregation-name column level ('mean') left by pivot_table.\n",
    "        defensePivotDict[h].columns = defensePivotDict[h].columns.droplevel(1)\n",
    "        print(h)\n",
    "        # NOTE(review): this bare expression is only rendered if the %%space magic\n",
    "        # displays expression statements inside loops - confirm, else wrap in display().\n",
    "        defensePivotDict[h].style.format(dict(test_acc_clean=\"{:.2%}\",\n",
    "                                              test_acc_attack=\"{:.2%}\",\n",
    "                                              test_acc_abs_delta=\"{:.2%}\",\n",
    "                                              test_acc_rel_delta=\"{:.2%}\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
