{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "sys.path.insert(0, os.getcwd())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns\n",
    "\n",
    "from matplotlib import pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "p = [\n",
    "    \"#000000\",\n",
    "    \"#E69F00\",\n",
    "    \"#56B4E9\",\n",
    "    \"#009E73\",\n",
    "    \"#FB6467FF\",\n",
    "    \"#808282\",\n",
    "    \"#F0E442\",\n",
    "    \"#440154FF\",\n",
    "    \"#0072B2\",\n",
    "    \"#D55E00\",\n",
    "    \"#CC79A7\",\n",
    "    \"#C2CD23\",\n",
    "    \"#918BC3\",\n",
    "    \"#FFFFFF\",\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ROOT_DIR = Path(\"/path/to/a/folder/containing/another/folder/named/labels/with/all/the/labels/of/all/synthetic/data/\")\n",
    "LABELS_DIR = ROOT_DIR / \"labels\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_frames_list = []\n",
    "\n",
    "for file in os.listdir(LABELS_DIR):\n",
    "    if \".svg\" in file:\n",
    "        continue\n",
    "    data_frames_list.append(pd.read_csv(LABELS_DIR / file))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "every_label = pd.concat(data_frames_list)\n",
    "every_label.drop(columns=[\"OldTarget\"], axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sub0_plus_07_mask = np.abs(every_label.loc[:, \"Subgraph0\"] - 0.70710678) < 0.00001\n",
    "sub0_minus_07_mask = np.abs(every_label.loc[:, \"Subgraph0\"] + 0.70710678) < 0.00001\n",
    "\n",
    "sub1_plus_07_mask = np.abs(every_label.loc[:, \"Subgraph1\"] - 0.70710678) < 0.00001\n",
    "sub1_minus_07_mask = np.abs(every_label.loc[:, \"Subgraph1\"] + 0.70710678) < 0.00001"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "l1 = every_label[\n",
    "    (sub1_minus_07_mask | sub1_plus_07_mask) & (sub0_plus_07_mask | sub0_minus_07_mask)\n",
    "].shape[0]\n",
    "\n",
    "l2 = every_label[(sub1_minus_07_mask | sub1_plus_07_mask)].shape[0]\n",
    "\n",
    "assert l1 == l2\n",
    "\n",
    "every_label = every_label[\n",
    "    (sub1_minus_07_mask | sub1_plus_07_mask) & (sub0_plus_07_mask | sub0_minus_07_mask)\n",
    "]  # According to our theory jus tone should do the trick!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pos_or_neg = lambda x, y: \"Pos\"+str(y) if np.abs(x - 0.70710678) < 0.00001 else \"Neg\"+str(y)\n",
    "\n",
    "every_label[\"ID0\"] = every_label[\"Subgraph0\"].apply(pos_or_neg, args=(0,))\n",
    "every_label[\"ID1\"] = every_label[\"Subgraph1\"].apply(pos_or_neg, args=(1,))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "every_label.drop(columns=[\"Subgraph0\", \"Subgraph1\"], axis=1, inplace=True)\n",
    "every_label.reset_index(inplace=True, drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "every_label = every_label[~(every_label.iloc[:, 1:7] == 0).all(axis=1)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "every_label_melted = pd.melt(\n",
    "    every_label,\n",
    "    id_vars=[\"GraphName\", \"ID0\", \"ID1\"],\n",
    "    value_vars=[\n",
    "        \"Subgraph2\",\n",
    "        \"Subgraph3\",\n",
    "        \"Subgraph4\",\n",
    "        \"Subgraph5\",\n",
    "        \"Subgraph6\",\n",
    "        \"Subgraph7\",\n",
    "    ],\n",
    "    var_name=\"Subgraph Type\",\n",
    "    value_name=\"Z-Score\",\n",
    ")\n",
    "\n",
    "every_label_melted_ready = pd.melt(\n",
    "    every_label_melted,\n",
    "    id_vars=[\"GraphName\", \"Subgraph Type\", \"Z-Score\"],\n",
    "    value_vars=[\"ID0\", \"ID1\"],\n",
    "    var_name=\"ID\",\n",
    "    value_name=\"ID Size3\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "subgraph_name = {\n",
    "    \"Subgraph2\": \"4-path\",\n",
    "    \"Subgraph3\": \"4-cycle\",\n",
    "    \"Subgraph4\": \"4-star\",\n",
    "    \"Subgraph5\": \"tri-pan\",\n",
    "    \"Subgraph6\": \"bi-fan\",\n",
    "    \"Subgraph7\": \"4-clique\",\n",
    "}\n",
    "\n",
    "id_name = {\n",
    "    \"Neg0\": \"3-path - SP\",\n",
    "    \"Pos0\": \"3-path + SP\",\n",
    "    \"Neg1\": \"3-clique - SP\",\n",
    "    \"Pos1\": \"3-clique + SP\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "every_label_melted_ready[\"Subgraph Type\"] = every_label_melted_ready[\n",
    "    \"Subgraph Type\"\n",
    "].apply(lambda x: subgraph_name[x])\n",
    "\n",
    "every_label_melted_ready[\"ID Size3\"] = every_label_melted_ready[\n",
    "    \"ID Size3\"\n",
    "].apply(lambda x: id_name[x])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.set_context(\"paper\", font_scale=1.1)\n",
    "\n",
    "g = sns.FacetGrid(\n",
    "    every_label_melted_ready[\n",
    "        (every_label_melted_ready[\"ID Size3\"] == \"3-path + SP\")\n",
    "        | (every_label_melted_ready[\"ID Size3\"] == \"3-path - SP\")\n",
    "    ],\n",
    "    col=\"Subgraph Type\",\n",
    "    col_wrap=3,\n",
    "    hue=\"ID Size3\",\n",
    "    palette=p[1:],\n",
    "    sharey=False,\n",
    "    aspect=19 / 11,\n",
    ")\n",
    "g.map_dataframe(sns.histplot, \"Z-Score\", alpha=0.5, linewidth=0)\n",
    "g.add_legend()\n",
    "g.set_titles('{col_name}')\n",
    "g.tight_layout()\n",
    "\n",
    "plt.savefig(ROOT_DIR/'size3_vs_subgraph0.pdf', dpi=1200, bbox_inches = 'tight')\n",
    "plt.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.set_context(\"paper\", font_scale=1.1)\n",
    "\n",
    "g = sns.FacetGrid(\n",
    "    every_label_melted_ready[\n",
    "        (every_label_melted_ready[\"ID Size3\"] == \"3-clique + SP\")\n",
    "        | (every_label_melted_ready[\"ID Size3\"] == \"3-clique - SP\")\n",
    "    ],\n",
    "    col=\"Subgraph Type\",\n",
    "    col_wrap=3,\n",
    "    hue=\"ID Size3\",\n",
    "    palette=p[1:],\n",
    "    sharey=False,\n",
    "    aspect=19 / 11,\n",
    ")\n",
    "g.map_dataframe(sns.histplot, \"Z-Score\", alpha=0.5, linewidth=0)\n",
    "g.add_legend()\n",
    "g.set_titles('{col_name}')\n",
    "g.tight_layout()\n",
    "\n",
    "plt.savefig(ROOT_DIR/'size3_vs_subgraph1.pdf', dpi=1200, bbox_inches = 'tight')\n",
    "plt.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "zs = every_label_melted_ready[\n",
    "    ((every_label_melted_ready[\"ID Size3\"] == \"3-path + SP\"))\n",
    "    & (every_label_melted_ready[\"Subgraph Type\"] == \"bi-fan\")\n",
    "][\"Z-Score\"].to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.where(np.abs(zs) - 0.1 <= 0)[0].shape[0]/zs.shape[0]"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
