{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root folder of the sweep; each directory inside it is treated as a dataset name.\n",
    "results_dir = \"../graphconformal_sweep/\"\n",
    "\n",
    "# Keep directories only (a stray file such as .DS_Store would make the\n",
    "# split loader's os.listdir call fail) and sort the names so the panel\n",
    "# order of the figures does not depend on the filesystem's listing order.\n",
    "datasets = sorted(\n",
    "    entry for entry in os.listdir(results_dir)\n",
    "    if os.path.isdir(os.path.join(results_dir, entry))\n",
    ")\n",
    "\n",
    "# Matplotlib does not create missing parent folders when saving, so make\n",
    "# sure every output folder written to by this notebook exists up front.\n",
    "for figure_dir in (\"figures\", \"figures/split\", \"figures/nspc\"):\n",
    "    os.makedirs(figure_dir, exist_ok=True)\n",
    "\n",
    "sns.set(font_scale=1.4, style=\"white\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train/validation split sweep"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read every result table of the train/valid split sweep and tag each frame\n",
    "# with the sweep coordinates (dataset, train_frac, valid_frac, alpha, method)\n",
    "# so the frames can later be concatenated into one long table.\n",
    "all_results = []\n",
    "for dataset in datasets:\n",
    "    for train_frac in [0.2, 0.3]:\n",
    "        for valid_frac in [0.2, 0.1]:\n",
    "            for alpha in [0.1, 0.2, 0.3, 0.4]:\n",
    "                split_dir = os.path.join(results_dir, dataset, \"split\", f\"{train_frac}_{valid_frac}\", f\"alpha_{alpha}\")\n",
    "                # Same guard as the nspc loaders: a dataset or configuration\n",
    "                # that was never run has no folder, and os.listdir would raise.\n",
    "                if not os.path.exists(split_dir):\n",
    "                    continue\n",
    "                for method in os.listdir(split_dir):\n",
    "                    # Files with \"_params\" in their name are not result tables\n",
    "                    # (presumably hyper-parameter dumps -- TODO confirm).\n",
    "                    if \"_params\" in method:\n",
    "                        continue\n",
    "                    # The method name is the file name up to its first dot.\n",
    "                    method_name = method.split(\".\")[0]\n",
    "                    result_file = os.path.join(split_dir, method)\n",
    "                    results = pd.read_csv(result_file)\n",
    "                    results[\"dataset\"] = dataset\n",
    "                    results[\"train_frac\"] = train_frac\n",
    "                    results[\"valid_frac\"] = valid_frac\n",
    "                    results[\"alpha\"] = alpha\n",
    "                    results[\"method\"] = method_name\n",
    "                    all_results.append(results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Loads the samples-per-class (nspc) sweep. The frames go into their own\n",
    "# list: re-initialising and filling `all_results` here would throw away the\n",
    "# split-sweep frames gathered in the previous cell, and `full_df` would then\n",
    "# be built from nspc data that has no `train_frac` / `valid_frac` columns.\n",
    "all_nspc_results = []\n",
    "for dataset in datasets:\n",
    "    for nspc in [10, 20, 40, 80]:\n",
    "        for alpha in [0.1, 0.2, 0.3, 0.4]:\n",
    "            split_dir = os.path.join(results_dir, dataset, \"nspc\", f\"{nspc}\", f\"alpha_{alpha}\")\n",
    "            # Configurations that were never run have no folder; skip them.\n",
    "            if not os.path.exists(split_dir):\n",
    "                continue\n",
    "            for method in os.listdir(split_dir):\n",
    "                # Files with \"_params\" in their name are not result tables.\n",
    "                if \"_params\" in method:\n",
    "                    continue\n",
    "                # The method name is the file name up to its first dot.\n",
    "                method_name = method.split(\".\")[0]\n",
    "                result_file = os.path.join(split_dir, method)\n",
    "                results = pd.read_csv(result_file)\n",
    "                results[\"dataset\"] = dataset\n",
    "                results[\"nspc\"] = nspc\n",
    "                results[\"alpha\"] = alpha\n",
    "                results[\"method\"] = method_name\n",
    "                all_nspc_results.append(results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack all per-file frames into one long table. Every CSV restarts its row\n",
    "# index at 0, so ask for a fresh 0..n-1 index instead of keeping the\n",
    "# duplicated labels, which label-aligned operations cannot handle reliably.\n",
    "full_df = pd.concat(all_results, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distinct method names present in the loaded results.\n",
    "full_df[\"method\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "basic_methods = [\"tps\", \"tps_classwise\", \"aps_randomized\", \"daps\", \"raps\", \"naps_uniform\", \"naps_hyperbolic\", \"naps_exponential\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Small datasets: `efficiency` against `alpha`, one panel per dataset and\n",
    "# one line per method.\n",
    "small_datasets = [\"CiteSeer\", \"Amazon_Photos\"]\n",
    "in_small = full_df[\"dataset\"].isin(small_datasets)\n",
    "in_basic = full_df[\"method\"].isin(basic_methods)\n",
    "small_df = full_df[in_small & in_basic]\n",
    "\n",
    "# relplot is figure-level: the returned object is a FacetGrid.\n",
    "ax = sns.relplot(\n",
    "    data=small_df,\n",
    "    x=\"alpha\",\n",
    "    y=\"efficiency\",\n",
    "    hue=\"method\",\n",
    "    col=\"dataset\",\n",
    "    kind=\"line\",\n",
    ")\n",
    "ax.savefig(\"figures/small_datasets_efficiency.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Medium datasets 1: `efficiency` against `alpha`, one panel per dataset.\n",
    "med_1_datasets = [\"Cora\", \"PubMed\"]\n",
    "med_1_df = full_df[full_df[\"dataset\"].isin(med_1_datasets)]\n",
    "med_1_df = med_1_df[med_1_df[\"method\"].isin(basic_methods)]\n",
    "\n",
    "# relplot is figure-level and always builds its own FacetGrid; it does not\n",
    "# take an `ax` argument (seaborn warns and drops it), so the grid left over\n",
    "# from the previous cell is not passed in and this cell runs on its own.\n",
    "ax = sns.relplot(data=med_1_df, x=\"alpha\", y=\"efficiency\", hue=\"method\", col=\"dataset\", kind=\"line\")\n",
    "ax.savefig(\"figures/med_1_datasets_efficiency.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Medium datasets 2: `efficiency` against `alpha`, one panel per dataset.\n",
    "med_2_datasets = [\"Coauthor_CS\", \"Coauthor_Physics\"]\n",
    "med_2_df = full_df[full_df[\"dataset\"].isin(med_2_datasets)]\n",
    "med_2_df = med_2_df[med_2_df[\"method\"].isin(basic_methods)]\n",
    "\n",
    "# relplot is figure-level and always builds its own FacetGrid; it does not\n",
    "# take an `ax` argument (seaborn warns and drops it), so the grid left over\n",
    "# from the previous cell is not passed in and this cell runs on its own.\n",
    "ax = sns.relplot(data=med_2_df, x=\"alpha\", y=\"efficiency\", hue=\"method\", col=\"dataset\", kind=\"line\")\n",
    "ax.savefig(\"figures/med_2_datasets_efficiency.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Large datasets: `efficiency` against `alpha`, one line per method.\n",
    "large_datasets = [\"Flickr\"]\n",
    "in_large = full_df[\"dataset\"].isin(large_datasets)\n",
    "in_basic = full_df[\"method\"].isin(basic_methods)\n",
    "large_df = full_df[in_large & in_basic]\n",
    "\n",
    "ax = sns.relplot(\n",
    "    data=large_df,\n",
    "    x=\"alpha\",\n",
    "    y=\"efficiency\",\n",
    "    hue=\"method\",\n",
    "    col=\"dataset\",\n",
    "    kind=\"line\",\n",
    ")\n",
    "ax.savefig(\"figures/large_datasets_nspc_efficiency.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# OGB datasets: `efficiency` against `alpha`, one line per method.\n",
    "# Only ogbn-arxiv is selected here.\n",
    "ogb_datasets = [\"ogbn-arxiv\"]\n",
    "in_ogb = full_df[\"dataset\"].isin(ogb_datasets)\n",
    "in_basic = full_df[\"method\"].isin(basic_methods)\n",
    "ogb_df = full_df[in_ogb & in_basic]\n",
    "\n",
    "ax = sns.relplot(\n",
    "    data=ogb_df,\n",
    "    x=\"alpha\",\n",
    "    y=\"efficiency\",\n",
    "    hue=\"method\",\n",
    "    col=\"dataset\",\n",
    "    kind=\"line\",\n",
    ")\n",
    "ax.savefig(\"figures/ogb_datasets_nspc_efficiency.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# CiteSeer, alpha = 0.1: box plots of the two stratified-coverage columns,\n",
    "# one box per method.\n",
    "small_datasets = [\"CiteSeer\"]\n",
    "methods = [\"tps\", \"tps_classwise\", \"aps_randomized\", \"raps\", \"naps\"]\n",
    "in_dataset = full_df[\"dataset\"].isin(small_datasets)\n",
    "in_methods = full_df[\"method\"].isin(methods)\n",
    "small_df = full_df[in_dataset & in_methods]\n",
    "\n",
    "sns.set(rc={\"figure.figsize\": (14, 12)}, font_scale=1.3, style=\"white\")\n",
    "\n",
    "at_alpha = small_df[small_df[\"alpha\"] == 0.1]\n",
    "ax = sns.catplot(data=at_alpha, x=\"label_stratified_coverage\", y=\"method\", kind=\"box\")\n",
    "ax.savefig(\"figures/split/citeseer_label_stratified_coverage.pdf\")\n",
    "\n",
    "ax = sns.catplot(data=at_alpha, x=\"size_stratified_coverage\", y=\"method\", kind=\"box\")\n",
    "ax.savefig(\"figures/split/citeseer_size_stratified_coverage.pdf\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cora, alpha = 0.1: box plots of the two stratified-coverage columns,\n",
    "# one box per method.\n",
    "small_datasets = [\"Cora\"]\n",
    "methods = [\"tps\", \"tps_classwise\", \"aps_randomized\", \"raps\", \"naps\"]\n",
    "small_df = full_df[full_df[\"dataset\"].isin(small_datasets)]\n",
    "# Take an explicit copy: a column is added below, and writing into a\n",
    "# filtered slice of full_df triggers pandas' SettingWithCopyWarning.\n",
    "small_df = small_df[small_df[\"method\"].isin(methods)].copy()\n",
    "\n",
    "# Label of the train/valid split each row came from, e.g. \"0.2_0.1\".\n",
    "small_df[\"split\"] = small_df[\"train_frac\"].astype(str) + \"_\" + small_df[\"valid_frac\"].astype(str)\n",
    "\n",
    "sns.set(rc={\"figure.figsize\": (14, 12)}, font_scale=1.3, style=\"white\")\n",
    "\n",
    "at_alpha = small_df[small_df[\"alpha\"] == 0.1]\n",
    "ax = sns.catplot(data=at_alpha, x=\"label_stratified_coverage\", y=\"method\", kind=\"box\")\n",
    "ax.savefig(\"figures/split/cora_label_stratified_coverage.pdf\")\n",
    "\n",
    "ax = sns.catplot(data=at_alpha, x=\"size_stratified_coverage\", y=\"method\", kind=\"box\")\n",
    "ax.savefig(\"figures/split/cora_size_stratified_coverage.pdf\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Number of samples per class (nspc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root folder of the sweep; each directory inside it is treated as a dataset name.\n",
    "results_dir = \"../graphconformal_sweep/\"\n",
    "# Directories only, in sorted order, so stray files are ignored and the\n",
    "# order in which datasets appear in the figures is reproducible.\n",
    "datasets = sorted(\n",
    "    entry for entry in os.listdir(results_dir)\n",
    "    if os.path.isdir(os.path.join(results_dir, entry))\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather every result table of the samples-per-class (nspc) sweep, tagging\n",
    "# each frame with its dataset, nspc, alpha and method.\n",
    "all_nspc_results = []\n",
    "for dataset in datasets:\n",
    "    for nspc in [10, 20, 40, 80]:\n",
    "        for alpha in [0.1, 0.2, 0.3, 0.4]:\n",
    "            alpha_dir = os.path.join(results_dir, dataset, \"nspc\", f\"{nspc}\", f\"alpha_{alpha}\")\n",
    "            # Only configurations that were actually run have a folder.\n",
    "            if os.path.exists(alpha_dir):\n",
    "                for file_name in os.listdir(alpha_dir):\n",
    "                    # Files with \"_params\" in their name are not result tables.\n",
    "                    if \"_params\" in file_name:\n",
    "                        continue\n",
    "                    frame = pd.read_csv(os.path.join(alpha_dir, file_name))\n",
    "                    # The method name is the file name up to its first dot.\n",
    "                    tagged = frame.assign(\n",
    "                        dataset=dataset,\n",
    "                        nspc=nspc,\n",
    "                        alpha=alpha,\n",
    "                        method=file_name.split(\".\")[0],\n",
    "                    )\n",
    "                    all_nspc_results.append(tagged)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack all per-file frames into one long table. Every CSV restarts its row\n",
    "# index at 0, so ask for a fresh 0..n-1 index instead of keeping the\n",
    "# duplicated labels, which label-aligned operations cannot handle reliably.\n",
    "ns_df = pd.concat(all_nspc_results, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Amazon_Photos, alpha = 0.1: for each nspc value, box plots of the two\n",
    "# stratified-coverage columns, one box per method.\n",
    "small_datasets = [\"Amazon_Photos\"]\n",
    "small_df = ns_df[ns_df[\"dataset\"].isin(small_datasets)]\n",
    "small_df = small_df[small_df[\"method\"].isin(basic_methods)]\n",
    "\n",
    "for nspc in [10, 20, 40]:\n",
    "    tdf = small_df[(small_df.nspc == nspc)]\n",
    "    at_alpha = tdf[tdf.alpha == 0.1]\n",
    "    for metric in (\"label_stratified_coverage\", \"size_stratified_coverage\"):\n",
    "        # catplot is figure-level and creates its own figure, so a preceding\n",
    "        # plt.figure() would only leave an empty figure behind. Saving through\n",
    "        # the returned grid writes exactly the figure that was just drawn.\n",
    "        grid = sns.catplot(data=at_alpha, x=metric, y=\"method\", kind=\"box\")\n",
    "        grid.savefig(f\"figures/nspc/amazon_photos_{nspc}_{metric}.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flickr, alpha = 0.1: for each nspc value, box plots of the two\n",
    "# stratified-coverage columns, one box per method.\n",
    "large_datasets = [\"Flickr\"]\n",
    "large_df = ns_df[ns_df[\"dataset\"].isin(large_datasets)]\n",
    "large_df = large_df[large_df[\"method\"].isin(basic_methods)]\n",
    "\n",
    "for nspc in [10, 20, 40, 80]:\n",
    "    tdf = large_df[(large_df.nspc == nspc)]\n",
    "    at_alpha = tdf[tdf.alpha == 0.1]\n",
    "    for metric in (\"label_stratified_coverage\", \"size_stratified_coverage\"):\n",
    "        # catplot is figure-level and creates its own figure, so a preceding\n",
    "        # plt.figure() would only leave an empty figure behind.\n",
    "        grid = sns.catplot(data=at_alpha, x=metric, y=\"method\", kind=\"box\")\n",
    "        # Flickr gets its own file names; writing to the amazon_photos_* paths\n",
    "        # would overwrite the Amazon_Photos figures with Flickr data.\n",
    "        grid.savefig(f\"figures/nspc/flickr_{nspc}_{metric}.pdf\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## APS: randomized vs. non-randomized"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split sweep: distribution of `efficiency` for \"aps\" vs \"aps_randomized\",\n",
    "# drawn as split violins per dataset. Cora is plotted in a figure of its own.\n",
    "sns.set(rc={'figure.figsize': (15,10)}, font_scale=1, style=\"white\")\n",
    "aps_df = full_df[full_df.method.isin([\"aps\", \"aps_randomized\"])]\n",
    "\n",
    "# catplot is figure-level and creates its own figure, so no plt.figure() is\n",
    "# issued beforehand (it would only leave an empty figure in the output);\n",
    "# each grid is saved through its own savefig.\n",
    "grid = sns.catplot(aps_df[aps_df.dataset != \"Cora\"], x=\"efficiency\", y=\"dataset\", hue=\"method\", split=True, kind=\"violin\", gap=.1, inner=\"quart\")\n",
    "grid.savefig(\"figures/split/aps_randomized_efficiency.pdf\")\n",
    "\n",
    "grid = sns.catplot(aps_df[aps_df.dataset == \"Cora\"], x=\"efficiency\", y=\"dataset\", hue=\"method\", split=True, kind=\"violin\", gap=.1, inner=\"quart\")\n",
    "grid.savefig(\"figures/split/aps_randomized_efficiency_cora.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# nspc sweep: distribution of `efficiency` for \"aps\" vs \"aps_randomized\",\n",
    "# drawn as split violins per dataset. Cora is plotted in a figure of its own.\n",
    "aps_df = ns_df[ns_df.method.isin([\"aps\", \"aps_randomized\"])]\n",
    "\n",
    "# catplot is figure-level and creates its own figure, so no plt.figure() is\n",
    "# issued beforehand (it would only leave an empty figure in the output);\n",
    "# each grid is saved through its own savefig.\n",
    "grid = sns.catplot(aps_df[aps_df.dataset != \"Cora\"], x=\"efficiency\", y=\"dataset\", hue=\"method\", split=True, kind=\"violin\", gap=.1, inner=\"quart\")\n",
    "grid.savefig(\"figures/nspc/aps_randomized_efficiency.pdf\")\n",
    "\n",
    "grid = sns.catplot(aps_df[aps_df.dataset == \"Cora\"], x=\"efficiency\", y=\"dataset\", hue=\"method\", split=True, kind=\"violin\", gap=.1, inner=\"quart\")\n",
    "grid.savefig(\"figures/nspc/aps_randomized_efficiency_cora.pdf\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## DAPS vs DTPS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split sweep, alpha = 0.1: bar chart of four metric columns for \"daps\" vs\n",
    "# \"dtps\", one figure each for PubMed and Cora.\n",
    "sns.set(rc={'figure.figsize': (15,10)}, font_scale=1.5, style=\"white\")\n",
    "methods = [\"daps\", \"dtps\"]\n",
    "dps_df = full_df[(full_df.method.isin(methods)) & (full_df.alpha == 0.1)]\n",
    "# Long format: one row per (dataset, method, metric) observation.\n",
    "melted = dps_df.melt(id_vars=[\"dataset\", \"method\"], var_name=\"metric\", value_name=\"value\", value_vars=[\"efficiency\", \"coverage\", \"label_stratified_coverage\", \"size_stratified_coverage\"])\n",
    "\n",
    "for dataset_name in [\"PubMed\", \"Cora\"]:\n",
    "    # barplot is axes-level, so give it an explicit figure/axes pair and save\n",
    "    # that figure. No plt.tight_layout() call is needed (issued before any\n",
    "    # figure exists it only creates an empty one): bbox_inches=\"tight\"\n",
    "    # already trims the saved file.\n",
    "    fig, ax = plt.subplots()\n",
    "    sns.barplot(data=melted[melted.dataset == dataset_name], x=\"value\", y=\"metric\", hue=\"method\", ax=ax)\n",
    "    fig.savefig(f\"figures/split/daps_dtps_{dataset_name.lower()}.pdf\", bbox_inches=\"tight\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# nspc sweep: bar chart of four metric columns for \"daps\" vs \"dtps\", one\n",
    "# figure per alpha value.\n",
    "sns.set(rc={'figure.figsize': (15,10)}, font_scale=1, style=\"white\")\n",
    "\n",
    "methods = [\"daps\", \"dtps\"]\n",
    "metric_columns = [\"efficiency\", \"coverage\", \"label_stratified_coverage\", \"size_stratified_coverage\"]\n",
    "\n",
    "for alpha in [0.1, 0.2]:\n",
    "    dps_df = ns_df[(ns_df.method.isin(methods)) & (ns_df.alpha == alpha)]\n",
    "    # Long format: one row per (dataset, method, metric) observation.\n",
    "    melted = dps_df.melt(id_vars=[\"dataset\", \"method\"], var_name=\"metric\", value_name=\"value\", value_vars=metric_columns)\n",
    "\n",
    "    # catplot is figure-level and creates its own figure, so no plt.figure()\n",
    "    # is issued beforehand; the grid is saved through its own savefig.\n",
    "    grid = sns.catplot(data=melted, x=\"value\", y=\"metric\", hue=\"method\", kind=\"bar\")\n",
    "    # Yields daps_dtps_0.1.pdf and daps_dtps_0.2.pdf.\n",
    "    grid.savefig(f\"figures/nspc/daps_dtps_{alpha}.pdf\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CFGNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distinct method names present in the loaded results.\n",
    "full_df[\"method\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split sweep: `efficiency` per dataset for \"cfgnn_aps\" vs \"cfgnn_orig\".\n",
    "sns.set(rc={'figure.figsize': (18,10)}, font_scale=1, style=\"white\")\n",
    "\n",
    "full_cf = full_df[full_df.method.isin([\"cfgnn_aps\", \"cfgnn_orig\"])]\n",
    "\n",
    "# catplot is figure-level and creates its own figure, so no plt.figure() is\n",
    "# issued beforehand (it would only leave an empty figure in the output).\n",
    "grid = sns.catplot(data=full_cf, x=\"efficiency\", y=\"dataset\", hue=\"method\", kind=\"bar\")\n",
    "grid.savefig(\"figures/split/cfgnn_aps_vs_orig_efficiency.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# nspc sweep: `efficiency` per dataset for \"cfgnn_aps\" vs \"cfgnn_orig\",\n",
    "# one figure per nspc value.\n",
    "n_cf = ns_df[ns_df.method.isin([\"cfgnn_aps\", \"cfgnn_orig\"])]\n",
    "\n",
    "for nspc in [10, 20]:\n",
    "    # catplot is figure-level and creates its own figure, so no plt.figure()\n",
    "    # is issued beforehand; the grid is saved through its own savefig.\n",
    "    grid = sns.catplot(data=n_cf[n_cf.nspc == nspc], x=\"efficiency\", y=\"dataset\", hue=\"method\", kind=\"bar\")\n",
    "    grid.savefig(f\"figures/nspc/cfgnn_aps_vs_orig_efficiency_{nspc}.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
