{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "dc5fb14f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-08-04 14:13:04.821320: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9373] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
      "2024-08-04 14:13:04.821396: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
      "2024-08-04 14:13:04.823165: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1534] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
      "2024-08-04 14:13:04.831921: I tensorflow/core/platform/cpu_feature_guard.cc:183] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
      "To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import time\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "\n",
    "from src.datasets import load_dataset, preprocess_dataset, prefetch_dataset\n",
    "from src.pmi_estimators import train_critic_model, neural_pmi\n",
    "import src.utils as utils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4c744e5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "cfg = { 'dataset' : 'mnist',\n",
    "       'model' : 'mlp',\n",
    "       'batch_size' : 512,\n",
    "       }\n",
    "\n",
    "# cfg = { 'dataset' : 'fashion_mnist',\n",
    "#         'model' : 'cnn',\n",
    "#         'batch_size' : 512,\n",
    "#         }  \n",
    "\n",
    "model_name = cfg['model']\n",
    "dataset_name = cfg['dataset']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2fe0f769",
   "metadata": {},
   "source": [
    "### Use Penultimate Layer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77c61d1f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "critic_list = ['concat','separable']\n",
    "estimators_list = ['density_ratio_fitting', 'variational_f_js']\n",
    "\n",
    "for run in range(5):\n",
    "    print(f'Run: {run+1}')\n",
    "    for critic in critic_list:\n",
    "        for estimator in estimators_list:\n",
    "            tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow\n",
    "            exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pmi/{critic}_{estimator}'\n",
    "            if not os.path.exists(exp_name):\n",
    "                print(\"Making directory\", exp_name)\n",
    "                os.makedirs(exp_name)\n",
    "                \n",
    "            ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)\n",
    "            n_classes = ds_info.features['label'].num_classes\n",
    "            ds_train = preprocess_dataset(ds_train, cfg, n_classes, resize=False, normalize=True, onehot=True)\n",
    "            ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)\n",
    "            ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)\n",
    "            \n",
    "            model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/saved_models/trained_model.keras')\n",
    "            int_model = tf.keras.Model(inputs=model.inputs, outputs=model.layers[-2].output)\n",
    "\n",
    "            ##############################################################\n",
    "            #\n",
    "            # Train PMI Model\n",
    "            #\n",
    "            # #############################################################\n",
    "        \n",
    "            print(f'Training PMI model ({critic}, {estimator})...')\n",
    "            ds_activity_trn = ds_train.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), y)).cache().prefetch(tf.data.AUTOTUNE)\n",
    "            ds_activity_val = ds_val.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), y)).cache().prefetch(tf.data.AUTOTUNE)\n",
    "            train_critic_model(ds_activity_trn, ds_activity_val, critic=critic, estimator=estimator, epochs=200, save_path=f'{exp_name}/pmi_model')\n",
    "                                                                              \n",
    "            ##############################################################\n",
    "            #\n",
    "            # Compute PMI for all validation and test samples\n",
    "            #\n",
    "            # #############################################################\n",
    "\n",
    "            pmi_model = tf.keras.models.load_model(f'{exp_name}/pmi_model')\n",
    "            \n",
    "            print(f'Computing PMI for all validation samples and for all classes...')\n",
    "            pmi_class = []\n",
    "            for k in range(n_classes):\n",
    "                ds_activity = ds_val.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), tf.one_hot(tf.fill([tf.shape(x)[0]], k), depth=n_classes))).cache().prefetch(tf.data.AUTOTUNE)\n",
    "                pmi_list = []\n",
    "                for (x_batch, y_batch) in ds_activity:\n",
    "                    pmi = neural_pmi(x_batch, y_batch, pmi_model, estimator=estimator)\n",
    "                    pmi_list += np.array(pmi).tolist()\n",
    "                pmi_class.append(pmi_list)\n",
    "            np.save(f'{exp_name}/pmi_class_val.npy', np.array(pmi_class).T)\n",
    "\n",
    "            print(f'Computing PMI for all test samples and for all classes...')\n",
    "            pmi_class = []\n",
    "            for k in range(n_classes):\n",
    "                ds_activity = ds_test.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), tf.one_hot(tf.fill([tf.shape(x)[0]], k), depth=n_classes))).cache().prefetch(tf.data.AUTOTUNE)\n",
    "                pmi_list = []\n",
    "                for (x_batch, y_batch) in ds_activity:\n",
    "                    pmi = neural_pmi(x_batch, y_batch, pmi_model, estimator=estimator)\n",
    "                    pmi_list += np.array(pmi).tolist()\n",
    "                pmi_class.append(pmi_list)\n",
    "            np.save(f'{exp_name}/pmi_class_test.npy', np.array(pmi_class).T)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "0b2e31aa",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run: 1\n",
      "Estimator: concat_density_ratio_fitting\n",
      "AUROC: 58.738\n",
      "AUPRC (success): 98.136\n",
      "AUPRC (error): 2.927\n",
      "AURC: 19.317\n",
      "Estimator: concat_variational_f_js\n",
      "AUROC: 26.550\n",
      "AUPRC (success): 95.588\n",
      "AUPRC (error): 2.203\n",
      "AURC: 47.822\n",
      "Estimator: separable_density_ratio_fitting\n",
      "AUROC: 52.141\n",
      "AUPRC (success): 97.973\n",
      "AUPRC (error): 5.023\n",
      "AURC: 21.082\n",
      "Estimator: separable_variational_f_js\n",
      "AUROC: 43.106\n",
      "AUPRC (success): 96.319\n",
      "AUPRC (error): 9.822\n",
      "AURC: 40.015\n",
      "Run: 2\n",
      "Estimator: concat_density_ratio_fitting\n",
      "AUROC: 47.066\n",
      "AUPRC (success): 97.898\n",
      "AUPRC (error): 1.399\n",
      "AURC: 21.761\n",
      "Estimator: concat_variational_f_js\n",
      "AUROC: 24.999\n",
      "AUPRC (success): 95.536\n",
      "AUPRC (error): 1.967\n",
      "AURC: 48.093\n",
      "Estimator: separable_density_ratio_fitting\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[4], line 29\u001b[0m\n\u001b[1;32m     27\u001b[0m         pmi_class \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mload(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexp_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/pmi_class_test.npy\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m     28\u001b[0m \u001b[38;5;66;03m#         pmi_class = np.array([utils.softmax(x) for x in pmi_class])\u001b[39;00m\n\u001b[0;32m---> 29\u001b[0m         pmi_class \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marray([utils\u001b[38;5;241m.\u001b[39mnormalize(x) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m pmi_class])\n\u001b[1;32m     30\u001b[0m         pmi \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marray([pmi_value[pred_value] \u001b[38;5;28;01mfor\u001b[39;00m pmi_value, pred_value \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(pmi_class, pred_y)])\n\u001b[1;32m     32\u001b[0m         auroc \u001b[38;5;241m=\u001b[39m utils\u001b[38;5;241m.\u001b[39mcompute_auroc(true_label, pmi)\n",
      "Cell \u001b[0;32mIn[4], line 29\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m     27\u001b[0m         pmi_class \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mload(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexp_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/pmi_class_test.npy\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m     28\u001b[0m \u001b[38;5;66;03m#         pmi_class = np.array([utils.softmax(x) for x in pmi_class])\u001b[39;00m\n\u001b[0;32m---> 29\u001b[0m         pmi_class \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marray([\u001b[43mutils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnormalize\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m pmi_class])\n\u001b[1;32m     30\u001b[0m         pmi \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marray([pmi_value[pred_value] \u001b[38;5;28;01mfor\u001b[39;00m pmi_value, pred_value \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(pmi_class, pred_y)])\n\u001b[1;32m     32\u001b[0m         auroc \u001b[38;5;241m=\u001b[39m utils\u001b[38;5;241m.\u001b[39mcompute_auroc(true_label, pmi)\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "estimators_list = ['concat_density_ratio_fitting', 'concat_variational_f_js',\n",
    "                   'separable_density_ratio_fitting', 'separable_variational_f_js']\n",
    "\n",
    "all_auroc = []\n",
    "all_auprc_succ = []\n",
    "all_auprc_error = []\n",
    "all_aurc = []\n",
    "for run in range(5):\n",
    "    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow\n",
    "    print(f'Run: {run+1}')\n",
    "    auroc_list = []\n",
    "    auprc_succ_list = []\n",
    "    auprc_error_list = []\n",
    "    aurc_list = []\n",
    "    for estimator in estimators_list:\n",
    "        print(f'Estimator: {estimator}')\n",
    "        exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pmi/{estimator}'\n",
    "\n",
    "        ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)\n",
    "        n_classes = ds_info.features['label'].num_classes\n",
    "        ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)\n",
    "        model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/saved_models/trained_model.keras')\n",
    "        \n",
    "        true_y = np.argmax([y for x,y in ds_test], axis=1)\n",
    "        pred_y = np.argmax(model.predict(ds_test.batch(cfg['batch_size']), verbose=0), axis=1)\n",
    "        true_label = np.equal(true_y, pred_y).astype(int) # assign 1 if true_y != pred_y, assign 0 if true_y == pred_y\n",
    "        pmi_class = np.load(f'{exp_name}/pmi_class_test.npy')\n",
    "        pmi_class = np.array([utils.softmax(x) for x in pmi_class])\n",
    "        pmi = np.array([pmi_value[pred_value] for pmi_value, pred_value in zip(pmi_class, pred_y)])\n",
    "\n",
    "        auroc = utils.compute_auroc(true_label, pmi)\n",
    "        auprc_succ = utils.compute_auprc_success(true_label, pmi)\n",
    "        auprc_error = utils.compute_auprc_error(true_label, pmi)\n",
    "        aurc, _, _ = utils.compute_aurc(true_label, pmi)\n",
    "        auroc_list.append(auroc)\n",
    "        auprc_succ_list.append(auprc_succ)\n",
    "        auprc_error_list.append(auprc_error)\n",
    "        aurc_list.append(aurc)\n",
    "        print(f'AUROC: {auroc*100:.3f}')\n",
    "        print(f'AUPRC (success): {auprc_succ*100:.3f}')\n",
    "        print(f'AUPRC (error): {auprc_error*100:.3f}')\n",
    "        print(f'AURC: {aurc*1000:.3f}')\n",
    "        all_auroc.append(auroc_list)\n",
    "        all_auprc_succ.append(auprc_succ_list)\n",
    "        all_auprc_error.append(auprc_error_list)\n",
    "        all_aurc.append(aurc_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c9cd4a26",
   "metadata": {},
   "outputs": [],
   "source": [
    "mean_auroc = np.mean(all_auroc, axis=0)\n",
    "std_auroc = np.std(all_auroc, axis=0)\n",
    "mean_auprc_succ = np.mean(all_auprc_succ, axis=0)\n",
    "std_auprc_succ = np.std(all_auprc_succ, axis=0)\n",
    "mean_auprc_error = np.mean(all_auprc_error, axis=0)\n",
    "std_auprc_error = np.std(all_auprc_error, axis=0)\n",
    "mean_aurc = np.mean(all_aurc, axis=0)\n",
    "std_aurc = np.std(all_aurc, axis=0)\n",
    "for i, estimator in enumerate(estimators_list):\n",
    "    print(f'Estimator: {estimator}')\n",
    "    print(f'AUROC: {mean_auroc[i]*100:.3f}, std: {std_auroc[i]*100:.3f}')\n",
    "    print(f'AUPRC (success): {mean_auprc_succ[i]*100:.3f}, std: {std_auprc_succ[i]*100:.3f}')\n",
    "    print(f'AUPRC (error): {mean_auprc_error[i]*100:.3f}, std: {std_auprc_succ[i]*100:.3f}')\n",
    "    print(f'AURC: {mean_aurc[i]*1000:.3f}, std: {std_aurc[i]*1000:.3f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0c8e78fa",
   "metadata": {},
   "source": [
    "### Use Output Layer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ddaf25a3",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "critic_list = ['concat','separable']\n",
    "estimators_list = ['density_ratio_fitting', 'variational_f_js']\n",
    "\n",
    "for run in range(5):\n",
    "    print(f'Run: {run+1}')\n",
    "    for critic in critic_list:\n",
    "        for estimator in estimators_list:\n",
    "            tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow\n",
    "            exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pmi/{critic}_{estimator}'\n",
    "            if not os.path.exists(exp_name):\n",
    "                print(\"Making directory\", exp_name)\n",
    "                os.makedirs(exp_name)\n",
    "                \n",
    "            ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)\n",
    "            n_classes = ds_info.features['label'].num_classes\n",
    "            ds_train = preprocess_dataset(ds_train, cfg, n_classes, resize=False, normalize=True, onehot=True)\n",
    "            ds_val = preprocess_dataset(ds_val, cfg, n_classes, resize=False, normalize=True, onehot=True)\n",
    "            ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)\n",
    "            \n",
    "            model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/saved_models/trained_model.keras')\n",
    "            int_model = tf.keras.Model(inputs=model.inputs, outputs=model.layers[-1].output)\n",
    "\n",
    "            ##############################################################\n",
    "            #\n",
    "            # Train PMI Model\n",
    "            #\n",
    "            # #############################################################\n",
    "        \n",
    "            print(f'Training PMI model ({critic}, {estimator})...')\n",
    "            ds_activity_trn = ds_train.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), y)).cache().prefetch(tf.data.AUTOTUNE)\n",
    "            ds_activity_val = ds_val.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), y)).cache().prefetch(tf.data.AUTOTUNE)\n",
    "            train_critic_model(ds_activity_trn, ds_activity_val, critic=critic, estimator=estimator, epochs=200, save_path=f'{exp_name}/pmi_output_model')\n",
    "                                                                              \n",
    "            ##############################################################\n",
    "            #\n",
    "            # Compute PMI for all validation and test samples\n",
    "            #\n",
    "            # #############################################################\n",
    "\n",
    "            pmi_model = tf.keras.models.load_model(f'{exp_name}/pmi_output_model')\n",
    "            \n",
    "            print(f'Computing PMI for all validation samples and for all classes...')\n",
    "            pmi_class = []\n",
    "            for k in range(n_classes):\n",
    "                ds_activity = ds_val.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), tf.one_hot(tf.fill([tf.shape(x)[0]], k), depth=n_classes))).cache().prefetch(tf.data.AUTOTUNE)\n",
    "                pmi_list = []\n",
    "                for (x_batch, y_batch) in ds_activity:\n",
    "                    pmi = neural_pmi(x_batch, y_batch, pmi_model, estimator=estimator)\n",
    "                    pmi_list += np.array(pmi).tolist()\n",
    "                pmi_class.append(pmi_list)\n",
    "            np.save(f'{exp_name}/pmi_output_class_val.npy', np.array(pmi_class).T)\n",
    "\n",
    "            print(f'Computing PMI for all test samples and for all classes...')\n",
    "            pmi_class = []\n",
    "            for k in range(n_classes):\n",
    "                ds_activity = ds_test.batch(cfg['batch_size']).map(lambda x, y: (int_model(x), tf.one_hot(tf.fill([tf.shape(x)[0]], k), depth=n_classes))).cache().prefetch(tf.data.AUTOTUNE)\n",
    "                pmi_list = []\n",
    "                for (x_batch, y_batch) in ds_activity:\n",
    "                    pmi = neural_pmi(x_batch, y_batch, pmi_model, estimator=estimator)\n",
    "                    pmi_list += np.array(pmi).tolist()\n",
    "                pmi_class.append(pmi_list)\n",
    "            np.save(f'{exp_name}/pmi_output_class_test.npy', np.array(pmi_class).T)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "48192bb0",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "estimators_list = ['concat_density_ratio_fitting', 'concat_variational_f_js',\n",
    "                   'separable_density_ratio_fitting', 'separable_variational_f_js']\n",
    "\n",
    "all_auroc = []\n",
    "all_auprc_succ = []\n",
    "all_auprc_error = []\n",
    "all_aurc = []\n",
    "for run in range(5):\n",
    "    tf.keras.utils.set_random_seed(run+10) # set random seed for Python, NumPy, and TensorFlow\n",
    "    print(f'Run: {run+1}')\n",
    "    auroc_list = []\n",
    "    auprc_succ_list = []\n",
    "    auprc_error_list = []\n",
    "    aurc_list = []\n",
    "    for estimator in estimators_list:\n",
    "        print(f'Estimator: {estimator}')\n",
    "        exp_name = f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/calibration/pmi/{estimator}'\n",
    "\n",
    "        ds_train, ds_val, ds_test, ds_info = load_dataset(cfg)\n",
    "        n_classes = ds_info.features['label'].num_classes\n",
    "        ds_test = preprocess_dataset(ds_test, cfg, n_classes, resize=False, normalize=True, onehot=True)\n",
    "        model = tf.keras.models.load_model(f'../results/PI_Explainability/{model_name}_{dataset_name}/run_{run+1}/saved_models/trained_model.keras')\n",
    "        \n",
    "        true_y = np.argmax([y for x,y in ds_test], axis=1)\n",
    "        pred_y = np.argmax(model.predict(ds_test.batch(cfg['batch_size']), verbose=0), axis=1)\n",
    "        true_label = np.equal(true_y, pred_y).astype(int) # assign 1 if true_y != pred_y, assign 0 if true_y == pred_y\n",
    "        pmi_class = np.load(f'{exp_name}/pmi_output_class_test.npy')\n",
    "        pmi_class = np.array([utils.softmax(x) for x in pmi_class])\n",
    "        pmi = np.array([pmi_value[pred_value] for pmi_value, pred_value in zip(pmi_class, pred_y)])\n",
    "\n",
    "        auroc = utils.compute_auroc(true_label, pmi)\n",
    "        auprc_succ = utils.compute_auprc_success(true_label, pmi)\n",
    "        auprc_error = utils.compute_auprc_error(true_label, pmi)\n",
    "        aurc, _, _ = utils.compute_aurc(true_label, pmi)\n",
    "        auroc_list.append(auroc)\n",
    "        auprc_succ_list.append(auprc_succ)\n",
    "        auprc_error_list.append(auprc_error)\n",
    "        aurc_list.append(aurc)\n",
    "        print(f'AUROC: {auroc*100:.3f}')\n",
    "        print(f'AUPRC (success): {auprc_succ*100:.3f}')\n",
    "        print(f'AUPRC (error): {auprc_error*100:.3f}')\n",
    "        print(f'AURC: {aurc*1000:.3f}')\n",
    "        all_auroc.append(auroc_list)\n",
    "        all_auprc_succ.append(auprc_succ_list)\n",
    "        all_auprc_error.append(auprc_error_list)\n",
    "        all_aurc.append(aurc_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e15513c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "mean_auroc = np.mean(all_auroc, axis=0)\n",
    "std_auroc = np.std(all_auroc, axis=0)\n",
    "mean_auprc_succ = np.mean(all_auprc_succ, axis=0)\n",
    "std_auprc_succ = np.std(all_auprc_succ, axis=0)\n",
    "mean_auprc_error = np.mean(all_auprc_error, axis=0)\n",
    "std_auprc_error = np.std(all_auprc_error, axis=0)\n",
    "mean_aurc = np.mean(all_aurc, axis=0)\n",
    "std_aurc = np.std(all_aurc, axis=0)\n",
    "for i, estimator in enumerate(estimators_list):\n",
    "    print(f'Estimator: {estimator}')\n",
    "    print(f'AUROC: {mean_auroc[i]*100:.3f}, std: {std_auroc[i]*100:.3f}')\n",
    "    print(f'AUPRC (success): {mean_auprc_succ[i]*100:.3f}, std: {std_auprc_succ[i]*100:.3f}')\n",
    "    print(f'AUPRC (error): {mean_auprc_error[i]*100:.3f}, std: {std_auprc_succ[i]*100:.3f}')\n",
    "    print(f'AURC: {mean_aurc[i]*1000:.3f}, std: {std_aurc[i]*1000:.3f}')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
