{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": []
   },
   "source": [
    "# Toxicity Rank Aggregation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Required Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from rpy2.robjects.packages import importr\n",
    "from rpy2.robjects import numpy2ri\n",
    "import pickle5 as pickle\n",
    "import glob\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "numpy2ri.activate()\n",
    "rankaggreg = importr('RankAggreg')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Aggregation Function\n",
    "def aggregate_ordered_lists(x,\n",
    "                            k,\n",
    "                            importance=None,\n",
    "                            method='CE',\n",
    "                            distance='Spearman',\n",
    "                            seed=0,\n",
    "                            verbose=False):\n",
    "    \"\"\"Aggregate several ordered lists into one consensus list.\n",
    "\n",
    "    Thin wrapper around the R function ``RankAggreg`` of the ``RankAggreg``\n",
    "    package, called through the notebook-level ``rankaggreg`` rpy2 handle.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    x : array-like, 2-D\n",
    "        One ordered list per row; converted with ``np.array``.\n",
    "    k : int\n",
    "        Forwarded to ``RankAggreg`` as ``k``.\n",
    "    importance : array-like or None, optional\n",
    "        One weight per row of ``x``. ``None`` (default) weights every\n",
    "        list equally with 1.\n",
    "    method, distance, seed, verbose\n",
    "        Forwarded unchanged to ``RankAggreg``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The object returned by ``rankaggreg.RankAggreg``. Callers in this\n",
    "    notebook read the aggregated list from element 0.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``x`` is not 2-D, or ``importance`` does not hold exactly one\n",
    "        weight per row of ``x``.\n",
    "    \"\"\"\n",
    "    x_array = np.array(x)\n",
    "    # Ragged or flat input would otherwise only fail inside R with an opaque error.\n",
    "    if x_array.ndim != 2:\n",
    "        raise ValueError(\n",
    "            f\"x must be 2-D (one ordered list per row), got shape {x_array.shape}\"\n",
    "        )\n",
    "\n",
    "    n_lists = x_array.shape[0]\n",
    "    if importance is None:\n",
    "        importance_array = np.ones(n_lists)\n",
    "    else:\n",
    "        importance_array = np.array(importance)\n",
    "    if importance_array.size != n_lists:\n",
    "        raise ValueError(\n",
    "            f\"importance needs one weight per list: expected {n_lists}, \"\n",
    "            f\"got {importance_array.size}\"\n",
    "        )\n",
    "\n",
    "    return rankaggreg.RankAggreg(x=x_array,\n",
    "                                 k=k,\n",
    "                                 method=method,\n",
    "                                 distance=distance,\n",
    "                                 importance=importance_array,\n",
    "                                 seed=seed,\n",
    "                                 verbose=verbose)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### All Rank Aggregation Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "toxic_only_gen_log_proba.pickle\n",
      "----------------------------------------\n",
      "FSD\n",
      "[('mosaicml-mpt-30b', 1), ('llama-2-13b', 2), ('llama-2-7b', 3), ('llama-2-70b', 4), ('tiiuae-falcon-40b', 5)]\n",
      "----------------------------------------\n",
      "SSD\n",
      "[('mosaicml-mpt-30b', 1), ('llama-2-13b', 2), ('llama-2-7b', 3), ('llama-2-70b', 4), ('tiiuae-falcon-40b', 5)]\n",
      "----------------------------------------\n",
      "\n",
      "\n",
      "nontoxic_only_gen_log_proba.pickle\n",
      "----------------------------------------\n",
      "FSD\n",
      "[('llama-2-7b', 1), ('llama-2-13b', 2), ('mosaicml-mpt-30b', 3), ('llama-2-70b', 4), ('tiiuae-falcon-40b', 5)]\n",
      "----------------------------------------\n",
      "SSD\n",
      "[('llama-2-7b', 1), ('llama-2-13b', 2), ('llama-2-70b', 3), ('mosaicml-mpt-30b', 4), ('tiiuae-falcon-40b', 5)]\n",
      "----------------------------------------\n",
      "\n",
      "\n",
      "promptgen_logprob_conditional_onlytoxic.pickle\n",
      "----------------------------------------\n",
      "FSD\n",
      "[('llama-2-70b', 1), ('llama-2-7b', 2), ('llama-2-13b', 3), ('mosaicml-mpt-30b', 4), ('tiiuae-falcon-40b', 5)]\n",
      "----------------------------------------\n",
      "SSD\n",
      "[('llama-2-70b', 1), ('llama-2-7b', 2), ('llama-2-13b', 3), ('mosaicml-mpt-30b', 4), ('tiiuae-falcon-40b', 5)]\n",
      "----------------------------------------\n",
      "\n",
      "\n",
      "nontoxic_only_prompt_gen_log_proba.pickle\n",
      "----------------------------------------\n",
      "FSD\n",
      "[('mosaicml-mpt-30b', 1), ('llama-2-13b', 2), ('llama-2-7b', 3), ('llama-2-70b', 4), ('tiiuae-falcon-40b', 5)]\n",
      "----------------------------------------\n",
      "SSD\n",
      "[('llama-2-7b', 1), ('llama-2-13b', 2), ('llama-2-70b', 3), ('mosaicml-mpt-30b', 4), ('tiiuae-falcon-40b', 5)]\n",
      "----------------------------------------\n",
      "\n",
      "\n",
      "all_toxicity_combined_only_gen_logproba.pickle\n",
      "----------------------------------------\n",
      "FSD\n",
      "[('mosaicml-mpt-30b', 1), ('llama-2-7b', 2), ('llama-2-13b', 3), ('tiiuae-falcon-40b', 4), ('llama-2-70b', 5)]\n",
      "----------------------------------------\n",
      "SSD\n",
      "[('mosaicml-mpt-30b', 1), ('llama-2-7b', 2), ('llama-2-13b', 3), ('tiiuae-falcon-40b', 4), ('llama-2-70b', 5)]\n",
      "----------------------------------------\n",
      "\n",
      "\n",
      "toxic_only_prompt_gen_log_proba.pickle\n",
      "----------------------------------------\n",
      "FSD\n",
      "[('llama-2-70b', 1), ('llama-2-7b', 2), ('llama-2-13b', 3), ('mosaicml-mpt-30b', 4), ('tiiuae-falcon-40b', 5)]\n",
      "----------------------------------------\n",
      "SSD\n",
      "[('llama-2-70b', 1), ('llama-2-7b', 2), ('llama-2-13b', 3), ('mosaicml-mpt-30b', 4), ('tiiuae-falcon-40b', 5)]\n",
      "----------------------------------------\n",
      "\n",
      "\n",
      "all_toxicity_combined_prompt_gen_logproba.pickle\n",
      "----------------------------------------\n",
      "FSD\n",
      "[('mosaicml-mpt-30b', 1), ('tiiuae-falcon-40b', 2), ('llama-2-7b', 3), ('llama-2-13b', 4), ('llama-2-70b', 5)]\n",
      "----------------------------------------\n",
      "SSD\n",
      "[('mosaicml-mpt-30b', 1), ('tiiuae-falcon-40b', 2), ('llama-2-7b', 3), ('llama-2-13b', 4), ('llama-2-70b', 5)]\n",
      "----------------------------------------\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "parent_dir = \"Toxicity_New_Ranked_Lists/\"\n",
    "\n",
    "\n",
    "def _aggregate_pickled_lists(pickle_path):\n",
    "    \"\"\"Load the ranked lists stored at ``pickle_path`` and aggregate them.\n",
    "\n",
    "    Returns a list of ``(item, rank)`` tuples, ranks starting at 1, built from\n",
    "    element 0 of the ``aggregate_ordered_lists`` result.\n",
    "    \"\"\"\n",
    "    # pickle.load can execute arbitrary code: only point this at trusted files.\n",
    "    with open(pickle_path, \"rb\") as handle:\n",
    "        ranked_lists = np.array(pickle.load(handle))\n",
    "    # Rows are the individual lists: aggregate over every column and give\n",
    "    # each list the same weight.\n",
    "    aggregated = aggregate_ordered_lists(ranked_lists,\n",
    "                                         ranked_lists.shape[1],\n",
    "                                         importance=np.ones(ranked_lists.shape[0]))\n",
    "    optimal_list = list(aggregated[0])\n",
    "    return [(item, rank) for rank, item in enumerate(optimal_list, start=1)]\n",
    "\n",
    "\n",
    "results = {}\n",
    "# glob order is filesystem-dependent; results are keyed by directory name,\n",
    "# so the order only affects the printout below.\n",
    "for dir_name in glob.glob(f\"{parent_dir}*/\"):\n",
    "    # normpath drops the trailing separator so basename yields the directory name.\n",
    "    key = os.path.basename(os.path.normpath(dir_name))\n",
    "    ranked_optimal_list_fo = _aggregate_pickled_lists(os.path.join(dir_name, \"first_order.pickle\"))\n",
    "    ranked_optimal_list_so = _aggregate_pickled_lists(os.path.join(dir_name, \"second_order.pickle\"))\n",
    "\n",
    "    print(key)\n",
    "    print(\"-\" * 40)\n",
    "    print(\"FSD\")\n",
    "    print(ranked_optimal_list_fo)\n",
    "    print(\"-\" * 40)\n",
    "    print(\"SSD\")\n",
    "    print(ranked_optimal_list_so)\n",
    "    print(\"-\" * 40)\n",
    "    print(\"\\n\")\n",
    "    results[key] = {\"fo\": ranked_optimal_list_fo, \"so\": ranked_optimal_list_so}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'toxic_only_gen_log_proba.pickle': {'fo': [('mosaicml-mpt-30b', 1),\n",
       "   ('llama-2-13b', 2),\n",
       "   ('llama-2-7b', 3),\n",
       "   ('llama-2-70b', 4),\n",
       "   ('tiiuae-falcon-40b', 5)],\n",
       "  'so': [('mosaicml-mpt-30b', 1),\n",
       "   ('llama-2-13b', 2),\n",
       "   ('llama-2-7b', 3),\n",
       "   ('llama-2-70b', 4),\n",
       "   ('tiiuae-falcon-40b', 5)]},\n",
       " 'nontoxic_only_gen_log_proba.pickle': {'fo': [('llama-2-7b', 1),\n",
       "   ('llama-2-13b', 2),\n",
       "   ('mosaicml-mpt-30b', 3),\n",
       "   ('llama-2-70b', 4),\n",
       "   ('tiiuae-falcon-40b', 5)],\n",
       "  'so': [('llama-2-7b', 1),\n",
       "   ('llama-2-13b', 2),\n",
       "   ('llama-2-70b', 3),\n",
       "   ('mosaicml-mpt-30b', 4),\n",
       "   ('tiiuae-falcon-40b', 5)]},\n",
       " 'promptgen_logprob_conditional_onlytoxic.pickle': {'fo': [('llama-2-70b', 1),\n",
       "   ('llama-2-7b', 2),\n",
       "   ('llama-2-13b', 3),\n",
       "   ('mosaicml-mpt-30b', 4),\n",
       "   ('tiiuae-falcon-40b', 5)],\n",
       "  'so': [('llama-2-70b', 1),\n",
       "   ('llama-2-7b', 2),\n",
       "   ('llama-2-13b', 3),\n",
       "   ('mosaicml-mpt-30b', 4),\n",
       "   ('tiiuae-falcon-40b', 5)]},\n",
       " 'nontoxic_only_prompt_gen_log_proba.pickle': {'fo': [('mosaicml-mpt-30b', 1),\n",
       "   ('llama-2-13b', 2),\n",
       "   ('llama-2-7b', 3),\n",
       "   ('llama-2-70b', 4),\n",
       "   ('tiiuae-falcon-40b', 5)],\n",
       "  'so': [('llama-2-7b', 1),\n",
       "   ('llama-2-13b', 2),\n",
       "   ('llama-2-70b', 3),\n",
       "   ('mosaicml-mpt-30b', 4),\n",
       "   ('tiiuae-falcon-40b', 5)]},\n",
       " 'all_toxicity_combined_only_gen_logproba.pickle': {'fo': [('mosaicml-mpt-30b',\n",
       "    1),\n",
       "   ('llama-2-7b', 2),\n",
       "   ('llama-2-13b', 3),\n",
       "   ('tiiuae-falcon-40b', 4),\n",
       "   ('llama-2-70b', 5)],\n",
       "  'so': [('mosaicml-mpt-30b', 1),\n",
       "   ('llama-2-7b', 2),\n",
       "   ('llama-2-13b', 3),\n",
       "   ('tiiuae-falcon-40b', 4),\n",
       "   ('llama-2-70b', 5)]},\n",
       " 'toxic_only_prompt_gen_log_proba.pickle': {'fo': [('llama-2-70b', 1),\n",
       "   ('llama-2-7b', 2),\n",
       "   ('llama-2-13b', 3),\n",
       "   ('mosaicml-mpt-30b', 4),\n",
       "   ('tiiuae-falcon-40b', 5)],\n",
       "  'so': [('llama-2-70b', 1),\n",
       "   ('llama-2-7b', 2),\n",
       "   ('llama-2-13b', 3),\n",
       "   ('mosaicml-mpt-30b', 4),\n",
       "   ('tiiuae-falcon-40b', 5)]},\n",
       " 'all_toxicity_combined_prompt_gen_logproba.pickle': {'fo': [('mosaicml-mpt-30b',\n",
       "    1),\n",
       "   ('tiiuae-falcon-40b', 2),\n",
       "   ('llama-2-7b', 3),\n",
       "   ('llama-2-13b', 4),\n",
       "   ('llama-2-70b', 5)],\n",
       "  'so': [('mosaicml-mpt-30b', 1),\n",
       "   ('tiiuae-falcon-40b', 2),\n",
       "   ('llama-2-7b', 3),\n",
       "   ('llama-2-13b', 4),\n",
       "   ('llama-2-70b', 5)]}}"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Load the pickled ranked lists that the following cells aggregate.\n",
    "mrm_pickle_path = 'ranked_lists_mean_risk_model_severe_toxicity_all.pickle'\n",
    "with open(mrm_pickle_path, mode='rb') as handle:\n",
    "    ranked_lists_mrm = pickle.load(handle)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Aggregate the loaded lists: rows of x_mrm are the individual lists and\n",
    "# every column is used (k = number of columns).\n",
    "x_mrm = np.array(ranked_lists_mrm)\n",
    "k = x_mrm.shape[1]\n",
    "# Equal weight for every list; a custom vector such as [8, 5, 7, 7] can be\n",
    "# substituted here to favour some lists over others.\n",
    "importance = np.ones(x_mrm.shape[0])\n",
    "results_mrm = aggregate_ordered_lists(x=x_mrm, k=k, importance=importance)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['mosaicml-mpt-30b', 'llama-2-7b', 'llama-2-13b', 'llama-2-70b', 'tiiuae-falcon-40b']\n",
      "----------------------------------------------------------------------------------------------------\n",
      "[('mosaicml-mpt-30b', 1), ('llama-2-7b', 2), ('llama-2-13b', 3), ('llama-2-70b', 4), ('tiiuae-falcon-40b', 5)]\n"
     ]
    }
   ],
   "source": [
    "# Element 0 of the aggregation result is the aggregated list; show it both\n",
    "# as-is and paired with 1-based ranks.\n",
    "optimal_list_mrm = list(results_mrm[0])\n",
    "ranked_optimal_list_mrm = [\n",
    "    (item, rank) for rank, item in enumerate(optimal_list_mrm, start=1)\n",
    "]\n",
    "print(optimal_list_mrm)\n",
    "print(\"-\"*100)\n",
    "print(ranked_optimal_list_mrm)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
