{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "0032b0c5-e766-4091-b60a-5a9f92a966ad",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import jsonlines\n",
    "from collections import defaultdict\n",
    "import pickle\n",
    "import numpy as np\n",
    "from collections import Counter\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c8279b49-b204-4166-98dd-8196c326e94c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def merge_dicts(dict_list):\n",
    "    \"\"\"Collect the values of several dicts under their shared keys.\n",
    "\n",
    "    Args:\n",
    "        dict_list: iterable of dicts to combine.\n",
    "\n",
    "    Returns:\n",
    "        A `defaultdict(list)` mapping every key seen in any input dict to the\n",
    "        list of values stored under it, in input order. A dict that lacks a\n",
    "        key contributes nothing to that key's list.\n",
    "    \"\"\"\n",
    "    merged = defaultdict(list)\n",
    "    # Flatten all (key, value) pairs into one stream, preserving input order.\n",
    "    all_items = (item for mapping in dict_list for item in mapping.items())\n",
    "    for key, value in all_items:\n",
    "        merged[key].append(value)\n",
    "    return merged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "90a18a81-1b49-4743-97b1-03386396358f",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def get_max(score_dict, model_names):\n",
    "    \"\"\"Find, for every key of `score_dict`, the model(s) holding the top score.\n",
    "\n",
    "    Args:\n",
    "        score_dict: mapping from a key (a metric name in this notebook) to a\n",
    "            sequence of scores.\n",
    "        model_names: model names; position i names the model whose score sits\n",
    "            at position i of each score sequence.\n",
    "\n",
    "    Returns:\n",
    "        Dict mapping each key to the list of model names whose score equals\n",
    "        the maximum for that key. Ties yield several names.\n",
    "    \"\"\"\n",
    "    def names_at_max(values):\n",
    "        # Comparing against the maximum elementwise marks every tied position.\n",
    "        top_positions = np.argwhere(values == np.amax(values)).flatten()\n",
    "        return [model_names[position] for position in top_positions]\n",
    "\n",
    "    return {key: names_at_max(values) for key, values in score_dict.items()}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4e274bab-7174-4167-a9d9-23cfa2913d64",
   "metadata": {},
   "source": [
    "#### Test data: number of wins per model, by metric"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3eaf7d18-8b9a-4cb0-8256-15b149d8060f",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# For every record in the test file, record which model(s) score best on each metric.\n",
    "winners_per_ip = []\n",
    "with jsonlines.open('test_data_prepared.jsonl') as reader:\n",
    "    for obj in reader:\n",
    "        candidates = obj['candidates']\n",
    "        # Both lists follow candidate order, so names and scores stay index-aligned.\n",
    "        model_order = [candidate['model'] for candidate in candidates]\n",
    "        scores = [candidate['scores'] for candidate in candidates]\n",
    "        all_scores_dict = merge_dicts(scores)\n",
    "        winners_per_ip.append(get_max(all_scores_dict, model_order))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "89e533d3-9b0f-43f5-b3e8-770ecbde88ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Regroup the per-record winner dicts by key: each metric maps to a list holding\n",
    "# one winner list per record.\n",
    "metric_wise_grouped_winners = merge_dicts(winners_per_ip)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "0359df68-5b8e-46c7-99f9-20505dab5743",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Per metric, tally how often each model appears among the winners; the\n",
    "# resulting (name, count) pairs are ordered from most to least frequent.\n",
    "most_common = {\n",
    "    metric: Counter(np.concatenate(winner_lists)).most_common()\n",
    "    for metric, winner_lists in metric_wise_grouped_winners.items()\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "34a31623-f118-464c-985b-689044bed292",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Order for metric: logprobs\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>flan-t5-xxl</td>\n",
       "      <td>2277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>oasst-sft-4-pythia-12b-epoch-3.5</td>\n",
       "      <td>1312</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>chatglm-6b</td>\n",
       "      <td>921</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>alpaca-native</td>\n",
       "      <td>656</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>stablelm-tuned-alpha-7b</td>\n",
       "      <td>272</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>mpt-7b-instruct</td>\n",
       "      <td>229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>vicuna-13b-1.1</td>\n",
       "      <td>201</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>koala-7B-HF</td>\n",
       "      <td>172</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>94</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>mpt-7b</td>\n",
       "      <td>61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>llama-7b-hf-baize-lora-bf16</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>moss-moon-003-sft</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                name  count\n",
       "0                        flan-t5-xxl   2277\n",
       "1   oasst-sft-4-pythia-12b-epoch-3.5   1312\n",
       "2                         chatglm-6b    921\n",
       "3                      alpaca-native    656\n",
       "4            stablelm-tuned-alpha-7b    272\n",
       "5                    mpt-7b-instruct    229\n",
       "6                     vicuna-13b-1.1    201\n",
       "7                        koala-7B-HF    172\n",
       "8                       dolly-v2-12b     94\n",
       "9                             mpt-7b     61\n",
       "10       llama-7b-hf-baize-lora-bf16     25\n",
       "11                 moss-moon-003-sft     16"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Order for metric: rougeL\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>oasst-sft-4-pythia-12b-epoch-3.5</td>\n",
       "      <td>1975</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>alpaca-native</td>\n",
       "      <td>1069</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>vicuna-13b-1.1</td>\n",
       "      <td>742</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>chatglm-6b</td>\n",
       "      <td>636</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>flan-t5-xxl</td>\n",
       "      <td>225</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>koala-7B-HF</td>\n",
       "      <td>202</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>stablelm-tuned-alpha-7b</td>\n",
       "      <td>167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>llama-7b-hf-baize-lora-bf16</td>\n",
       "      <td>95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>moss-moon-003-sft</td>\n",
       "      <td>79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>mpt-7b-instruct</td>\n",
       "      <td>32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>mpt-7b</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                name  count\n",
       "0   oasst-sft-4-pythia-12b-epoch-3.5   1975\n",
       "1                      alpaca-native   1069\n",
       "2                     vicuna-13b-1.1    742\n",
       "3                         chatglm-6b    636\n",
       "4                        flan-t5-xxl    225\n",
       "5                        koala-7B-HF    202\n",
       "6            stablelm-tuned-alpha-7b    167\n",
       "7        llama-7b-hf-baize-lora-bf16     95\n",
       "8                  moss-moon-003-sft     79\n",
       "9                       dolly-v2-12b     49\n",
       "10                   mpt-7b-instruct     32\n",
       "11                            mpt-7b     11"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Order for metric: rougeLsum\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>oasst-sft-4-pythia-12b-epoch-3.5</td>\n",
       "      <td>1967</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>alpaca-native</td>\n",
       "      <td>1032</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>vicuna-13b-1.1</td>\n",
       "      <td>800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>chatglm-6b</td>\n",
       "      <td>628</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>koala-7B-HF</td>\n",
       "      <td>205</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>flan-t5-xxl</td>\n",
       "      <td>182</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>stablelm-tuned-alpha-7b</td>\n",
       "      <td>178</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>llama-7b-hf-baize-lora-bf16</td>\n",
       "      <td>117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>moss-moon-003-sft</td>\n",
       "      <td>75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>46</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>mpt-7b-instruct</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>mpt-7b</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                name  count\n",
       "0   oasst-sft-4-pythia-12b-epoch-3.5   1967\n",
       "1                      alpaca-native   1032\n",
       "2                     vicuna-13b-1.1    800\n",
       "3                         chatglm-6b    628\n",
       "4                        koala-7B-HF    205\n",
       "5                        flan-t5-xxl    182\n",
       "6            stablelm-tuned-alpha-7b    178\n",
       "7        llama-7b-hf-baize-lora-bf16    117\n",
       "8                  moss-moon-003-sft     75\n",
       "9                       dolly-v2-12b     46\n",
       "10                   mpt-7b-instruct     30\n",
       "11                            mpt-7b     15"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Order for metric: rouge1\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>oasst-sft-4-pythia-12b-epoch-3.5</td>\n",
       "      <td>1992</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>alpaca-native</td>\n",
       "      <td>1075</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>vicuna-13b-1.1</td>\n",
       "      <td>808</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>chatglm-6b</td>\n",
       "      <td>610</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>koala-7B-HF</td>\n",
       "      <td>202</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>flan-t5-xxl</td>\n",
       "      <td>180</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>stablelm-tuned-alpha-7b</td>\n",
       "      <td>176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>llama-7b-hf-baize-lora-bf16</td>\n",
       "      <td>117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>moss-moon-003-sft</td>\n",
       "      <td>64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>mpt-7b-instruct</td>\n",
       "      <td>32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>mpt-7b</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                name  count\n",
       "0   oasst-sft-4-pythia-12b-epoch-3.5   1992\n",
       "1                      alpaca-native   1075\n",
       "2                     vicuna-13b-1.1    808\n",
       "3                         chatglm-6b    610\n",
       "4                        koala-7B-HF    202\n",
       "5                        flan-t5-xxl    180\n",
       "6            stablelm-tuned-alpha-7b    176\n",
       "7        llama-7b-hf-baize-lora-bf16    117\n",
       "8                  moss-moon-003-sft     64\n",
       "9                       dolly-v2-12b     35\n",
       "10                   mpt-7b-instruct     32\n",
       "11                            mpt-7b     13"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Order for metric: rouge2\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>oasst-sft-4-pythia-12b-epoch-3.5</td>\n",
       "      <td>1771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>alpaca-native</td>\n",
       "      <td>956</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>vicuna-13b-1.1</td>\n",
       "      <td>882</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>chatglm-6b</td>\n",
       "      <td>629</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>koala-7B-HF</td>\n",
       "      <td>258</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>flan-t5-xxl</td>\n",
       "      <td>224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>stablelm-tuned-alpha-7b</td>\n",
       "      <td>211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>moss-moon-003-sft</td>\n",
       "      <td>206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>llama-7b-hf-baize-lora-bf16</td>\n",
       "      <td>206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>112</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>mpt-7b-instruct</td>\n",
       "      <td>101</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>mpt-7b</td>\n",
       "      <td>67</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                name  count\n",
       "0   oasst-sft-4-pythia-12b-epoch-3.5   1771\n",
       "1                      alpaca-native    956\n",
       "2                     vicuna-13b-1.1    882\n",
       "3                         chatglm-6b    629\n",
       "4                        koala-7B-HF    258\n",
       "5                        flan-t5-xxl    224\n",
       "6            stablelm-tuned-alpha-7b    211\n",
       "7                  moss-moon-003-sft    206\n",
       "8        llama-7b-hf-baize-lora-bf16    206\n",
       "9                       dolly-v2-12b    112\n",
       "10                   mpt-7b-instruct    101\n",
       "11                            mpt-7b     67"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Order for metric: bleu\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>oasst-sft-4-pythia-12b-epoch-3.5</td>\n",
       "      <td>1806</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>alpaca-native</td>\n",
       "      <td>969</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>vicuna-13b-1.1</td>\n",
       "      <td>796</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>chatglm-6b</td>\n",
       "      <td>581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>stablelm-tuned-alpha-7b</td>\n",
       "      <td>206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>koala-7B-HF</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>llama-7b-hf-baize-lora-bf16</td>\n",
       "      <td>161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>moss-moon-003-sft</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>flan-t5-xxl</td>\n",
       "      <td>108</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>mpt-7b-instruct</td>\n",
       "      <td>41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>mpt-7b</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                name  count\n",
       "0   oasst-sft-4-pythia-12b-epoch-3.5   1806\n",
       "1                      alpaca-native    969\n",
       "2                     vicuna-13b-1.1    796\n",
       "3                         chatglm-6b    581\n",
       "4            stablelm-tuned-alpha-7b    206\n",
       "5                        koala-7B-HF    200\n",
       "6        llama-7b-hf-baize-lora-bf16    161\n",
       "7                  moss-moon-003-sft    150\n",
       "8                        flan-t5-xxl    108\n",
       "9                       dolly-v2-12b     77\n",
       "10                   mpt-7b-instruct     41\n",
       "11                            mpt-7b     16"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Order for metric: bertscore\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>oasst-sft-4-pythia-12b-epoch-3.5</td>\n",
       "      <td>2151</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>alpaca-native</td>\n",
       "      <td>1018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>vicuna-13b-1.1</td>\n",
       "      <td>668</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>chatglm-6b</td>\n",
       "      <td>525</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>flan-t5-xxl</td>\n",
       "      <td>186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>koala-7B-HF</td>\n",
       "      <td>145</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>stablelm-tuned-alpha-7b</td>\n",
       "      <td>144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>llama-7b-hf-baize-lora-bf16</td>\n",
       "      <td>78</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>moss-moon-003-sft</td>\n",
       "      <td>77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>mpt-7b-instruct</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>mpt-7b</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                name  count\n",
       "0   oasst-sft-4-pythia-12b-epoch-3.5   2151\n",
       "1                      alpaca-native   1018\n",
       "2                     vicuna-13b-1.1    668\n",
       "3                         chatglm-6b    525\n",
       "4                        flan-t5-xxl    186\n",
       "5                        koala-7B-HF    145\n",
       "6            stablelm-tuned-alpha-7b    144\n",
       "7        llama-7b-hf-baize-lora-bf16     78\n",
       "8                  moss-moon-003-sft     77\n",
       "9                    mpt-7b-instruct     20\n",
       "10                      dolly-v2-12b     18\n",
       "11                            mpt-7b      6"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Order for metric: bleurt\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>oasst-sft-4-pythia-12b-epoch-3.5</td>\n",
       "      <td>1619</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>alpaca-native</td>\n",
       "      <td>866</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>vicuna-13b-1.1</td>\n",
       "      <td>630</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>chatglm-6b</td>\n",
       "      <td>549</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>llama-7b-hf-baize-lora-bf16</td>\n",
       "      <td>361</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>moss-moon-003-sft</td>\n",
       "      <td>306</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>koala-7B-HF</td>\n",
       "      <td>205</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>mpt-7b-instruct</td>\n",
       "      <td>154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>stablelm-tuned-alpha-7b</td>\n",
       "      <td>129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>109</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>flan-t5-xxl</td>\n",
       "      <td>93</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>mpt-7b</td>\n",
       "      <td>54</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                name  count\n",
       "0   oasst-sft-4-pythia-12b-epoch-3.5   1619\n",
       "1                      alpaca-native    866\n",
       "2                     vicuna-13b-1.1    630\n",
       "3                         chatglm-6b    549\n",
       "4        llama-7b-hf-baize-lora-bf16    361\n",
       "5                  moss-moon-003-sft    306\n",
       "6                        koala-7B-HF    205\n",
       "7                    mpt-7b-instruct    154\n",
       "8            stablelm-tuned-alpha-7b    129\n",
       "9                       dolly-v2-12b    109\n",
       "10                       flan-t5-xxl     93\n",
       "11                            mpt-7b     54"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Order for metric: bartscore\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>vicuna-13b-1.1</td>\n",
       "      <td>1077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>oasst-sft-4-pythia-12b-epoch-3.5</td>\n",
       "      <td>825</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>chatglm-6b</td>\n",
       "      <td>586</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>moss-moon-003-sft</td>\n",
       "      <td>559</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>alpaca-native</td>\n",
       "      <td>545</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>llama-7b-hf-baize-lora-bf16</td>\n",
       "      <td>472</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>koala-7B-HF</td>\n",
       "      <td>257</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>mpt-7b-instruct</td>\n",
       "      <td>217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>180</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>stablelm-tuned-alpha-7b</td>\n",
       "      <td>166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>mpt-7b</td>\n",
       "      <td>104</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>flan-t5-xxl</td>\n",
       "      <td>50</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                name  count\n",
       "0                     vicuna-13b-1.1   1077\n",
       "1   oasst-sft-4-pythia-12b-epoch-3.5    825\n",
       "2                         chatglm-6b    586\n",
       "3                  moss-moon-003-sft    559\n",
       "4                      alpaca-native    545\n",
       "5        llama-7b-hf-baize-lora-bf16    472\n",
       "6                        koala-7B-HF    257\n",
       "7                    mpt-7b-instruct    217\n",
       "8                       dolly-v2-12b    180\n",
       "9            stablelm-tuned-alpha-7b    166\n",
       "10                            mpt-7b    104\n",
       "11                       flan-t5-xxl     50"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "for metric in most_common.keys():\n",
    "    name = []\n",
    "    count = []\n",
    "    print(f\"Order for metric: {metric}\")\n",
    "    for vals in most_common[metric]:\n",
    "        name.append(vals[0])\n",
    "        count.append(vals[1])\n",
    "    df = pd.DataFrame.from_dict({\"name\":name,\"count\":count})\n",
    "    display(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "fa06a0f6-ade0-451c-994a-0952680d857a",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Order for metric: logprobs\n",
      "Order for metric: rougeL\n",
      "Order for metric: rougeLsum\n",
      "Order for metric: rouge1\n",
      "Order for metric: rouge2\n",
      "Order for metric: bleu\n",
      "Order for metric: bertscore\n",
      "Order for metric: bleurt\n",
      "Order for metric: bartscore\n"
     ]
    }
   ],
   "source": [
    "ranked_list = {}\n",
    "for metric in most_common.keys():\n",
    "    name = []\n",
    "    print(f\"Order for metric: {metric}\")\n",
    "    for vals in most_common[metric]:\n",
    "        name.append(vals[0])\n",
    "    ranked_list[metric] = name\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "id": "5567c3df-2223-42bd-92c5-7ec2cdcef3ec",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "with open(\"MeanWinRate\", 'wb') as handle:\n",
    "        pickle.dump(ranked_list, handle, protocol=pickle.HIGHEST_PROTOCOL)\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
