{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "3431be42-48a5-457c-b71b-8d7f6bb22428",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from tqdm.auto import tqdm\n",
    "import re\n",
    "from fuzzywuzzy import fuzz\n",
    "from datasets import load_dataset\n",
    "\n",
    "def search(s, s_list):\n",
    "    scores = [fuzz.token_sort_ratio(s, s_try) for s_try in s_list]\n",
    "    return [s_list[np.argmax(scores)], np.max(scores)]\n",
    "    \n",
    "def standard_name(s, slash=True):\n",
    "    if not slash:\n",
    "        return s.lower().replace(\"-hf\",\"\")\n",
    "    else:\n",
    "        if \"/\" in s:\n",
    "            return s.split(\"/\")[1].lower().replace(\"-hf\",\"\")\n",
    "        else:\n",
    "            return s.lower().replace(\"-hf\",\"\")\n",
    "\n",
    "def consolidate_columns(df):\n",
    "    # Get all columns with '_x' suffix\n",
    "    x_columns = [col for col in df.columns if col.endswith('_x')]\n",
    "    \n",
    "    # Iterate over each '_x' column\n",
    "    for x_col in x_columns:\n",
    "        # Derive the corresponding '_y' column name\n",
    "        y_col = x_col[:-2] + '_y'\n",
    "        \n",
    "        # Consolidate columns\n",
    "        df[x_col[:-2]] = df[x_col].combine_first(df[y_col])\n",
    "        \n",
    "        # Drop the '_x' and '_y' columns\n",
    "        df.drop([x_col, y_col], axis=1, inplace=True)\n",
    "    \n",
    "    return df\n",
    "    \n",
    "def remove_params(s):\n",
    "    pattern = r'\\d+(\\.\\d+)?[BbMm](\\d)?'\n",
    "    cleaned_str = re.sub(pattern, '', s)\n",
    "    if cleaned_str[-1]=='-':\n",
    "        cleaned_str = cleaned_str[:-1]\n",
    "    cleaned_str = cleaned_str.replace(\"--\",\"-\")\n",
    "    return cleaned_str\n",
    "    \n",
    "def are_strings_equivalent(str1, str2):\n",
    "    cleaned_str1 = remove_params(str1)\n",
    "    cleaned_str2 = remove_params(str2)\n",
    "    bool1 = cleaned_str1 == cleaned_str2\n",
    "    return bool1 \n",
    "\n",
    "def get_families(data, min=2):\n",
    "    if type(data)==list:\n",
    "        models = np.unique(data).tolist()\n",
    "    else:\n",
    "        models = np.unique(list(data.Model)).tolist()\n",
    "    D = (np.array([[are_strings_equivalent(m1, m2) for m1 in models] for m2 in tqdm(models)]))\n",
    "    \n",
    "    families = []\n",
    "    while len(models)>0:\n",
    "        indices = [j for j,bool in enumerate(D[0]) if bool]\n",
    "        D = np.delete(D, indices, axis=0)\n",
    "        D = np.delete(D, indices, axis=1)\n",
    "        families.append(np.array(models)[indices].tolist())\n",
    "    \n",
    "        for m in np.array(models)[indices].tolist():\n",
    "            models.remove(m)\n",
    "    \n",
    "    families = [f for f in families if len(f)>=min]\n",
    "    #families_instruct = [f for f in families if 'chat' in f[0].lower() or 'instruct' in f[0].lower() or '-it' in f[0][-4:].lower()]\n",
    "    #families_base = [f for f in families if f not in families_instruct]\n",
    "\n",
    "    families = [np.sort(f).tolist() for f in families]\n",
    "    #families_instruct = [np.sort(f).tolist() for f in families if data.loc[data.Model==f[0]]['T'].iloc[0]=='💬']\n",
    "    #families_base = [np.sort(f).tolist() for f in families if data.loc[data.Model==f[0]]['T'].iloc[0]=='🟢']\n",
    "    return families#, families_base, families_instruct\n",
    "\n",
    "def get_family_name(strings):\n",
    "    # Start with the shortest string in the list\n",
    "    shortest_string = min(strings, key=len)\n",
    "    length = len(shortest_string)\n",
    "    \n",
    "    # Iterate over all possible substrings of the shortest string\n",
    "    for sub_len in range(length, 0, -1):  # Start with the longest substrings\n",
    "        for i in range(length - sub_len + 1):\n",
    "            substring = shortest_string[i:i + sub_len]\n",
    "            # Check if this substring is in all other strings\n",
    "            if all(substring in string for string in strings):\n",
    "                return substring\n",
    "    \n",
    "    return \"\"  # Return an empty string if no common substring is found\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cc1f0bc4-48bf-4e74-825c-4d5c8c4ee29f",
   "metadata": {},
   "source": [
    "## Gathering model names linked to a family from the old and new open llm leaderboards"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "id": "bcd5be3a-62ec-4870-801c-3213f45947db",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fe203be468a1436598184e4f062ba33d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/707 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "lb_new = pd.read_csv(\"open-llm-leaderboard_new.csv\")\n",
    "new_families = get_families(lb_new)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "eb5780d3-88de-401f-b543-9888bc38e24f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1eee2585034a4e7a97e769655774560e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/6811 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "KeyboardInterrupt\n",
      "\n"
     ]
    }
   ],
   "source": [
    "lb_old = pd.read_csv(\"open-llm-leaderboard_old.csv\")\n",
    "old_families = get_families(lb_old)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "730a4ecc-2bc9-48c1-9642-19dbdc32d217",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "01-ai/Yi-1.5-34B\n",
      "01-ai/Yi-1.5-34B-32K\n",
      "01-ai/Yi-1.5-34B-Chat\n",
      "01-ai/Yi-1.5-34B-Chat-16K\n",
      "01-ai/Yi-1.5-6B\n",
      "01-ai/Yi-1.5-6B-Chat\n",
      "01-ai/Yi-1.5-9B\n",
      "01-ai/Yi-1.5-9B-32K\n",
      "01-ai/Yi-1.5-9B-Chat\n",
      "01-ai/Yi-1.5-9B-Chat-16K\n",
      "01-ai/Yi-34B\n",
      "01-ai/Yi-34B-200K\n",
      "01-ai/Yi-34B-Chat\n",
      "01-ai/Yi-6B\n",
      "01-ai/Yi-6B-200K\n",
      "01-ai/Yi-6B-Chat\n",
      "01-ai/Yi-9B\n",
      "01-ai/Yi-9B-200K\n",
      "Azure99/blossom-v5.1-34b\n",
      "Azure99/blossom-v5.1-9b\n",
      "BEE-spoke-data/smol_llama-101M-GQA\n",
      "BEE-spoke-data/smol_llama-220M-GQA\n",
      "CohereForAI/aya-23-35B\n",
      "CohereForAI/aya-23-8B\n",
      "EleutherAI/gpt-neo-1.3B\n",
      "EleutherAI/gpt-neo-2.7B\n",
      "EleutherAI/pythia-12b\n",
      "EleutherAI/pythia-160m\n",
      "EleutherAI/pythia-2.8b\n",
      "EleutherAI/pythia-410m\n",
      "EleutherAI/pythia-6.9b\n",
      "HuggingFaceTB/SmolLM-1.7B\n",
      "HuggingFaceTB/SmolLM-1.7B-Instruct\n",
      "HuggingFaceTB/SmolLM-135M\n",
      "HuggingFaceTB/SmolLM-135M-Instruct\n",
      "HuggingFaceTB/SmolLM-360M\n",
      "HuggingFaceTB/SmolLM-360M-Instruct\n",
      "NeverSleep/Lumimaid-v0.2-12B\n",
      "NeverSleep/Lumimaid-v0.2-8B\n",
      "NousResearch/Yarn-Llama-2-13b-128k\n",
      "NousResearch/Yarn-Llama-2-7b-128k\n",
      "OpenBuddy/openbuddy-zero-3b-v21.2-32k\n",
      "OpenBuddy/openbuddy-zero-56b-v21.2-32k\n",
      "Qwen/Qwen1.5-0.5B\n",
      "Qwen/Qwen1.5-0.5B-Chat\n",
      "Qwen/Qwen1.5-1.8B\n",
      "Qwen/Qwen1.5-1.8B-Chat\n",
      "Qwen/Qwen1.5-110B\n",
      "Qwen/Qwen1.5-110B-Chat\n",
      "Qwen/Qwen1.5-14B\n",
      "Qwen/Qwen1.5-14B-Chat\n",
      "Qwen/Qwen1.5-32B\n",
      "Qwen/Qwen1.5-32B-Chat\n",
      "Qwen/Qwen1.5-4B\n",
      "Qwen/Qwen1.5-4B-Chat\n",
      "Qwen/Qwen1.5-7B\n",
      "Qwen/Qwen1.5-7B-Chat\n",
      "Qwen/Qwen2-0.5B\n",
      "Qwen/Qwen2-0.5B-Instruct\n",
      "Qwen/Qwen2-1.5B\n",
      "Qwen/Qwen2-1.5B-Instruct\n",
      "Qwen/Qwen2-72B\n",
      "Qwen/Qwen2-72B-Instruct\n",
      "Qwen/Qwen2-7B\n",
      "Qwen/Qwen2-7B-Instruct\n",
      "VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct\n",
      "VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct\n",
      "VAGOsolutions/SauerkrautLM-Gemma-2b\n",
      "VAGOsolutions/SauerkrautLM-Gemma-7b\n",
      "WizardLMTeam/WizardLM-13B-V1.0\n",
      "WizardLMTeam/WizardLM-70B-V1.0\n",
      "abacusai/Smaug-34B-v0.1\n",
      "abacusai/Smaug-72B-v0.1\n",
      "allenai/OLMo-1B-hf\n",
      "allenai/OLMo-7B-hf\n",
      "bigcode/starcoder2-15b\n",
      "bigcode/starcoder2-3b\n",
      "bigcode/starcoder2-7b\n",
      "bigscience/bloom-1b1\n",
      "bigscience/bloom-3b\n",
      "bigscience/bloom-560m\n",
      "bigscience/bloom-7b1\n",
      "cognitivecomputations/dolphin-2.9.1-yi-1.5-34b\n",
      "cognitivecomputations/dolphin-2.9.1-yi-1.5-9b\n",
      "cognitivecomputations/dolphin-2.9.2-qwen2-72b\n",
      "cognitivecomputations/dolphin-2.9.2-qwen2-7b\n",
      "databricks/dolly-v2-12b\n",
      "databricks/dolly-v2-3b\n",
      "databricks/dolly-v2-7b\n",
      "deepseek-ai/deepseek-llm-67b-chat\n",
      "deepseek-ai/deepseek-llm-7b-chat\n",
      "facebook/opt-1.3b\n",
      "facebook/opt-30b\n",
      "google/gemma-1.1-2b-it\n",
      "google/gemma-1.1-7b-it\n",
      "google/gemma-2-2b\n",
      "google/gemma-2-2b-it\n",
      "google/gemma-2-9b\n",
      "google/gemma-2-9b-it\n",
      "google/gemma-2b\n",
      "google/gemma-2b-it\n",
      "google/gemma-7b\n",
      "google/gemma-7b-it\n",
      "google/recurrentgemma-2b\n",
      "google/recurrentgemma-2b-it\n",
      "google/recurrentgemma-9b\n",
      "google/recurrentgemma-9b-it\n",
      "huggyllama/llama-13b\n",
      "huggyllama/llama-65b\n",
      "huggyllama/llama-7b\n",
      "internlm/internlm2_5-20b-chat\n",
      "internlm/internlm2_5-7b-chat\n",
      "jpacifico/Chocolatine-3B-Instruct-DPO-v1.0\n",
      "jpacifico/Chocolatine-8B-Instruct-DPO-v1.0\n",
      "lmsys/vicuna-13b-v1.3\n",
      "lmsys/vicuna-7b-v1.3\n",
      "meta-llama/Llama-2-13b-chat-hf\n",
      "meta-llama/Llama-2-13b-hf\n",
      "meta-llama/Llama-2-70b-chat-hf\n",
      "meta-llama/Llama-2-70b-hf\n",
      "meta-llama/Llama-2-7b-chat-hf\n",
      "meta-llama/Llama-2-7b-hf\n",
      "meta-llama/Meta-Llama-3-70B\n",
      "meta-llama/Meta-Llama-3-70B-Instruct\n",
      "meta-llama/Meta-Llama-3-8B\n",
      "meta-llama/Meta-Llama-3-8B-Instruct\n",
      "meta-llama/Meta-Llama-3.1-70B\n",
      "meta-llama/Meta-Llama-3.1-70B-Instruct\n",
      "meta-llama/Meta-Llama-3.1-8B\n",
      "meta-llama/Meta-Llama-3.1-8B-Instruct\n",
      "microsoft/Orca-2-13b\n",
      "microsoft/Orca-2-7b\n",
      "mistralai/Mixtral-8x22B-Instruct-v0.1\n",
      "mistralai/Mixtral-8x22B-v0.1\n",
      "mistralai/Mixtral-8x7B-Instruct-v0.1\n",
      "mistralai/Mixtral-8x7B-v0.1\n",
      "pankajmathur/orca_mini_v3_13b\n",
      "pankajmathur/orca_mini_v3_70b\n",
      "pankajmathur/orca_mini_v3_7b\n",
      "pankajmathur/orca_mini_v7_72b\n",
      "pankajmathur/orca_mini_v7_7b\n",
      "princeton-nlp/Sheared-LLaMA-1.3B\n",
      "princeton-nlp/Sheared-LLaMA-2.7B\n",
      "teknium/OpenHermes-13B\n",
      "teknium/OpenHermes-7B\n",
      "tiiuae/falcon-11B\n",
      "tiiuae/falcon-40b\n",
      "tiiuae/falcon-40b-instruct\n",
      "tiiuae/falcon-7b\n",
      "tiiuae/falcon-7b-instruct\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged_list = [item for sublist in new_families for item in sublist]\n",
    "[print(m) for m in np.sort(merged_list).tolist()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "fb3800a1-fe98-450a-8966-dc291ad9f745",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0-hero/Matter-0.2-32B\n",
      "0-hero/Matter-0.2-7B\n",
      "01-ai/Yi-1.5-34B\n",
      "01-ai/Yi-1.5-34B-32K\n",
      "01-ai/Yi-1.5-34B-Chat\n",
      "01-ai/Yi-1.5-34B-Chat-16K\n",
      "01-ai/Yi-1.5-6B\n",
      "01-ai/Yi-1.5-6B-Chat\n",
      "01-ai/Yi-1.5-9B\n",
      "01-ai/Yi-1.5-9B-32K\n",
      "01-ai/Yi-1.5-9B-Chat\n",
      "01-ai/Yi-1.5-9B-Chat-16K\n",
      "01-ai/Yi-34B\n",
      "01-ai/Yi-34B-200K\n",
      "01-ai/Yi-6B\n",
      "01-ai/Yi-6B-200K\n",
      "01-ai/Yi-9B\n",
      "01-ai/Yi-9B-200K\n",
      "0x7194633/fialka-13B-v3\n",
      "0x7194633/fialka-7B-v3\n",
      "922-CA/monika-ddlc-7b-v1\n",
      "922-CA/monika-ddlc-8b-v1\n",
      "AI-Sweden-Models/gpt-sw3-1.3b\n",
      "AI-Sweden-Models/gpt-sw3-1.3b-instruct\n",
      "AI-Sweden-Models/gpt-sw3-126m\n",
      "AI-Sweden-Models/gpt-sw3-126m-instruct\n",
      "AI-Sweden-Models/gpt-sw3-20b\n",
      "AI-Sweden-Models/gpt-sw3-20b-instruct\n",
      "AI-Sweden-Models/gpt-sw3-356m\n",
      "AI-Sweden-Models/gpt-sw3-356m-instruct\n",
      "AI-Sweden-Models/gpt-sw3-40b\n",
      "AI-Sweden-Models/gpt-sw3-6.7b\n",
      "AIGym/deepseek-coder-1.3b-chat\n",
      "AIGym/deepseek-coder-1.3b-chat-and-function-calling\n",
      "AIGym/deepseek-coder-6.7b-chat\n",
      "AIGym/deepseek-coder-6.7b-chat-and-function-calling\n",
      "Aspik101/trurl-2-13b-pl-instruct_unload\n",
      "Aspik101/trurl-2-7b-pl-instruct_unload\n",
      "Azure99/blossom-v4-qwen1_5-14b\n",
      "Azure99/blossom-v4-qwen1_5-4b\n",
      "Azure99/blossom-v4-qwen1_5-7b\n",
      "Azure99/blossom-v5-14b\n",
      "Azure99/blossom-v5-32b\n",
      "Azure99/blossom-v5-34b\n",
      "Azure99/blossom-v5-4b\n",
      "Azure99/blossom-v5-7b\n",
      "Azure99/blossom-v5-9b\n",
      "BEE-spoke-data/smol_llama-101M-GQA\n",
      "BEE-spoke-data/smol_llama-220M-GQA\n",
      "BFauber/lora_llama2-13b_10e4\n",
      "BFauber/lora_llama2-13b_10e5\n",
      "BFauber/lora_llama2-13b_10e6\n",
      "BFauber/lora_llama2-7b_10e4\n",
      "BFauber/lora_llama2-7b_10e5\n",
      "BFauber/lora_llama2-7b_10e6\n",
      "BFauber/lora_opt1.3b_10e5\n",
      "BFauber/lora_opt125m_10e5\n",
      "BFauber/lora_opt13b_10e5\n",
      "BFauber/lora_opt6.7b_10e5\n",
      "BFauber/opt1.3b_10e4\n",
      "BFauber/opt1.3b_10e5\n",
      "BFauber/opt1.3b_10e6\n",
      "BFauber/opt125m_10e4\n",
      "BFauber/opt125m_10e5\n",
      "BFauber/opt350m_10e5\n",
      "BFauber/opt350m_10e6\n",
      "BreadAi/gpt-YA-1-1_160M\n",
      "BreadAi/gpt-YA-1-1_70M\n",
      "CausalLM/14B\n",
      "CausalLM/14B-DPO-alpha\n",
      "CausalLM/34b-beta\n",
      "CausalLM/35b-beta\n",
      "CausalLM/7B\n",
      "CausalLM/7B-DPO-alpha\n",
      "CobraMamba/mamba-gpt-3b\n",
      "CobraMamba/mamba-gpt-7b\n",
      "ConvexAI/Luminex-32B-v0.2\n",
      "ConvexAI/Luminex-34B-v0.1\n",
      "ConvexAI/Luminex-34B-v0.2\n",
      "ConvexAI/Luminex-72B-v0.1\n",
      "Corianas/1.3b\n",
      "Corianas/111m\n",
      "Corianas/590m\n",
      "Corianas/Quokka_1.3b\n",
      "Corianas/Quokka_2.7b\n",
      "Corianas/Quokka_256m\n",
      "Corianas/Quokka_590m\n",
      "Danielbrdz/Barcenas-10.7b\n",
      "Danielbrdz/Barcenas-13b\n",
      "Danielbrdz/Barcenas-3.8b\n",
      "Danielbrdz/Barcenas-3b\n",
      "Danielbrdz/Barcenas-7b\n",
      "Devio/test-22B\n",
      "Devio/test-3b\n",
      "EleutherAI/gpt-neo-1.3B\n",
      "EleutherAI/gpt-neo-125m\n",
      "EleutherAI/gpt-neo-2.7B\n",
      "EleutherAI/llemma_34b\n",
      "EleutherAI/llemma_7b\n",
      "EleutherAI/pythia-1.3b\n",
      "EleutherAI/pythia-1.4b\n",
      "EleutherAI/pythia-1.4b-deduped\n",
      "EleutherAI/pythia-12b\n",
      "EleutherAI/pythia-12b-deduped\n",
      "EleutherAI/pythia-160m\n",
      "EleutherAI/pythia-160m-deduped\n",
      "EleutherAI/pythia-1b-deduped\n",
      "EleutherAI/pythia-2.7b\n",
      "EleutherAI/pythia-2.8b-deduped\n",
      "EleutherAI/pythia-410m\n",
      "EleutherAI/pythia-410m-deduped\n",
      "EleutherAI/pythia-6.7b\n",
      "EleutherAI/pythia-6.9b-deduped\n",
      "EleutherAI/pythia-70m\n",
      "EleutherAI/pythia-70m-deduped\n",
      "FelixChao/NarutoDolphin-10B\n",
      "FelixChao/NarutoDolphin-7B\n",
      "FelixChao/WestSeverus-10.7B\n",
      "FelixChao/WestSeverus-7B\n",
      "Felladrin/Llama-160M-Chat-v1\n",
      "Felladrin/Llama-68M-Chat-v1\n",
      "FlagAlpha/Llama2-Chinese-13b-Chat\n",
      "FlagAlpha/Llama2-Chinese-7b-Chat\n",
      "Fredithefish/Guanaco-13B-Uncensored\n",
      "Fredithefish/Guanaco-3B-Uncensored\n",
      "Fredithefish/Guanaco-7B-Uncensored\n",
      "Joseph717171/Mistral-10.7B-v0.2\n",
      "Joseph717171/Mistral-12.25B-v0.2\n",
      "Josephgflowers/Tinyllama-1.3B-Cinder-Reason-Test\n",
      "Josephgflowers/Tinyllama-Cinder-1.3B-Reason-Test\n",
      "JosephusCheung/Pwen-14B-Chat-20_30\n",
      "JosephusCheung/Pwen-7B-Chat-20_30\n",
      "KingNish/CodeMaster-v1-7b\n",
      "KingNish/CodeMaster-v1-9b\n",
      "KnutJaegersberg/Deacon-1b\n",
      "KnutJaegersberg/Deacon-20B\n",
      "KnutJaegersberg/Deita-20b\n",
      "KnutJaegersberg/Deita-2b\n",
      "KnutJaegersberg/Deita-32b\n",
      "KnutJaegersberg/Deita-34b\n",
      "KnutJaegersberg/Deita-4b\n",
      "KnutJaegersberg/Deita-500m\n",
      "KnutJaegersberg/deacon-13b\n",
      "KnutJaegersberg/deacon-3b\n",
      "KoboldAI/OPT-13B-Erebus\n",
      "KoboldAI/OPT-13B-Nerybus-Mix\n",
      "KoboldAI/OPT-13B-Nerys-v2\n",
      "KoboldAI/OPT-2.7B-Erebus\n",
      "KoboldAI/OPT-2.7B-Nerybus-Mix\n",
      "KoboldAI/OPT-2.7B-Nerys-v2\n",
      "KoboldAI/OPT-30B-Erebus\n",
      "KoboldAI/OPT-350M-Erebus\n",
      "KoboldAI/OPT-350M-Nerys-v2\n",
      "KoboldAI/OPT-6.7B-Erebus\n",
      "KoboldAI/OPT-6.7B-Nerybus-Mix\n",
      "KoboldAI/fairseq-dense-1.3B\n",
      "KoboldAI/fairseq-dense-125M\n",
      "KoboldAI/fairseq-dense-13B\n",
      "KoboldAI/fairseq-dense-2.7B\n",
      "KoboldAI/fairseq-dense-355M\n",
      "KoboldAI/fairseq-dense-6.7B\n",
      "L-R/LLmRa-1.3B\n",
      "L-R/LLmRa-2.7B\n",
      "LLMs/WizardLM-13B-V1.0\n",
      "LLMs/WizardLM-30B-V1.0\n",
      "LeoLM/leo-hessianai-13b\n",
      "LeoLM/leo-hessianai-7b\n",
      "M4-ai/tau-0.5B\n",
      "M4-ai/tau-1.8B\n",
      "MBZUAI/LaMini-GPT-1.5B\n",
      "MBZUAI/LaMini-GPT-124M\n",
      "MBZUAI/LaMini-GPT-774M\n",
      "MBZUAI/lamini-cerebras-1.3b\n",
      "MBZUAI/lamini-cerebras-111m\n",
      "MBZUAI/lamini-cerebras-256m\n",
      "MBZUAI/lamini-cerebras-590m\n",
      "MBZUAI/lamini-neo-1.3b\n",
      "MBZUAI/lamini-neo-125m\n",
      "MaziyarPanahi/Calme-12B-Instruct-v0.1\n",
      "MaziyarPanahi/Calme-7B-Instruct-v0.1\n",
      "MaziyarPanahi/Llama-3-11B-Instruct-v0.1\n",
      "MaziyarPanahi/Llama-3-70B-Instruct-DPO-v0.1\n",
      "MaziyarPanahi/Llama-3-70B-Instruct-DPO-v0.2\n",
      "MaziyarPanahi/Llama-3-70B-Instruct-DPO-v0.3\n",
      "MaziyarPanahi/Llama-3-70B-Instruct-DPO-v0.4\n",
      "MaziyarPanahi/Llama-3-8B-Instruct-DPO-v0.1\n",
      "MaziyarPanahi/Llama-3-8B-Instruct-DPO-v0.2\n",
      "MaziyarPanahi/Llama-3-8B-Instruct-DPO-v0.3\n",
      "MaziyarPanahi/Llama-3-8B-Instruct-DPO-v0.4\n",
      "MaziyarPanahi/Llama-3-8B-Instruct-v0.1\n",
      "Mikael110/llama-2-13b-guanaco-fp16\n",
      "Mikael110/llama-2-7b-guanaco-fp16\n",
      "Minami-su/Qwen1.5-0.5B-Chat_llamafy\n",
      "Minami-su/Qwen1.5-0.5B-Chat_mistral\n",
      "Minami-su/Qwen1.5-7B-Chat_llamafy\n",
      "Minami-su/Qwen1.5-7B-Chat_mistral\n",
      "NeverSleep/Llama-3-Lumimaid-70B-v0.1\n",
      "NeverSleep/Llama-3-Lumimaid-8B-v0.1\n",
      "NeverSleep/Noromaid-13b-v0.2\n",
      "NeverSleep/Noromaid-7b-v0.2\n",
      "NewstaR/Starlight-13B\n",
      "NewstaR/Starlight-7B\n",
      "NousResearch/CodeLlama-13b-hf\n",
      "NousResearch/CodeLlama-34b-hf\n",
      "NousResearch/CodeLlama-7b-hf\n",
      "NurtureAI/Orca-2-13B-16k\n",
      "NurtureAI/Orca-2-7B-16k\n",
      "OEvortex/HelpingAI-110M\n",
      "OEvortex/HelpingAI-8B\n",
      "OEvortex/HelpingAI-9B\n",
      "Open-Orca/LlongOrca-13B-16k\n",
      "Open-Orca/LlongOrca-7B-16k\n",
      "OpenBuddy/openbuddy-qwen1.5-14b-v21.1-32k\n",
      "OpenBuddy/openbuddy-qwen1.5-32b-v21.1-32k\n",
      "PocketDoc/Dans-PersonalityEngine-13b\n",
      "PocketDoc/Dans-PersonalityEngine-30b\n",
      "Ppoyaa/LexiLumin-20B\n",
      "Ppoyaa/LexiLumin-7B\n",
      "PygmalionAI/pygmalion-1.3b\n",
      "PygmalionAI/pygmalion-2-13b\n",
      "PygmalionAI/pygmalion-2-7b\n",
      "PygmalionAI/pygmalion-2.7b\n",
      "PygmalionAI/pygmalion-350m\n",
      "PygmalionAI/pygmalion-6b\n",
      "QueryloopAI/gemma-2b-openhermes\n",
      "QueryloopAI/gemma-7b-openhermes\n",
      "Qwen/Qwen-14B\n",
      "Qwen/Qwen-72B\n",
      "Qwen/Qwen-7B\n",
      "Qwen/Qwen1.5-0.5B\n",
      "Qwen/Qwen1.5-0.5B-Chat\n",
      "Qwen/Qwen1.5-1.8B\n",
      "Qwen/Qwen1.5-1.8B-Chat\n",
      "Qwen/Qwen1.5-110B\n",
      "Qwen/Qwen1.5-110B-Chat\n",
      "Qwen/Qwen1.5-14B\n",
      "Qwen/Qwen1.5-14B-Chat\n",
      "Qwen/Qwen1.5-32B\n",
      "Qwen/Qwen1.5-32B-Chat\n",
      "Qwen/Qwen1.5-4B\n",
      "Qwen/Qwen1.5-4B-Chat\n",
      "Qwen/Qwen1.5-72B\n",
      "Qwen/Qwen1.5-72B-Chat\n",
      "Qwen/Qwen1.5-7B\n",
      "Qwen/Qwen1.5-7B-Chat\n",
      "Qwen/Qwen2-0.5B\n",
      "Qwen/Qwen2-1.5B\n",
      "Qwen/Qwen2-72B\n",
      "Qwen/Qwen2-7B\n",
      "Qwen/Qwen2-beta-14B\n",
      "Qwen/Qwen2-beta-72B\n",
      "RESMPDEV/Qwen1.5-Wukong-0.5B\n",
      "RESMPDEV/Qwen1.5-Wukong-1.8B\n",
      "RWKV/rwkv-4-14b-pile\n",
      "RWKV/rwkv-4-169m-pile\n",
      "RWKV/rwkv-4-3b-pile\n",
      "RWKV/rwkv-4-430m-pile\n",
      "RWKV/rwkv-4-7b-pile\n",
      "RWKV/rwkv-raven-14b\n",
      "RWKV/rwkv-raven-3b\n",
      "RWKV/rwkv-raven-7b\n",
      "Rallio67/3B-redpajama-conditional-alpha\n",
      "Rallio67/7B-redpajama-conditional-alpha\n",
      "SF-Foundation/TextBase-7B-v0.1\n",
      "SF-Foundation/TextBase-v0.1\n",
      "SUSTech/SUS-Chat-34B\n",
      "SUSTech/SUS-Chat-72B\n",
      "Salesforce/codegen-16B-nl\n",
      "Salesforce/codegen-6B-nl\n",
      "Sharathhebbar24/SSH_300M\n",
      "Sharathhebbar24/SSH_355M\n",
      "TIGER-Lab/MAmmoTH2-7B-Plus\n",
      "TIGER-Lab/MAmmoTH2-8B-Plus\n",
      "TaylorAI/Flash-Llama-13B\n",
      "TaylorAI/Flash-Llama-3B\n",
      "TaylorAI/Flash-Llama-7B\n",
      "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0\n",
      "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0\n",
      "TheBloke/Airoboros-L2-13B-2.1-GPTQ\n",
      "TheBloke/Airoboros-L2-70B-2.1-GPTQ\n",
      "TheBloke/CodeLlama-13B-Instruct-fp16\n",
      "TheBloke/CodeLlama-13B-Python-fp16\n",
      "TheBloke/CodeLlama-34B-Instruct-fp16\n",
      "TheBloke/CodeLlama-34B-Python-fp16\n",
      "TheBloke/Llama-2-13B-GPTQ\n",
      "TheBloke/Llama-2-13B-fp16\n",
      "TheBloke/Llama-2-70B-fp16\n",
      "TheBloke/Llama-2-7B-GPTQ\n",
      "TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ\n",
      "TheBloke/Wizard-Vicuna-13B-Uncensored-HF\n",
      "TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ\n",
      "TheBloke/Wizard-Vicuna-7B-Uncensored-HF\n",
      "TheBloke/gpt4-alpaca-lora-13B-HF\n",
      "TheBloke/gpt4-alpaca-lora-30b-HF\n",
      "TheBloke/guanaco-13B-HF\n",
      "TheBloke/guanaco-65B-HF\n",
      "TheBloke/guanaco-7B-HF\n",
      "TheBloke/koala-13B-HF\n",
      "TheBloke/koala-7B-HF\n",
      "TheBloke/orca_mini_v3_13B-GPTQ\n",
      "TheBloke/orca_mini_v3_7B-GPTQ\n",
      "TheBloke/tulu-13B-fp16\n",
      "TheBloke/tulu-30B-fp16\n",
      "TheBloke/tulu-7B-fp16\n",
      "TigerResearch/tigerbot-13b-base\n",
      "TigerResearch/tigerbot-70b-base\n",
      "TigerResearch/tigerbot-7b-base\n",
      "TomGrc/FusionNet_34Bx2_MoE_v0.1\n",
      "TomGrc/FusionNet_7Bx2_MoE_v0.1\n",
      "VAGOsolutions/SauerkrautLM-Gemma-2b\n",
      "VAGOsolutions/SauerkrautLM-Gemma-7b\n",
      "ValiantLabs/Fireplace-13b\n",
      "ValiantLabs/Fireplace-34b\n",
      "Weyaxi/Bagel-Hermes-2x34B\n",
      "Weyaxi/Bagel-Hermes-2x34b\n",
      "WhiteRabbitNeo/WhiteRabbitNeo-13B-v1\n",
      "WhiteRabbitNeo/WhiteRabbitNeo-33B-v1\n",
      "WizardLM/WizardCoder-Python-34B-V1.0\n",
      "WizardLM/WizardCoder-Python-7B-V1.0\n",
      "WizardLM/WizardLM-30B-V1.0\n",
      "WizardLM/WizardLM-70B-V1.0\n",
      "WizardLM/WizardMath-13B-V1.0\n",
      "WizardLM/WizardMath-70B-V1.0\n",
      "WizardLM/WizardMath-7B-V1.0\n",
      "Xwin-LM/Xwin-LM-13B-V0.1\n",
      "Xwin-LM/Xwin-LM-70B-V0.1\n",
      "Xwin-LM/Xwin-LM-7B-V0.1\n",
      "Yash21/TinyYi-7B-Test\n",
      "Yash21/TinyYi-7b-Test\n",
      "YeungNLP/firefly-llama-13b\n",
      "YeungNLP/firefly-llama-30b\n",
      "YeungNLP/firefly-llama2-13b-pretrain\n",
      "YeungNLP/firefly-llama2-7b-pretrain\n",
      "YeungNLP/firefly-qwen1.5-en-14b-dpo-v0.1\n",
      "YeungNLP/firefly-qwen1.5-en-7b-dpo-v0.1\n",
      "Yukang/Llama-2-13b-longlora-16k-ft\n",
      "Yukang/Llama-2-13b-longlora-32k-ft\n",
      "Yukang/Llama-2-7b-longlora-16k-ft\n",
      "Yukang/Llama-2-7b-longlora-32k-ft\n",
      "Yukang/LongAlpaca-13B\n",
      "Yukang/LongAlpaca-7B\n",
      "abacusai/Fewshot-Metamath-OrcaVicuna-Mistral\n",
      "abacusai/Fewshot-Metamath-OrcaVicuna-Mistral-10B\n",
      "abacusai/Liberated-Qwen1.5-14B\n",
      "abacusai/Liberated-Qwen1.5-72B\n",
      "abacusai/Liberated-Qwen1.5-7B\n",
      "abacusai/Smaug-34B-v0.1\n",
      "abacusai/Smaug-70B-v0.1\n",
      "abacusai/Smaug-72B-v0.1\n",
      "abhinand/tamil-llama-13b-base-v0.1\n",
      "abhinand/tamil-llama-13b-instruct-v0.1\n",
      "abhinand/tamil-llama-7b-base-v0.1\n",
      "abhinand/tamil-llama-7b-instruct-v0.1\n",
      "abhishek/autotrain-llama3-70b-orpo-v2\n",
      "abhishek/autotrain-llama3-orpo-v2\n",
      "abideen/gemma-2b-openhermes\n",
      "abideen/gemma-7b-openhermes\n",
      "adamo1139/Yi-34B-200K-AEZAKMI-v2\n",
      "adamo1139/Yi-6B-200K-AEZAKMI-v2\n",
      "aihub-app/zyte-1.1B\n",
      "aihub-app/zyte-1.1b\n",
      "aihub-app/zyte-1B\n",
      "aisquared/dlite-v1-124m\n",
      "aisquared/dlite-v1-355m\n",
      "aisquared/dlite-v1-774m\n",
      "aisquared/dlite-v2-124m\n",
      "aisquared/dlite-v2-355m\n",
      "aisquared/dlite-v2-774m\n",
      "ajibawa-2023/Python-Code-13B\n",
      "ajibawa-2023/Python-Code-33B\n",
      "ajibawa-2023/Uncensored-Frank-13B\n",
      "ajibawa-2023/Uncensored-Frank-33B\n",
      "ajibawa-2023/Uncensored-Frank-7B\n",
      "ajibawa-2023/Uncensored-Jordan-13B\n",
      "ajibawa-2023/Uncensored-Jordan-7B\n",
      "ajibawa-2023/carl-33b\n",
      "ajibawa-2023/carl-7b\n",
      "ajibawa-2023/scarlett-33b\n",
      "ajibawa-2023/scarlett-7b\n",
      "allenai/OLMo-1B-hf\n",
      "allenai/OLMo-7B-hf\n",
      "allenai/digital-socrates-13b\n",
      "allenai/digital-socrates-7b\n",
      "alnrg2arg/test3_sft_16bit\n",
      "alnrg2arg/test3_sft_4bit\n",
      "aloobun/d-Qwen1.5-0.5B\n",
      "aloobun/d-Qwen1.5-1.8B\n",
      "ausboss/llama-13b-supercot\n",
      "ausboss/llama-30b-supercot\n",
      "beaugogh/Llama2-13b-sharegpt4\n",
      "beaugogh/Llama2-7b-sharegpt4\n",
      "bigcode/starcoder2-15b\n",
      "bigcode/starcoder2-3b\n",
      "bigcode/starcoder2-7b\n",
      "bigcode/starcoderbase\n",
      "bigcode/starcoderbase-1b\n",
      "bigcode/starcoderbase-3b\n",
      "bigcode/starcoderbase-7b\n",
      "bigscience/bloom\n",
      "bigscience/bloom-1b1\n",
      "bigscience/bloom-3b\n",
      "bigscience/bloom-560m\n",
      "bigscience/bloom-7b1\n",
      "bigscience/bloomz-3b\n",
      "bigscience/bloomz-560m\n",
      "bofenghuang/vigogne-13b-chat\n",
      "bofenghuang/vigogne-13b-instruct\n",
      "bofenghuang/vigogne-2-13b-instruct\n",
      "bofenghuang/vigogne-2-7b-instruct\n",
      "bofenghuang/vigogne-33b-instruct\n",
      "bofenghuang/vigogne-7b-chat\n",
      "bofenghuang/vigogne-7b-instruct\n",
      "camel-ai/CAMEL-13B-Combined-Data\n",
      "camel-ai/CAMEL-33B-Combined-Data\n",
      "cerebras/Cerebras-GPT-1.3B\n",
      "cerebras/Cerebras-GPT-111M\n",
      "cerebras/Cerebras-GPT-13B\n",
      "cerebras/Cerebras-GPT-2.7B\n",
      "cerebras/Cerebras-GPT-256M\n",
      "cerebras/Cerebras-GPT-590M\n",
      "cerebras/Cerebras-GPT-6.7B\n",
      "chargoddard/internlm2-20b-llama\n",
      "chargoddard/internlm2-7b-llama\n",
      "chargoddard/internlm2-base-20b-llama\n",
      "chargoddard/internlm2-base-7b-llama\n",
      "chujiezheng/tulu-2-dpo-70b-ExPO\n",
      "chujiezheng/tulu-2-dpo-7b-ExPO\n",
      "circulus/Llama-2-13b-orca-v1\n",
      "circulus/Llama-2-7b-orca-v1\n",
      "clibrain/Llama-2-13b-ft-instruct-es\n",
      "clibrain/Llama-2-7b-ft-instruct-es\n",
      "cloudyu/Mixtral_11Bx2_MoE_19B\n",
      "cloudyu/Mixtral_34Bx2_MoE_60B\n",
      "cloudyu/Mixtral_7Bx2_MoE_13B\n",
      "cloudyu/mistral_11B_instruct_v0.1\n",
      "cloudyu/mistral_15B_instruct_v0.1\n",
      "cloudyu/mistral_18B_instruct_v0.1\n",
      "cloudyu/mistral_28B_instruct_v0.1\n",
      "cloudyu/mistral_28B_instruct_v0.2\n",
      "cloudyu/mistral_9B_instruct_v0.2\n",
      "cmarkea/bloomz-3b-sft-chat\n",
      "cmarkea/bloomz-560m-sft-chat\n",
      "codellama/CodeLlama-13b-Instruct-hf\n",
      "codellama/CodeLlama-13b-Python-hf\n",
      "codellama/CodeLlama-13b-hf\n",
      "codellama/CodeLlama-34b-Instruct-hf\n",
      "codellama/CodeLlama-34b-Python-hf\n",
      "codellama/CodeLlama-34b-hf\n",
      "codellama/CodeLlama-70b-Instruct-hf\n",
      "codellama/CodeLlama-70b-Python-hf\n",
      "codellama/CodeLlama-70b-hf\n",
      "codellama/CodeLlama-7b-Instruct-hf\n",
      "codellama/CodeLlama-7b-Python-hf\n",
      "codellama/CodeLlama-7b-hf\n",
      "cognitivecomputations/dolphin-2.9.1-yi-1.5-34b\n",
      "cognitivecomputations/dolphin-2.9.1-yi-1.5-9b\n",
      "core-3/kuno-royale-7B\n",
      "core-3/kuno-royale-7b\n",
      "databricks/dolly-v2-12b\n",
      "databricks/dolly-v2-7b\n",
      "deepseek-ai/deepseek-coder-1.3b-instruct\n",
      "deepseek-ai/deepseek-coder-6.7b-instruct\n",
      "deepseek-ai/deepseek-llm-67b-chat\n",
      "deepseek-ai/deepseek-llm-7b-chat\n",
      "dfurman/Llama-3-70B-Orpo-v0.1\n",
      "dfurman/Llama-3-8B-Orpo-v0.1\n",
      "digitous/Alpacino13b\n",
      "digitous/Alpacino30b\n",
      "eachadea/vicuna-13b-1.1\n",
      "eachadea/vicuna-7b-1.1\n",
      "ehartford/Samantha-1.11-13b\n",
      "ehartford/Samantha-1.11-70b\n",
      "ehartford/Samantha-1.11-7b\n",
      "ehartford/Wizard-Vicuna-13B-Uncensored\n",
      "ehartford/Wizard-Vicuna-30B-Uncensored\n",
      "ehartford/Wizard-Vicuna-7B-Uncensored\n",
      "ehartford/WizardLM-13B-Uncensored\n",
      "ehartford/WizardLM-30B-Uncensored\n",
      "ehartford/WizardLM-7B-Uncensored\n",
      "elinas/chronos-13b-v2\n",
      "elinas/chronos-70b-v2\n",
      "elyza/ELYZA-japanese-Llama-2-13b\n",
      "elyza/ELYZA-japanese-Llama-2-13b-fast\n",
      "elyza/ELYZA-japanese-Llama-2-13b-fast-instruct\n",
      "elyza/ELYZA-japanese-Llama-2-13b-instruct\n",
      "elyza/ELYZA-japanese-Llama-2-7b\n",
      "elyza/ELYZA-japanese-Llama-2-7b-fast\n",
      "elyza/ELYZA-japanese-Llama-2-7b-fast-instruct\n",
      "elyza/ELYZA-japanese-Llama-2-7b-instruct\n",
      "facebook/opt-1.3b\n",
      "facebook/opt-125m\n",
      "facebook/opt-13b\n",
      "facebook/opt-2.7b\n",
      "facebook/opt-30b\n",
      "facebook/opt-350m\n",
      "facebook/opt-6.7b\n",
      "facebook/opt-66b\n",
      "facebook/opt-iml-max-1.3b\n",
      "facebook/opt-iml-max-30b\n",
      "facebook/xglm-1.7B\n",
      "facebook/xglm-4.5B\n",
      "facebook/xglm-564M\n",
      "facebook/xglm-7.5B\n",
      "freecs/ThetaWave-14B-v0.1\n",
      "freecs/ThetaWave-28B-v0.1\n",
      "freecs/ThetaWave-7B-v0.1\n",
      "garage-bAInd/Camel-Platypus2-13B\n",
      "garage-bAInd/Camel-Platypus2-70B\n",
      "garage-bAInd/Platypus2-13B\n",
      "garage-bAInd/Platypus2-70B\n",
      "garage-bAInd/Platypus2-7B\n",
      "golaxy/gogpt-3b-bloom\n",
      "golaxy/gogpt-560m\n",
      "golaxy/gogpt-7b\n",
      "golaxy/gogpt-7b-bloom\n",
      "golaxy/gogpt2-13b\n",
      "golaxy/gogpt2-7b\n",
      "google/codegemma-2b\n",
      "google/codegemma-7b\n",
      "google/gemma-2b\n",
      "google/gemma-2b-it\n",
      "google/gemma-7b\n",
      "google/gemma-7b-it\n",
      "h2oai/h2ogpt-gm-oasst1-en-1024-12b\n",
      "h2oai/h2ogpt-gm-oasst1-en-1024-20b\n",
      "h2oai/h2ogpt-oasst1-512-12b\n",
      "h2oai/h2ogpt-oasst1-512-20b\n",
      "hamxea/Llama-2-13b-chat-hf-activity-fine-tuned-v4\n",
      "hamxea/Llama-2-7b-chat-hf-activity-fine-tuned-v4\n",
      "haoranxu/ALMA-13B\n",
      "haoranxu/ALMA-7B\n",
      "health360/Healix-3B\n",
      "health360/Healix-410M\n",
      "heegyu/LIMA2-13b-hf\n",
      "heegyu/LIMA2-7b-hf\n",
      "hfl/chinese-alpaca-2-1.3b\n",
      "hfl/chinese-alpaca-2-1.3b-rlhf\n",
      "hfl/chinese-alpaca-2-13b\n",
      "hfl/chinese-alpaca-2-13b-16k\n",
      "hfl/chinese-alpaca-2-7b\n",
      "hfl/chinese-alpaca-2-7b-16k\n",
      "hfl/chinese-alpaca-2-7b-rlhf\n",
      "hfl/chinese-llama-2-1.3b\n",
      "hfl/chinese-llama-2-13b\n",
      "hfl/chinese-llama-2-13b-16k\n",
      "hfl/chinese-llama-2-7b\n",
      "hfl/chinese-llama-2-7b-16k\n",
      "hoskinson-center/proofGPT-v0.1\n",
      "hoskinson-center/proofGPT-v0.1-6.7B\n",
      "huggingface/llama-13b\n",
      "huggingface/llama-30b\n",
      "huggingface/llama-65b\n",
      "huggingface/llama-7b\n",
      "huggyllama/llama-13b\n",
      "huggyllama/llama-30b\n",
      "huggyllama/llama-65b\n",
      "huggyllama/llama-7b\n",
      "hyunseoki/ko-ref-llama2-13b\n",
      "hyunseoki/ko-ref-llama2-7b\n",
      "hywu/Camelidae-8x13B\n",
      "hywu/Camelidae-8x7B\n",
      "internlm/internlm2-20b\n",
      "internlm/internlm2-7b\n",
      "itsliupeng/llama2_70b_mmlu\n",
      "itsliupeng/llama2_7b_mmlu\n",
      "jan-ai/Pandora-10.7B-v1\n",
      "jan-ai/Pandora-13B-v1\n",
      "jeonsworld/CarbonVillain-en-10.7B-v1\n",
      "jeonsworld/CarbonVillain-en-13B-v1\n",
      "jondurbin/airoboros-13b\n",
      "jondurbin/airoboros-13b-gpt4\n",
      "jondurbin/airoboros-13b-gpt4-1.1\n",
      "jondurbin/airoboros-13b-gpt4-1.2\n",
      "jondurbin/airoboros-13b-gpt4-1.3\n",
      "jondurbin/airoboros-13b-gpt4-1.4\n",
      "jondurbin/airoboros-33b-gpt4\n",
      "jondurbin/airoboros-33b-gpt4-1.2\n",
      "jondurbin/airoboros-33b-gpt4-1.3\n",
      "jondurbin/airoboros-33b-gpt4-1.4\n",
      "jondurbin/airoboros-33b-gpt4-2.0\n",
      "jondurbin/airoboros-33b-gpt4-m2.0\n",
      "jondurbin/airoboros-34b-3.3\n",
      "jondurbin/airoboros-65b-gpt4-1.2\n",
      "jondurbin/airoboros-65b-gpt4-1.3\n",
      "jondurbin/airoboros-65b-gpt4-1.4\n",
      "jondurbin/airoboros-65b-gpt4-2.0\n",
      "jondurbin/airoboros-65b-gpt4-m2.0\n",
      "jondurbin/airoboros-70b-3.3\n",
      "jondurbin/airoboros-7b\n",
      "jondurbin/airoboros-7b-gpt4\n",
      "jondurbin/airoboros-7b-gpt4-1.1\n",
      "jondurbin/airoboros-7b-gpt4-1.2\n",
      "jondurbin/airoboros-7b-gpt4-1.3\n",
      "jondurbin/airoboros-7b-gpt4-1.4\n",
      "jondurbin/airoboros-l2-13b-2.1\n",
      "jondurbin/airoboros-l2-13b-2.2.1\n",
      "jondurbin/airoboros-l2-13b-gpt4-1.4.1\n",
      "jondurbin/airoboros-l2-13b-gpt4-2.0\n",
      "jondurbin/airoboros-l2-13b-gpt4-m2.0\n",
      "jondurbin/airoboros-l2-70b-2.2.1\n",
      "jondurbin/airoboros-l2-70b-gpt4-1.4.1\n",
      "jondurbin/airoboros-l2-70b-gpt4-2.0\n",
      "jondurbin/airoboros-l2-70b-gpt4-m2.0\n",
      "jondurbin/airoboros-l2-7b-2.1\n",
      "jondurbin/airoboros-l2-7b-2.2.1\n",
      "jondurbin/airoboros-l2-7b-gpt4-1.4.1\n",
      "jondurbin/airoboros-l2-7b-gpt4-2.0\n",
      "jondurbin/airoboros-l2-7b-gpt4-m2.0\n",
      "jondurbin/bagel-34b-v0.4\n",
      "jondurbin/bagel-34b-v0.5\n",
      "jondurbin/bagel-7b-v0.4\n",
      "jondurbin/bagel-7b-v0.5\n",
      "jondurbin/bagel-dpo-34b-v0.5\n",
      "jondurbin/bagel-dpo-7b-v0.5\n",
      "jondurbin/spicyboros-70b-2.2\n",
      "jondurbin/spicyboros-7b-2.2\n",
      "kalisai/Nusantara-0.8b-Indo-Chat\n",
      "kalisai/Nusantara-1.8b-Indo-Chat\n",
      "kalisai/Nusantara-2.7b-Indo-Chat\n",
      "kalisai/Nusantara-4b-Indo-Chat\n",
      "kalisai/Nusantara-7b-Indo-Chat\n",
      "kreimben/CodeMind-gemma\n",
      "kreimben/CodeMind-gemma-2b\n",
      "lcw99/llama-3-10b-it-kor-extented-chang\n",
      "lcw99/llama-3-8b-it-kor-extented-chang\n",
      "lingyun1/GZDX\n",
      "lingyun1/GZDX-1.1B\n",
      "llm-agents/tora-13b-v1.0\n",
      "llm-agents/tora-70b-v1.0\n",
      "llm-agents/tora-7b-v1.0\n",
      "llm-agents/tora-code-13b-v1.0\n",
      "llm-agents/tora-code-34b-v1.0\n",
      "llm-agents/tora-code-7b-v1.0\n",
      "lmsys/vicuna-13b-delta-v1.1\n",
      "lmsys/vicuna-13b-v1.3\n",
      "lmsys/vicuna-13b-v1.5\n",
      "lmsys/vicuna-13b-v1.5-16k\n",
      "lmsys/vicuna-33b-v1.3\n",
      "lmsys/vicuna-7b-delta-v1.1\n",
      "lmsys/vicuna-7b-v1.3\n",
      "lmsys/vicuna-7b-v1.5\n",
      "lmsys/vicuna-7b-v1.5-16k\n",
      "luffycodes/vicuna-class-shishya-13b-ep3\n",
      "luffycodes/vicuna-class-shishya-7b-ep3\n",
      "luffycodes/vicuna-class-shishya-ac-hal-13b-ep3\n",
      "luffycodes/vicuna-class-shishya-ac-hal-7b-ep3\n",
      "luffycodes/vicuna-class-shishya-all-hal-13b-ep3\n",
      "luffycodes/vicuna-class-shishya-all-hal-7b-ep3\n",
      "luffycodes/vicuna-class-tutor-13b-ep3\n",
      "luffycodes/vicuna-class-tutor-7b-ep3\n",
      "meta-llama/Llama-2-13b-chat-hf\n",
      "meta-llama/Llama-2-13b-hf\n",
      "meta-llama/Llama-2-70b-chat-hf\n",
      "meta-llama/Llama-2-70b-hf\n",
      "meta-llama/Llama-2-7b-chat-hf\n",
      "meta-llama/Llama-2-7b-hf\n",
      "meta-llama/Meta-Llama-3-70B\n",
      "meta-llama/Meta-Llama-3-70B-Instruct\n",
      "meta-llama/Meta-Llama-3-8B\n",
      "meta-llama/Meta-Llama-3-8B-Instruct\n",
      "meta-math/MetaMath-13B-V1.0\n",
      "meta-math/MetaMath-70B-V1.0\n",
      "microsoft/Orca-2-13b\n",
      "microsoft/Orca-2-7b\n",
      "migtissera/SynthIA-70B-v1.5\n",
      "migtissera/SynthIA-7B-v1.5\n",
      "migtissera/Synthia-13B\n",
      "migtissera/Synthia-13B-v1.2\n",
      "migtissera/Synthia-70B\n",
      "migtissera/Synthia-70B-v1.2\n",
      "migtissera/Synthia-7B\n",
      "migtissera/Synthia-7B-v1.2\n",
      "migtissera/Synthia-7B-v3.0\n",
      "migtissera/Synthia-v3.0-11B\n",
      "migtissera/Tess-10.7B-v1.5b\n",
      "migtissera/Tess-2.0-Llama-3-70B\n",
      "migtissera/Tess-2.0-Llama-3-8B\n",
      "migtissera/Tess-34B-v1.4\n",
      "migtissera/Tess-34B-v1.5b\n",
      "migtissera/Tess-72B-v1.5b\n",
      "migtissera/Tess-7B-v1.4\n",
      "mistralai/Mixtral-8x22B-Instruct-v0.1\n",
      "mistralai/Mixtral-8x22B-v0.1\n",
      "mistralai/Mixtral-8x7B-Instruct-v0.1\n",
      "mistralai/Mixtral-8x7B-v0.1\n",
      "mlabonne/Daredevil-7B\n",
      "mlabonne/Daredevil-8B\n",
      "mncai/agiin-11.1B-v0.0\n",
      "mncai/agiin-13.6B-v0.0\n",
      "moreh/MoMo-70B-LoRA-V1.4\n",
      "moreh/MoMo-70B-lora-1.8.4-DPO\n",
      "moreh/MoMo-70B-lora-1.8.5-DPO\n",
      "moreh/MoMo-70B-lora-1.8.6-DPO\n",
      "moreh/MoMo-72B-LoRA-V1.4\n",
      "moreh/MoMo-72B-lora-1.8.4-DPO\n",
      "moreh/MoMo-72B-lora-1.8.5-DPO\n",
      "moreh/MoMo-72B-lora-1.8.6-DPO\n",
      "mosaicml/mpt-30b\n",
      "mosaicml/mpt-30b-chat\n",
      "mosaicml/mpt-30b-instruct\n",
      "mosaicml/mpt-7b\n",
      "mosaicml/mpt-7b-chat\n",
      "mosaicml/mpt-7b-instruct\n",
      "netcat420/MFANN3bv0.11\n",
      "netcat420/MFANN3bv0.2\n",
      "netcat420/MFANN3bv0.3\n",
      "netcat420/MFANN3bv0.4\n",
      "netcat420/MFANN3bv0.8\n",
      "netcat420/MFANNv0.11\n",
      "netcat420/MFANNv0.2\n",
      "netcat420/MFANNv0.3\n",
      "netcat420/MFANNv0.4\n",
      "netcat420/MFANNv0.8\n",
      "nicholasKluge/Aira-2-355M\n",
      "nicholasKluge/Aira-2-774M\n",
      "nisten/shqiponja-15b-v1\n",
      "nisten/shqiponja-59b-v1\n",
      "nnheui/pythia-1.4b-sft-full\n",
      "nnheui/pythia-410m-sft-full\n",
      "nvidia/Llama3-ChatQA-1.5-70B\n",
      "nvidia/Llama3-ChatQA-1.5-8B\n",
      "oh-yeontaek/llama-2-13B-LoRA-assemble\n",
      "oh-yeontaek/llama-2-7B-LoRA-assemble\n",
      "openaccess-ai-collective/DPOpenHermes-11B\n",
      "openaccess-ai-collective/DPOpenHermes-7B\n",
      "openbmb/UltraLM-13b\n",
      "openbmb/UltraLM-65b\n",
      "openlm-research/open_llama_13b\n",
      "openlm-research/open_llama_3b\n",
      "openlm-research/open_llama_3b_v2\n",
      "openlm-research/open_llama_7b\n",
      "openlm-research/open_llama_7b_v2\n",
      "openthaigpt/openthaigpt-1.0.0-13b-chat\n",
      "openthaigpt/openthaigpt-1.0.0-70b-chat\n",
      "openthaigpt/openthaigpt-1.0.0-7b-chat\n",
      "pankajmathur/orca_mini_v3_13b\n",
      "pankajmathur/orca_mini_v3_7b\n",
      "perlthoughts/Chupacabra-16B-v2.01\n",
      "perlthoughts/Chupacabra-7B-v2.01\n",
      "perlthoughts/Falkor-16b\n",
      "perlthoughts/Falkor-7b\n",
      "pmking27/PrathameshLLM-2B\n",
      "pmking27/PrathameshLLM-7B\n",
      "princeton-nlp/Sheared-LLaMA-1.3B\n",
      "princeton-nlp/Sheared-LLaMA-1.3B-ShareGPT\n",
      "princeton-nlp/Sheared-LLaMA-2.7B\n",
      "princeton-nlp/Sheared-LLaMA-2.7B-ShareGPT\n",
      "project-baize/baize-v2-13b\n",
      "project-baize/baize-v2-7b\n",
      "psmathur/orca_mini_13b\n",
      "psmathur/orca_mini_3b\n",
      "psmathur/orca_mini_7b\n",
      "psmathur/orca_mini_v2_13b\n",
      "psmathur/orca_mini_v2_7b\n",
      "psmathur/orca_mini_v3_13b\n",
      "psmathur/orca_mini_v3_70b\n",
      "psmathur/orca_mini_v3_7b\n",
      "quantumaikr/QuantumLM\n",
      "quantumaikr/QuantumLM-7B\n",
      "raincandy-u/Qwen1.5-1.8B_llamafy\n",
      "raincandy-u/Qwen1.5-4B_llamafy\n",
      "rishiraj/smol-3b\n",
      "rishiraj/smol-7b\n",
      "rombodawg/LosslessMegaCoder-llama2-13b-mini\n",
      "rombodawg/LosslessMegaCoder-llama2-7b-mini\n",
      "roneneldan/TinyStories-1M\n",
      "roneneldan/TinyStories-28M\n",
      "roneneldan/TinyStories-33M\n",
      "roneneldan/TinyStories-3M\n",
      "roneneldan/TinyStories-8M\n",
      "sail/Sailor-0.5B\n",
      "sail/Sailor-0.5B-Chat\n",
      "sail/Sailor-1.8B\n",
      "sail/Sailor-1.8B-Chat\n",
      "sail/Sailor-4B\n",
      "sail/Sailor-4B-Chat\n",
      "sail/Sailor-7B\n",
      "sail/Sailor-7B-Chat\n",
      "shenzhi-wang/Llama3-70B-Chinese-Chat\n",
      "shenzhi-wang/Llama3-8B-Chinese-Chat\n",
      "shibing624/chinese-alpaca-plus-13b-hf\n",
      "shibing624/chinese-alpaca-plus-7b-hf\n",
      "stabilityai/StableBeluga-13B\n",
      "stabilityai/StableBeluga-7B\n",
      "stabilityai/stablelm-base-alpha-3b\n",
      "stabilityai/stablelm-base-alpha-7b\n",
      "stabilityai/stablelm-tuned-alpha-3b\n",
      "stabilityai/stablelm-tuned-alpha-7b\n",
      "starmpcc/Asclepius-Llama2-13B\n",
      "starmpcc/Asclepius-Llama2-7B\n",
      "teknium/OpenHermes-13B\n",
      "teknium/OpenHermes-7B\n",
      "tiiuae/falcon-11B\n",
      "tiiuae/falcon-180B\n",
      "tiiuae/falcon-40b\n",
      "tiiuae/falcon-7b\n",
      "tuantran1632001/Psyfighter2-Orca2-13B-ties\n",
      "tuantran1632001/Psyfighter2-Orca2-ties\n",
      "upstage/llama-30b-instruct\n",
      "upstage/llama-65b-instruct\n",
      "uukuguy/Orca-2-13b-f16\n",
      "uukuguy/Orca-2-7b-f16\n",
      "uukuguy/speechless-codellama-dolphin-orca-platypus-13b\n",
      "uukuguy/speechless-codellama-dolphin-orca-platypus-34b\n",
      "uukuguy/speechless-coder-ds-1.3b\n",
      "uukuguy/speechless-coder-ds-6.7b\n",
      "vicgalle/OpenHermes-Gemma-2B\n",
      "vicgalle/OpenHermes-Gemma-7B\n",
      "vicgalle/solarized-13B-dpo\n",
      "vicgalle/solarized-18B-dpo\n",
      "vicgalleorg/TruthfulQwen1.5-1.8B\n",
      "vicgalleorg/TruthfulQwen1.5-4B\n",
      "vihangd/dopeyshearedplats-1.3b-v1\n",
      "vihangd/dopeyshearedplats-2.7b-v1\n",
      "vihangd/smartyplats-3b-v2\n",
      "vihangd/smartyplats-7b-v2\n",
      "vmajor/Orca2-13B-selfmerge-26B\n",
      "vmajor/Orca2-13B-selfmerge-39B\n",
      "wahaha1987/llama_13b_sharegpt94k_fastchat\n",
      "wahaha1987/llama_7b_sharegpt94k_fastchat\n",
      "wandb/gemma-2b-zephyr-dpo\n",
      "wandb/gemma-2b-zephyr-sft\n",
      "wandb/gemma-7b-zephyr-dpo\n",
      "wandb/gemma-7b-zephyr-sft\n",
      "wenbopan/Faro-Yi-34B\n",
      "wenbopan/Faro-Yi-34B-200K\n",
      "wenbopan/Faro-Yi-9B\n",
      "wenbopan/Faro-Yi-9B-200K\n",
      "wenge-research/yayi-13b-llama2\n",
      "wenge-research/yayi-7b-llama2\n",
      "xaviviro/FLOR-1.3B-xat\n",
      "xaviviro/FLOR-6.3B-xat\n",
      "yanolja/EEVE-Korean-Instruct-10.8B-v1.0\n",
      "yanolja/EEVE-Korean-Instruct-2.8B-v1.0\n",
      "ziqingyang/chinese-alpaca-2-13b\n",
      "ziqingyang/chinese-alpaca-2-7b\n",
      "ziqingyang/chinese-llama-2-13b\n",
      "ziqingyang/chinese-llama-2-7b\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None,\n",
       " None]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Flatten `old_families` (an iterable of lists) into a single flat list.\n",
    "merged_list = [item for sublist in old_families for item in sublist]\n",
    "\n",
    "# Print the entries in sorted order, one per line. A plain loop is used\n",
    "# instead of a list comprehension: `print` returns None, so a comprehension\n",
    "# would build a throwaway list of None values that the notebook then\n",
    "# displays as the cell result, one entry per printed line.\n",
    "for model_name in np.sort(merged_list).tolist():\n",
    "    print(model_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "57f0d5f9-a105-445d-9610-276098ce6ce4",
   "metadata": {},
   "source": [
    "## Compiling the data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "85c91659-8b46-49a5-be6e-e4e139d7018b",
   "metadata": {},
   "source": [
    "### Loading dataset containing scores for subtasks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c8921076-875a-4d00-8ab0-189aff6245ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "subscenario_scores = pd.read_csv('subscenario_scores.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ceff7449-8ece-4cbe-9e1a-b5cc6cdbe013",
   "metadata": {},
   "source": [
    "### Processing new LB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "89ce5b75-4561-47c7-85a6-caacddedea94",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('new_lb_annotated.txt', 'r') as file:\n",
    "    new_lb_annotated = file.readlines()\n",
    "new_lb_annotated = [line.strip() for line in new_lb_annotated]\n",
    "new_lb_annotated = [m[:-2] for m in new_lb_annotated if m[-2:] in [' Y',' ?']]\n",
    "new_lb_annotated = [re.sub(r\"\\s*\\[.*?\\]\", \"\", m) for m in new_lb_annotated]\n",
    "\n",
    "new_lb = pd.read_csv(\"open-llm-leaderboard_new.csv\")\n",
    "new_lb = new_lb.iloc[[np.argmax(np.array(new_lb.Model)==m) for m in new_lb_annotated]].loc[:,['Model','#Params (B)','Upload To Hub Date','IFEval Raw','BBH Raw','MATH Lvl 5 Raw','GPQA Raw','MUSR Raw','MMLU-PRO Raw']]\n",
    "new_lb.columns = new_lb.columns.str.replace(' Raw', '', regex=False)\n",
    "new_lb = new_lb.reset_index(drop=True)\n",
    "\n",
    "new_families_names = [remove_params(m) for m in new_lb_annotated]\n",
    "instruct = []\n",
    "for i,f in enumerate(new_families_names):\n",
    "    f_new = f.replace('-Chat','').replace('-chat','')\n",
    "    f_new = f_new.replace('-Instruct','').replace('-instruct','')\n",
    "    f_new = f_new.replace('pankajmathur/orca_mini_v3_','meta-llama/Llama-2-hf')\n",
    "    f_new = f_new.replace('pankajmathur/orca_mini_v7_','Qwen/Qwen2')\n",
    "    f_new = f_new.replace('microsoft/Orca-2','meta-llama/Llama-2-hf')\n",
    "    f_new = f_new.replace('teknium/OpenHermes','meta-llama/Llama-2-hf')\n",
    "    f_new = f_new.replace('lmsys/vicuna','huggyllama/llama')\n",
    "    f_new = f_new.replace('databricks/dolly-v2','EleutherAI/pythia')\n",
    "    f_new = f_new.replace('WizardLMTeam/WizardLM-V1.0','meta-llama/Llama-2-hf')\n",
    "    f_new = f_new.replace('Azure99/blossom-v5.1','01-ai/Yi-1.5')\n",
    "    f_new = f_new.replace('VAGOsolutions/SauerkrautLM-Gemma','google/gemma')\n",
    "    f_new = f_new.replace('VAGOsolutions/Llama-3-SauerkrautLM','meta-llama/Meta-Llama-3')\n",
    "    f_new = f_new.replace('cognitivecomputations/dolphin-2.9.1-yi-1.5','01-ai/Yi-1.5')\n",
    "    f_new = f_new.replace('cognitivecomputations/dolphin-2.9.2-qwen2','Qwen/Qwen2')\n",
    "    f_new = f_new.replace('gemma-it','gemma')\n",
    "    f_new = f_new.replace('gemma-1.1-it','gemma-1.1')\n",
    "    f_new = f_new.replace('gemma-2-it','gemma-2')\n",
    "    new_families_names[i] = f_new\n",
    "    if f!=f_new:\n",
    "        instruct.append(True)\n",
    "    else:\n",
    "        instruct.append(False)\n",
    "        \n",
    "new_lb['Family'] = new_families_names\n",
    "new_lb['Instruct'] = instruct\n",
    "new_lb =  consolidate_columns(new_lb.merge(subscenario_scores, on='Model', how='left'))\n",
    "new_lb = new_lb.loc[:,['Model', 'Family', 'Instruct', '#Params (B)', 'date', 'IFEval', 'BBH', 'MATH Lvl 5', 'GPQA', 'MUSR', 'MMLU-PRO',\n",
    "                       'bbh_boolean_expressions','bbh_causal_judgement','bbh_date_understanding','bbh_disambiguation_qa',\n",
    "                       'bbh_formal_fallacies','bbh_geometric_shapes','bbh_hyperbaton','bbh_logical_deduction_five_objects',\n",
    "                       'bbh_logical_deduction_seven_objects','bbh_logical_deduction_three_objects','bbh_movie_recommendation',\n",
    "                       'bbh_navigate','bbh_object_counting','bbh_penguins_in_a_table','bbh_reasoning_about_colored_objects',\n",
    "                       'bbh_ruin_names','bbh_salient_translation_error_detection','bbh_snarks','bbh_sports_understanding',\n",
    "                       'bbh_temporal_sequences','bbh_tracking_shuffled_objects_five_objects','bbh_tracking_shuffled_objects_seven_objects',\n",
    "                       'bbh_tracking_shuffled_objects_three_objects','bbh_web_of_lies','gpqa_diamond','gpqa_extended','gpqa_main',\n",
    "                       'math_algebra_hard','math_counting_and_prob_hard','math_geometry_hard','math_intermediate_algebra_hard',\n",
    "                       'math_num_theory_hard','math_prealgebra_hard','math_precalculus_hard','musr_murder_mysteries',\n",
    "                       'musr_object_placements','musr_team_allocation']]\n",
    "new_lb['Family'] = [standard_name(m) for m in new_lb.Family]\n",
    "new_lb['Family'] = new_lb['Family'].replace({'gpt-neo':'gpt-neo/j','bloom-1':'bloom'}) #to match Tatsu data\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a72b46db-bae4-48f9-be2b-16d55772c2df",
   "metadata": {},
   "source": [
    "### Processing old LB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "4a828493-4b99-473a-ad39-1d1551173cf3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(6811, 65)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "old_lb = pd.read_csv(\"open-llm-leaderboard_old.csv\")\n",
    "old_lb = old_lb.sort_values(by=['date']).reset_index(drop=True)\n",
    "old_lb =  consolidate_columns(old_lb.merge(subscenario_scores, on='Model', how='left'))\n",
    "old_lb = old_lb.drop_duplicates(subset=['Model'], keep='last')\n",
    "old_lb = old_lb.loc[:,['Model', '#Params (B)', 'ARC', 'HellaSwag', 'MMLU', 'TruthfulQA', 'Winogrande', 'GSM8K',\n",
    "                       'hendrycksTest-abstract_algebra','hendrycksTest-anatomy','hendrycksTest-astronomy','hendrycksTest-business_ethics',\n",
    "                       'hendrycksTest-clinical_knowledge','hendrycksTest-college_biology','hendrycksTest-college_chemistry','hendrycksTest-college_computer_science',\n",
    "                       'hendrycksTest-college_mathematics','hendrycksTest-college_medicine','hendrycksTest-college_physics',\n",
    "                       'hendrycksTest-computer_security','hendrycksTest-conceptual_physics','hendrycksTest-econometrics',\n",
    "                       'hendrycksTest-electrical_engineering','hendrycksTest-elementary_mathematics','hendrycksTest-formal_logic',\n",
    "                       'hendrycksTest-global_facts','hendrycksTest-high_school_biology','hendrycksTest-high_school_chemistry',\n",
    "                       'hendrycksTest-high_school_computer_science','hendrycksTest-high_school_european_history','hendrycksTest-high_school_geography',\n",
    "                       'hendrycksTest-high_school_government_and_politics','hendrycksTest-high_school_macroeconomics','hendrycksTest-high_school_mathematics',\n",
    "                       'hendrycksTest-high_school_microeconomics','hendrycksTest-high_school_physics','hendrycksTest-high_school_psychology',\n",
    "                       'hendrycksTest-high_school_statistics','hendrycksTest-high_school_us_history','hendrycksTest-high_school_world_history',\n",
    "                       'hendrycksTest-human_aging','hendrycksTest-human_sexuality','hendrycksTest-international_law','hendrycksTest-jurisprudence',\n",
    "                       'hendrycksTest-logical_fallacies','hendrycksTest-machine_learning','hendrycksTest-management','hendrycksTest-marketing',\n",
    "                       'hendrycksTest-medical_genetics','hendrycksTest-miscellaneous','hendrycksTest-moral_disputes','hendrycksTest-moral_scenarios',\n",
    "                       'hendrycksTest-nutrition','hendrycksTest-philosophy','hendrycksTest-prehistory','hendrycksTest-professional_accounting',\n",
    "                       'hendrycksTest-professional_law','hendrycksTest-professional_medicine','hendrycksTest-professional_psychology',\n",
    "                       'hendrycksTest-public_relations','hendrycksTest-security_studies','hendrycksTest-sociology','hendrycksTest-us_foreign_policy',\n",
    "                       'hendrycksTest-virology','hendrycksTest-world_religions']]\n",
    "old_lb.loc[:,['ARC', 'HellaSwag', 'MMLU', 'TruthfulQA', 'Winogrande', 'GSM8K']]=old_lb.loc[:,['ARC', 'HellaSwag', 'MMLU', 'TruthfulQA', 'Winogrande', 'GSM8K']]/100\n",
    "old_lb.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "79a479df-b26a-4177-9e37-cd7f06ad1b63",
   "metadata": {},
   "source": [
    "### Processing Tatsu data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "588cdf66-61d8-4ea4-996c-57f23d450b0b",
   "metadata": {},
   "outputs": [],
   "source": [
    "tatsu_base = pd.read_csv('base_llm_benchmark_eval.csv')\n",
    "tatsu_base['Instruct'] = False\n",
    "tatsu_base['Model Family'] = [standard_name(m,slash=False) for m in tatsu_base['Model Family']]\n",
    "tatsu_base = tatsu_base.rename(columns={'ARC-C': 'ARC','Model Family':'Family','Model Size (B)':'#Params (B)','Winograd':'Winogrande'})\n",
    "tatsu_inst = pd.read_csv('instruct_llm_benchmark_eval.csv')\n",
    "tatsu_inst['Instruct'] = True\n",
    "tatsu_inst['Model Family'] = [standard_name(m,slash=False) for m in tatsu_inst['Model Family']]\n",
    "tatsu_inst = tatsu_inst.rename(columns={'ARC-C': 'ARC','Model Family':'Family','Model Size (B)':'#Params (B)','Winograd':'Winogrande'})\n",
    "tatsu = pd.concat((tatsu_base, tatsu_inst), axis=0).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "67800f73-c3eb-424c-a1a2-e43a03e8fa2f",
   "metadata": {},
   "source": [
    "Comparing model names from Tatsu data and new lb data (we do not worry about the old cause tatsu data was build from the old lb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "464aab51-9811-4d9c-b83b-b3c2b131efac",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                                  0                                             1    2\n",
      "72                            bigcode/starcoder2-7b                         bigcode/starcoder2-7b  100\n",
      "73                            bigcode/starcoder2-3b                         bigcode/starcoder2-3b  100\n",
      "8                        meta-llama/Meta-Llama-3-8B                    meta-llama/Meta-Llama-3-8B  100\n",
      "10                                 Qwen/Qwen1.5-32B                              Qwen/Qwen1.5-32B  100\n",
      "11                                 Qwen/Qwen1.5-14B                              Qwen/Qwen1.5-14B  100\n",
      "12                                  Qwen/Qwen1.5-7B                               Qwen/Qwen1.5-7B  100\n",
      "13                                  Qwen/Qwen1.5-4B                               Qwen/Qwen1.5-4B  100\n",
      "20                      mistralai/Mixtral-8x7B-v0.1                   mistralai/Mixtral-8x7B-v0.1  100\n",
      "14                                Qwen/Qwen1.5-1.8B                             Qwen/Qwen1.5-1.8B  100\n",
      "15                                Qwen/Qwen1.5-0.5B                             Qwen/Qwen1.5-0.5B  100\n",
      "21                                      01-ai/Yi-6B                                   01-ai/Yi-6B  100\n",
      "26                                tiiuae/falcon-40b                             tiiuae/falcon-40b  100\n",
      "24                                  google/gemma-2b                               google/gemma-2b  100\n",
      "23                                  google/gemma-7b                               google/gemma-7b  100\n",
      "22                                     01-ai/Yi-34B                                  01-ai/Yi-34B  100\n",
      "4                              huggyllama/llama-13b                          huggyllama/llama-13b  100\n",
      "6                              huggyllama/llama-65b                          huggyllama/llama-65b  100\n",
      "2                         meta-llama/Llama-2-70b-hf                     meta-llama/Llama-2-70b-hf  100\n",
      "3                               huggyllama/llama-7b                           huggyllama/llama-7b  100\n",
      "27                                 tiiuae/falcon-7b                              tiiuae/falcon-7b  100\n",
      "50                                facebook/opt-1.3b                             facebook/opt-1.3b  100\n",
      "39                            bigscience/bloom-560m                         bigscience/bloom-560m  100\n",
      "71                           bigcode/starcoder2-15b                        bigcode/starcoder2-15b  100\n",
      "0                          meta-llama/Llama-2-7b-hf                      meta-llama/Llama-2-7b-hf  100\n",
      "1                         meta-llama/Llama-2-13b-hf                     meta-llama/Llama-2-13b-hf  100\n",
      "45                          EleutherAI/gpt-neo-2.7B                       EleutherAI/gpt-neo-2.7B  100\n",
      "46                          EleutherAI/gpt-neo-1.3B                       EleutherAI/gpt-neo-1.3B  100\n",
      "42                             bigscience/bloom-7b1                          bigscience/bloom-7b1  100\n",
      "41                              bigscience/bloom-3b                           bigscience/bloom-3b  100\n",
      "40                             bigscience/bloom-1b1                          bigscience/bloom-1b1  100\n",
      "54                                 facebook/opt-30b                              facebook/opt-30b  100\n",
      "7                       meta-llama/Meta-Llama-3-70B                   meta-llama/Meta-Llama-3-70B  100\n",
      "52                                 facebook/opt-13b                             facebook/opt-1.3b   97\n",
      "9                                  Qwen/Qwen1.5-72B                               Qwen/Qwen1.5-7B   97\n",
      "16                                    Qwen/Qwen-72B                                Qwen/Qwen2-72B   96\n",
      "18                                     Qwen/Qwen-7B                                 Qwen/Qwen2-7B   96\n",
      "5                              huggyllama/llama-30b                          huggyllama/llama-13b   95\n",
      "43                                 bigscience/bloom                           bigscience/bloom-3b   91\n",
      "51                                facebook/opt-350m                              facebook/opt-30b   91\n",
      "44                          EleutherAI/gpt-neox-20b                       EleutherAI/gpt-neo-2.7B   91\n",
      "25                               tiiuae/falcon-180B                             tiiuae/falcon-40b   91\n",
      "17                                    Qwen/Qwen-14B                              Qwen/Qwen1.5-14B   90\n",
      "68                         bigcode/starcoderbase-3b                         bigcode/starcoder2-3b   89\n",
      "69                         bigcode/starcoderbase-7b                         bigcode/starcoder2-7b   89\n",
      "53                                facebook/opt-2.7b                             facebook/opt-1.3b   88\n",
      "56                                 facebook/opt-66b                              facebook/opt-30b   88\n",
      "49                                facebook/opt-6.7b                             facebook/opt-1.3b   88\n",
      "67                         bigcode/starcoderbase-1b                        bigcode/starcoder2-15b   87\n",
      "47                          EleutherAI/gpt-neo-125m                       EleutherAI/gpt-neo-1.3B   87\n",
      "28                              tiiuae/falcon-rw-1b                              tiiuae/falcon-7b   86\n",
      "34                   EleutherAI/pythia-2.8b-deduped                        EleutherAI/pythia-2.8b   85\n",
      "38                   EleutherAI/pythia-160m-deduped                        EleutherAI/pythia-160m   85\n",
      "19                        mistralai/Mistral-7B-v0.1                   mistralai/Mixtral-8x7B-v0.1   85\n",
      "32                   EleutherAI/pythia-410m-deduped                        EleutherAI/pythia-410m   85\n",
      "33                   EleutherAI/pythia-6.9b-deduped                        EleutherAI/pythia-6.9b   85\n",
      "35                    EleutherAI/pythia-12b-deduped                         EleutherAI/pythia-12b   84\n",
      "78                                01-ai/Yi-34B-200K                                  01-ai/Yi-34B   83\n",
      "31                     EleutherAI/pythia-1b-deduped                         EleutherAI/pythia-12b   82\n",
      "55                                facebook/opt-125m                             facebook/opt-1.3b   82\n",
      "77                                 01-ai/Yi-6B-200K                                   01-ai/Yi-6B   81\n",
      "48                              EleutherAI/gpt-j-6b                       EleutherAI/gpt-neo-1.3B   81\n",
      "70                            bigcode/starcoderbase                         bigcode/starcoder2-3b   81\n",
      "132                        mistral-7b-instruct-v0.1          mistralai/Mixtral-8x7B-Instruct-v0.1   80\n",
      "36                    EleutherAI/pythia-70m-deduped                        EleutherAI/pythia-160m   78\n",
      "37                   EleutherAI/pythia-1.4b-deduped                         EleutherAI/pythia-12b   78\n",
      "113                                 vicuna-13b-v1.5                         lmsys/vicuna-13b-v1.3   78\n",
      "120                                  vicuna-7b-v1.5                          lmsys/vicuna-7b-v1.3   76\n",
      "116                                 vicuna-33b-v1.3                          lmsys/vicuna-7b-v1.3   74\n",
      "60                               facebook/xglm-1.7B                             facebook/opt-1.3b   74\n",
      "29                                  microsoft/phi-2                           microsoft/Orca-2-7b   71\n",
      "114                                llama-2-70b-chat                meta-llama/Llama-2-70b-chat-hf   70\n",
      "115                                llama-2-13b-chat                meta-llama/Llama-2-13b-chat-hf   70\n",
      "133                                  vicuna-13b-16k                         lmsys/vicuna-13b-v1.3   69\n",
      "127                                    dolly-v2-12b                       databricks/dolly-v2-12b   69\n",
      "61                               facebook/xglm-4.5B                             facebook/opt-1.3b   69\n",
      "126                                 llama-2-7b-chat                 meta-llama/Llama-2-7b-chat-hf   68\n",
      "123                               wizardlm-30b-v1.0                WizardLMTeam/WizardLM-13B-V1.0   68\n",
      "30                                microsoft/phi-1_5                          microsoft/Orca-2-13b   65\n",
      "62                               facebook/xglm-7.5B                              facebook/opt-30b   65\n",
      "64                       codellama/CodeLlama-13b-hf                          huggyllama/llama-13b   65\n",
      "63                        codellama/CodeLlama-7b-hf                           huggyllama/llama-7b   64\n",
      "118                               wizardlm-13b-v1.2                WizardLMTeam/WizardLM-13B-V1.0   64\n",
      "66                       codellama/CodeLlama-70b-hf                     meta-llama/Llama-2-70b-hf   63\n",
      "122                           codellama-7b-instruct                     tiiuae/falcon-7b-instruct   61\n",
      "65                       codellama/CodeLlama-34b-hf                          huggyllama/llama-13b   61\n",
      "131                               lemur-70b-chat-v1                          Qwen/Qwen1.5-7B-Chat   59\n",
      "59                               facebook/xglm-564M                              facebook/opt-30b   59\n",
      "112                          codellama-34b-instruct                    tiiuae/falcon-40b-instruct   58\n",
      "87                            internlm/internlm2-7b                         teknium/OpenHermes-7B   57\n",
      "119                          codellama-13b-instruct                         bigcode/starcoder2-3b   56\n",
      "57                                 mosaicml/mpt-30b                              facebook/opt-30b   56\n",
      "117                               openchat-13b-v3.2                              01-ai/Yi-6B-Chat   55\n",
      "106                       Salesforce/codegen-16B-nl                        bigcode/starcoder2-15b   55\n",
      "128                oasst-sft-4-pythia-12b-epoch-3.5                         01-ai/Yi-1.5-34B-Chat   53\n",
      "105                        Salesforce/codegen-6B-nl                         bigcode/starcoder2-3b   53\n",
      "90                                   Deci/DeciLM-7B                          bigscience/bloom-7b1   53\n",
      "86                           internlm/internlm2-20b                       Qwen/Qwen2-72B-Instruct   53\n",
      "94                      stabilityai/stablelm-2-1_6b                  mistralai/Mixtral-8x22B-v0.1   51\n",
      "110                              gpt-3.5-turbo-0613                       EleutherAI/gpt-neo-1.3B   49\n",
      "103  togethercomputer/RedPajama-INCITE-Base-7B-v0.1          mistralai/Mixtral-8x7B-Instruct-v0.1   49\n",
      "130                           deepseek-llm-67b-chat                          Qwen/Qwen1.5-7B-Chat   49\n",
      "125                                       koala-13b                                  01-ai/Yi-34B   48\n",
      "58                                  mosaicml/mpt-7b                            google/gemma-7b-it   48\n",
      "91            stabilityai/stablelm-base-alpha-7b-v2                        databricks/dolly-v2-7b   47\n",
      "124                                     guanaco-33b                           bigscience/bloom-3b   47\n",
      "81                   openlm-research/open_llama_13b                        teknium/OpenHermes-13B   46\n",
      "102    togethercomputer/RedPajama-INCITE-Base-3B-v1           meta-llama/Meta-Llama-3-8B-Instruct   46\n",
      "98                              RWKV/rwkv-4-7b-pile                          lmsys/vicuna-7b-v1.3   46\n",
      "100                             RWKV/rwkv-raven-14b                              Qwen/Qwen1.5-14B   46\n",
      "101                             RWKV/rwkv-4-3b-pile                         lmsys/vicuna-13b-v1.3   45\n",
      "121                                     guanaco-65b                          huggyllama/llama-65b   45\n",
      "129                                      gpt-4-0314                                  01-ai/Yi-34B   45\n",
      "111                              claude-instant-1.1                        bigcode/starcoder2-15b   45\n",
      "107                                      gpt-4-0613                                  01-ai/Yi-34B   45\n",
      "109                                      claude-1.3                                  01-ai/Yi-34B   45\n",
      "82                    openlm-research/open_llama_7b                    meta-llama/Meta-Llama-3-8B   44\n",
      "99                             RWKV/rwkv-4-14b-pile                              Qwen/Qwen1.5-14B   44\n",
      "83                    openlm-research/open_llama_3b                    meta-llama/Meta-Llama-3-8B   44\n",
      "93               stabilityai/stablelm-base-alpha-7b                     HuggingFaceTB/SmolLM-1.7B   44\n",
      "97                             RWKV/rwkv-4-1b5-pile                         lmsys/vicuna-13b-v1.3   44\n",
      "76              deepseek-ai/deepseek-coder-33b-base                         bigcode/starcoder2-3b   43\n",
      "92               stabilityai/stablelm-base-alpha-3b                   meta-llama/Meta-Llama-3-70B   43\n",
      "79                 openlm-research/open_llama_7b_v2                 pankajmathur/orca_mini_v7_72b   43\n",
      "74             deepseek-ai/deepseek-coder-1.3b-base                         bigcode/starcoder2-3b   42\n",
      "75             deepseek-ai/deepseek-coder-6.7b-base                         bigcode/starcoder2-7b   42\n",
      "80                 openlm-research/open_llama_3b_v2                   meta-llama/Meta-Llama-3-70B   41\n",
      "88                deepseek-ai/deepseek-llm-67b-base                         teknium/OpenHermes-7B   41\n",
      "89                deepseek-ai/deepseek-moe-16b-base                        teknium/OpenHermes-13B   40\n",
      "104                                    LLM360/Amber                           huggyllama/llama-7b   39\n",
      "85                      openai-community/gpt2-large                          google/gemma-2-2b-it   38\n",
      "95                            RWKV/rwkv-4-169m-pile                              Qwen/Qwen1.5-14B   38\n",
      "96                            RWKV/rwkv-4-430m-pile                         lmsys/vicuna-13b-v1.3   38\n",
      "108                                      claude-2.0                                   01-ai/Yi-6B   38\n",
      "84                            openai-community/gpt2  cognitivecomputations/dolphin-2.9.2-qwen2-7b   37\n"
     ]
    }
   ],
   "source": [
    "interdata = [[m1]+search(m1, list(new_lb.Model)) for m1 in list(tatsu.Model)]\n",
    "interdata = pd.DataFrame(interdata).sort_values(by=[2], ascending=False)\n",
    "print(interdata.to_string())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "ba4669f9-1d04-4b0d-b120-cbec0e941745",
   "metadata": {},
   "outputs": [],
   "source": [
    "#it seems that only the chat llama-2 family and dolly is not matching\n",
    "tatsu = tatsu.replace({'dolly-v2-12b': 'databricks/dolly-v2-12b',\n",
    "                       'llama-2-7b-chat': 'meta-llama/Llama-2-7b-chat-hf',\n",
    "                       'llama-2-13b-chat': 'meta-llama/Llama-2-13b-chat-hf',\n",
    "                       'llama-2-70b-chat': 'meta-llama/Llama-2-70b-chat-hf'})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7a559ff3-fec9-4530-9cfa-b1fc53462634",
   "metadata": {},
   "source": [
    "### Merging LB and Tatsu data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "5732d27c-b9a1-4a76-ba73-43afda8b97d0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(219, 117)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cons_lb = consolidate_columns((new_lb.merge(tatsu, on='Model', how='outer')))\n",
    "cons_lb =  consolidate_columns(cons_lb.merge(old_lb, on='Model', how='left'))\n",
    "cons_lb.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "134bddf4-6e53-41cc-9fbe-fc12d2c780e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "cons_lb.loc[cons_lb.Family=='mistral-instruct','Family'] = 'mistral'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "06fa58d2-eeef-4a24-91d5-977cb00f726a",
   "metadata": {},
   "source": [
    "Getting more instruct models from the old lb (we do not care about the new lb, cause we have already filtered all models of interest from that lb). "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "97b9849c-9c40-481a-8ba6-1566c021c8e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(232, 117)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "old_lb_instruct = old_lb.loc[['-instruct' in m or '-Instruct' in m or '-chat' in m or '-Chat' in m or '-it' in m or '-It' in m for m in old_lb.Model]].reset_index(drop=True)\n",
    "old_lb_instruct['Model2'] = [m.replace('-instruct','').replace('-chat','').replace('-it','').replace('-Instruct','').replace('-Chat','').replace('-It','') for m in old_lb_instruct.Model]\n",
    "old_lb_instruct = old_lb_instruct.loc[[m in list(cons_lb.Model) for m in old_lb_instruct['Model2']]]\n",
    "old_lb_instruct = old_lb_instruct.drop(['Model2'], axis=1)\n",
    "old_lb_instruct['Instruct'] = True\n",
    "cons_lb = pd.concat((cons_lb,old_lb_instruct), axis=0)\n",
    "cons_lb = cons_lb.drop_duplicates(subset=['Model'], keep='first')\n",
    "cons_lb.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6db6d396-131a-405b-9bb2-130922863475",
   "metadata": {},
   "source": [
    "We also include more models from the 'rwkv-raven' family (missing from Tatsu data)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a212f599-5fbe-48a6-b3d7-753c7dc3d372",
   "metadata": {},
   "source": [
    "more_data = old_lb.loc[['rwkv-raven' in m for m in old_lb.Model]].reset_index(drop=True)\n",
    "more_data['Instruct'] = False\n",
    "cons_lb = pd.concat((cons_lb,more_data), axis=0)\n",
    "cons_lb = cons_lb.drop_duplicates(subset=['Model'], keep='first').reset_index(drop=True)\n",
    "cons_lb.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "78cf5c00-b865-4e4b-8819-3eed4f55ee7d",
   "metadata": {},
   "source": [
    "cons_lb.loc[['rwkv-raven' in m for m in cons_lb.Model],'Family'] = 'rwkv-raven' "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "5fdafb9d-26fb-4f4a-bd64-aa7aa2e9b721",
   "metadata": {},
   "outputs": [],
   "source": [
    "cons_lb = cons_lb.loc[['rwkv-raven' not in m for m in cons_lb.Model]] # we delete 'rwkv-raven' cause there is only one model from this family"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2ecf98ce-ad9c-4655-8303-e3c2370327ba",
   "metadata": {},
   "source": [
    "Filling family for instruct models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "d1dd49ee-510b-457e-813c-3aa84d9b922f",
   "metadata": {},
   "outputs": [],
   "source": [
    "for m in cons_lb.loc[cons_lb.Instruct].Model:\n",
    "    model_name = m.replace('-instruct','').replace('-chat','').replace('-it','').replace('-Instruct','').replace('-Chat','').replace('-It','')\n",
    "    for var in ['Family']:\n",
    "        if cons_lb.loc[cons_lb.Model==m, var].isnull().iloc[0]:\n",
    "            if not list(cons_lb.loc[cons_lb.Model==model_name, var])==[]:\n",
    "                cons_lb.loc[cons_lb.Model==m, var] = cons_lb.loc[cons_lb.Model==model_name, var].iloc[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9b16ebb6-869b-4be4-831c-54f0e9898bb7",
   "metadata": {},
   "source": [
    "Getting model size from the names (the data from the lb can be misleading sometimes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "09bf3c80-65b1-436a-9547-6a0bca01e2a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "ind = ~np.array([re.search(r'\\d+(\\.\\d+)?[BbMm]', m) is None for m in list(cons_lb.Model)]) \n",
    "sizes = [re.search(r'\\d+(\\.\\d+)?[BbMm]', m).group() for m in list(cons_lb.loc[ind].Model)]\n",
    "for i in range(len(sizes)):\n",
    "    if sizes[i][-1].lower()=='b':\n",
    "        sizes[i] = float(sizes[i][:-1])\n",
    "    else:\n",
    "        sizes[i] = float(sizes[i][:-1])/1000\n",
    "cons_lb.loc[ind,['#Params (B)']] = sizes"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4cdcc09b-7866-40ad-ba2e-67a8aae52ccc",
   "metadata": {},
   "source": [
    "Standardizing model names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "83e8b163-4d7c-4733-b7f7-c750b4d8338e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['amber', 'bloom', 'claude-1', 'claude-2', 'claude-instant',\n",
       "       'codegen', 'codellama', 'codellama-instruct', 'decilm',\n",
       "       'deepseek-coder', 'deepseek-llm', 'deepseek-llm-chat',\n",
       "       'deepseek-moe', 'falcon', 'gemma', 'gemma-1.1', 'gemma-2', 'gpt-2',\n",
       "       'gpt-3.5-turbo', 'gpt-4', 'gpt-neo/j', 'guanaco', 'internlm2',\n",
       "       'koala', 'lemur-chat', 'llama', 'llama-2', 'llama-v1.3',\n",
       "       'meta-llama-3', 'meta-llama-3.1', 'mistral', 'mixtral-8x-v0.1',\n",
       "       'mpt', 'oasst-sft', 'olmo', 'openchat', 'openllama', 'openllamav2',\n",
       "       'opt', 'phi', 'pythia', 'qwen', 'qwen1.5', 'qwen2',\n",
       "       'recurrentgemma', 'redpajama-incite-base', 'rwkv', 'smollm',\n",
       "       'stablelm', 'starcoder', 'starcoder2', 'vicuna', 'wizardlm',\n",
       "       'xglm', 'yi', 'yi-1.5', 'yi-200k'], dtype=object)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Standardize names (drop the part before '/', lowercase, strip '-hf') and list the families present.\n",
    "cons_lb['Model'] = cons_lb['Model'].map(standard_name)\n",
    "np.unique(cons_lb['Family'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2ffff8aa-e623-4e5f-a568-a8ccacebbcc3",
   "metadata": {},
   "source": [
    "### Merging HumanEval"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "a03143d5-736e-4809-b9df-af5ba8ce48fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "humaneval = [['GPT-4-Turbo (April 2024)',90.2],\n",
    "['GPT-4 (May 2023)',88.4],\n",
    "['DeepSeek-Coder-V2-Instruct',85.4],\n",
    "['GPT-4-Turbo (Nov 2023)',85.4],\n",
    "['CodeQwen1.5-7B-Chat',83.5],\n",
    "['claude-3-opus (Mar 2024)',82.9],\n",
    "['DeepSeek-Coder-33B-instruct',81.1],\n",
    "['WizardCoder-33B-V1.1',79.9],\n",
    "['OpenCodeInterpreter-DS-33B',79.3],\n",
    "['Llama3-70B-instruct',77.4],\n",
    "['OpenCodeInterpreter-DS-6.7B',77.4],\n",
    "['speechless-codellama-34B-v2.0',77.4],\n",
    "['GPT-3.5-Turbo (Nov 2023)',76.8],\n",
    "['Magicoder-S-DS-6.7B',76.8],\n",
    "['claude-3-haiku (Mar 2024)',76.8],\n",
    "['Mixtral-8x22B-Instruct-v0.1',76.2],\n",
    "['Artigenz-Coder-DS-6.7B',75.6],\n",
    "['DeepSeek-Coder-7B-instruct-v1.5',75.6],\n",
    "['XwinCoder-34B',75.6],\n",
    "['WaveCoder-Ultra-6.7B',75],\n",
    "['databricks/dbrx-instruct',75],\n",
    "['DeepSeek-Coder-6.7B-instruct',74.4],\n",
    "['code-millenials-34B',74.4],\n",
    "['starchat2-15b-v0.1',73.8],\n",
    "['GPT-3.5 (May 2023)',73.2],\n",
    "['WizardCoder-Python-34B-V1.0',73.2],\n",
    "['OpenChat-3.5-7B-0106',72.6],\n",
    "['CodeLlama-70B-Instruct',72],\n",
    "['WhiteRabbitNeo-33B-v1',72],\n",
    "['Phind-CodeLlama-34B-v2',71.3],\n",
    "['speechless-coder-ds-6.7B',71.3],\n",
    "['Magicoder-S-CL-7B',70.7],\n",
    "['claude-3-sonnet (Mar 2024)',70.7],\n",
    "['Mistral Large (Mar 2024)',69.5],\n",
    "['claude-2 (Mar 2024)',69.5],\n",
    "['Qwen1.5-72B-Chat',68.3],\n",
    "['Gemini Pro 1.5',68.3],\n",
    "['starcoder2-15b-instruct-v0.1',67.7],\n",
    "['speechless-starcoder2-15b',67.1],\n",
    "['DeepSeek-Coder-1.3B-instruct',65.9],\n",
    "['Code-290k-6.7B-Instruct',64.6],\n",
    "['Phi-3-mini-4k-instruct',64.6],\n",
    "['Command-R+',64],\n",
    "['dolphin-2.6-mixtral-8x7b',64],\n",
    "['Gemini Pro 1.0',63.4],\n",
    "['Llama3-8B-instruct',61.6],\n",
    "['codegemma-7b-it',60.4],\n",
    "['claude-instant-1 (Mar 2024)',57.3],\n",
    "['WizardCoder-15B-V1.0',56.7],\n",
    "['Code-13B',56.1],\n",
    "['speechless-starcoder2-7b',56.1],\n",
    "['CodeLlama-70B',55.5],\n",
    "['Code-33B',54.9],\n",
    "['speechless-coding-7B-16k-tora',54.9],\n",
    "['OpenHermes-2.5-Code-290k-13B',54.3],\n",
    "['CodeLlama-34B',51.8],\n",
    "['CodeQwen1.5-7B',51.8],\n",
    "['DeepSeek-Coder-33B-base',51.2],\n",
    "['WizardCoder-Python-7B-V1.0',50.6],\n",
    "['phi-2-2.7B',49.4],\n",
    "['Mistral-codealpaca-7B',48.2],\n",
    "['speechless-code-mistral-7B-v1.0',48.2],\n",
    "['DeepSeek-Coder-6.7B-base',47.6],\n",
    "['MistralHermes-CodePro-7B-v1',47.6],\n",
    "['StarCoder2-15B',46.3],\n",
    "['Mixtral-8x7B-Instruct-v0.1',45.1],\n",
    "['codegemma-7b',44.5],\n",
    "['SOLAR-10.7B-Instruct-v1.0',43.3],\n",
    "['CodeLlama-13B',42.7],\n",
    "['gemma-1.1-7b-it',42.7],\n",
    "['Mistral-7B-Instruct-v0.2',42.1],\n",
    "['xDAN-L1-Chat-RL-v1-7B',40.2],\n",
    "['CodeLlama-7B',37.8],\n",
    "['StarCoder2-7B',35.4],\n",
    "['gemma-7b',35.4],\n",
    "['StarCoder-15B',34.1],\n",
    "['Llama3-8B-base',33.5],\n",
    "['CodeGen-16B',32.9],\n",
    "['Python-Code-13B',32.9],\n",
    "['CodeT5+-16B',31.7],\n",
    "['StarCoder2-3B',31.7],\n",
    "['Zephyr β-7B',30],\n",
    "['CodeGen-6B',29.3],\n",
    "['CodeT5+-6B',29.3],\n",
    "['stable-code-3B',29.3],\n",
    "['DeepSeek-Coder-1.3B-base',28.7],\n",
    "['Mistral-7B',28.7],\n",
    "['gemma-7b-it',28.7],\n",
    "['codegemma-2b',26.8],\n",
    "['CodeT5+-2B',25],\n",
    "['gemma-2b',25],\n",
    "['CodeGen-2B',24.4],\n",
    "['StarCoderBase-7B',24.4],\n",
    "['gemma-1.1-2b-it',22.6],\n",
    "['CodeGen2-16B',19.5],\n",
    "['CodeGen2-7B',18.3],\n",
    "['StarCoderBase-3B',17.7],\n",
    "['gemma-2b-it',17.7],\n",
    "['Vicuna-13B',17.1],\n",
    "['CodeGen2-3B',15.9],\n",
    "['InCoder-6.7B',15.9],\n",
    "['SantaCoder-1.1B',14.6],\n",
    "['StarCoderBase-1B',14.6],\n",
    "['GPT-J-6B',12.2],\n",
    "['InCoder-1.3B',12.2],\n",
    "['Vicuna-7B',11.6],\n",
    "['CodeGen2-1B',11],\n",
    "['GPT-Neo-2.7B',7.9],\n",
    "['PolyCoder-2.7B',6.1],\n",
    "['StableLM-7B',2.4],\n",
    "['zyte-1B',2.4]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "832b723a-e043-419d-85cc-ea0c81491044",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Turn the [name, score] pairs into a table: lowercase the model names and rescale the\n",
    "# scores from percentages to fractions.\n",
    "humaneval = pd.DataFrame(humaneval, columns=['Model', 'HumanEval'])\n",
    "humaneval['Model'] = humaneval['Model'].str.lower()\n",
    "humaneval['HumanEval'] = humaneval['HumanEval'].astype(float) / 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "31812e93-e53f-47b4-846e-2dc3bea17641",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                   0                            1    2\n",
      "55                     codellama-34b                codellama-34b  100\n",
      "57           deepseek-coder-33b-base      deepseek-coder-33b-base  100\n",
      "62          deepseek-coder-6.7b-base     deepseek-coder-6.7b-base  100\n",
      "64                    starcoder2-15b               starcoder2-15b  100\n",
      "65        mixtral-8x7b-instruct-v0.1   mixtral-8x7b-instruct-v0.1  100\n",
      "96                  starcoderbase-3b             starcoderbase-3b  100\n",
      "97                       gemma-2b-it                  gemma-2b-it  100\n",
      "15       mixtral-8x22b-instruct-v0.1  mixtral-8x22b-instruct-v0.1  100\n",
      "51                     codellama-70b                codellama-70b  100\n",
      "85          deepseek-coder-1.3b-base     deepseek-coder-1.3b-base  100\n",
      "87                       gemma-7b-it                  gemma-7b-it  100\n",
      "90                          gemma-2b                     gemma-2b  100\n",
      "27            codellama-70b-instruct       codellama-70b-instruct  100\n",
      "72                      codellama-7b                 codellama-7b  100\n",
      "73                     starcoder2-7b                starcoder2-7b  100\n",
      "74                          gemma-7b                     gemma-7b  100\n",
      "35                  qwen1.5-72b-chat             qwen1.5-72b-chat  100\n",
      "80                     starcoder2-3b                starcoder2-3b  100\n",
      "68                     codellama-13b                codellama-13b  100\n",
      "69                   gemma-1.1-7b-it              gemma-1.1-7b-it  100\n",
      "102                 starcoderbase-1b             starcoderbase-1b  100\n",
      "103                         gpt-j-6b                     gpt-j-6b  100\n",
      "107                     gpt-neo-2.7b                 gpt-neo-2.7b  100\n",
      "92                  starcoderbase-7b             starcoderbase-7b  100\n",
      "93                   gemma-1.1-2b-it              gemma-1.1-2b-it  100\n",
      "70          mistral-7b-instruct-v0.2     mistral-7b-instruct-v0.1   96\n",
      "75                     starcoder-15b               starcoder2-15b   96\n",
      "77                       codegen-16b               codegen-16b-nl   88\n",
      "4                codeqwen1.5-7b-chat              qwen1.5-7b-chat   88\n",
      "82                        codegen-6b                codegen-6b-nl   87\n",
      "46                   codegemma-7b-it                  gemma-7b-it   85\n",
      "94                      codegen2-16b               codegen-16b-nl   85\n",
      "56                    codeqwen1.5-7b                   qwen1.5-7b   83\n",
      "98                        vicuna-13b               vicuna-13b-16k   83\n",
      "9                llama3-70b-instruct    meta-llama-3-70b-instruct   82\n",
      "45                llama3-8b-instruct     meta-llama-3-8b-instruct   81\n",
      "66                      codegemma-7b                     gemma-7b   80\n",
      "12          gpt-3.5-turbo (nov 2023)           gpt-3.5-turbo-0613   80\n",
      "88                      codegemma-2b                     gemma-2b   80\n",
      "86                        mistral-7b              mistral-7b-v0.1   80\n",
      "106                      codegen2-1b               codegen-16b-nl   80\n",
      "47       claude-instant-1 (mar 2024)           claude-instant-1.1   79\n",
      "91                        codegen-2b                codegen-6b-nl   78\n",
      "105                        vicuna-7b               vicuna-7b-v1.3   78\n",
      "109                      stablelm-7b              stablelm-2-1_6b   77\n",
      "101                  santacoder-1.1b               starcoder2-15b   76\n",
      "48              wizardcoder-15b-v1.0            wizardlm-13b-v1.0   76\n",
      "49                          code-13b                codellama-13b   76\n",
      "67         solar-10.7b-instruct-v1.0         smollm-1.7b-instruct   76\n",
      "95                       codegen2-7b                codegen-6b-nl   75\n",
      "99                       codegen2-3b                codegen-6b-nl   75\n",
      "29            phind-codellama-34b-v2                codellama-34b   74\n",
      "40           code-290k-6.7b-instruct        codellama-7b-instruct   73\n",
      "21      deepseek-coder-6.7b-instruct     deepseek-coder-6.7b-base   73\n",
      "39      deepseek-coder-1.3b-instruct     deepseek-coder-1.3b-base   73\n",
      "37      starcoder2-15b-instruct-v0.1     mistral-7b-instruct-v0.1   73\n",
      "6        deepseek-coder-33b-instruct      deepseek-coder-33b-base   72\n",
      "38         speechless-starcoder2-15b               starcoder2-15b   72\n",
      "60             mistral-codealpaca-7b        codellama-7b-instruct   71\n",
      "7               wizardcoder-33b-v1.1            wizardlm-13b-v1.0   70\n",
      "50          speechless-starcoder2-7b                starcoder2-7b   70\n",
      "16            artigenz-coder-ds-6.7b     deepseek-coder-6.7b-base   70\n",
      "22               code-millenials-34b                codellama-34b   69\n",
      "3             gpt-4-turbo (nov 2023)           gpt-3.5-turbo-0613   68\n",
      "41            phi-3-mini-4k-instruct             mpt-30b-instruct   68\n",
      "71             xdan-l1-chat-rl-v1-7b            lemur-70b-chat-v1   68\n",
      "59                        phi-2-2.7b                  pythia-2.8b   67\n",
      "52                          code-33b                starcoder2-3b   67\n",
      "54      openhermes-2.5-code-290k-13b               openhermes-13b   67\n",
      "76                    llama3-8b-base                   llama-2-7b   67\n",
      "79                       codet5+-16b               codegen-16b-nl   67\n",
      "23                starchat2-15b-v0.1              mistral-7b-v0.1   67\n",
      "34               claude-2 (mar 2024)                   claude-2.0   67\n",
      "1                   gpt-4 (may 2023)                   gpt-4-0314   67\n",
      "26              openchat-3.5-7b-0106            openchat-13b-v3.2   65\n",
      "17   deepseek-coder-7b-instruct-v1.5     deepseek-coder-6.7b-base   65\n",
      "24                gpt-3.5 (may 2023)           gpt-3.5-turbo-0613   65\n",
      "58        wizardcoder-python-7b-v1.0            wizardlm-70b-v1.0   65\n",
      "0           gpt-4-turbo (april 2024)           gpt-3.5-turbo-0613   65\n",
      "13               magicoder-s-ds-6.7b     deepseek-coder-6.7b-base   65\n",
      "104                     incoder-1.3b                starcoder2-3b   64\n",
      "25       wizardcoder-python-34b-v1.0            wizardlm-13b-v1.0   64\n",
      "43          dolphin-2.6-mixtral-8x7b   mixtral-8x7b-instruct-v0.1   64\n",
      "83                        codet5+-6b                codegen-6b-nl   64\n",
      "78                   python-code-13b                      opt-13b   64\n",
      "100                     incoder-6.7b                starcoder2-7b   64\n",
      "11     speechless-codellama-34b-v2.0       codellama-34b-instruct   63\n",
      "30          speechless-coder-ds-6.7b     deepseek-coder-6.7b-base   62\n",
      "18                     xwincoder-34b                starcoder2-3b   62\n",
      "84                    stable-code-3b              stablelm-2-1_6b   62\n",
      "19              wavecoder-ultra-6.7b                starcoder2-7b   61\n",
      "31                 magicoder-s-cl-7b             starcoderbase-7b   61\n",
      "108                   polycoder-2.7b                    orca-2-7b   61\n",
      "10       opencodeinterpreter-ds-6.7b     deepseek-coder-6.7b-base   59\n",
      "110                          zyte-1b                   pythia-12b   59\n",
      "89                        codet5+-2b                     opt-2.7b   59\n",
      "33          mistral large (mar 2024)                gemma-2-2b-it   57\n",
      "20          databricks/dbrx-instruct           decilm-7b-instruct   57\n",
      "2         deepseek-coder-v2-instruct      deepseek-coder-33b-base   57\n",
      "8         opencodeinterpreter-ds-33b      deepseek-coder-33b-base   57\n",
      "63       mistralhermes-codepro-7b-v1              mistral-7b-v0.1   57\n",
      "61   speechless-code-mistral-7b-v1.0              mistral-7b-v0.1   57\n",
      "32        claude-3-sonnet (mar 2024)           claude-instant-1.1   57\n",
      "5           claude-3-opus (mar 2024)                   claude-1.3   56\n",
      "14         claude-3-haiku (mar 2024)                   claude-1.3   55\n",
      "28             whiterabbitneo-33b-v1              vicuna-33b-v1.3   50\n",
      "36                    gemini pro 1.5                   qwen2-1.5b   50\n",
      "81                       zephyr β-7b                openhermes-7b   50\n",
      "53     speechless-coding-7b-16k-tora     mistral-7b-instruct-v0.1   49\n",
      "42                        command-r+                 falcon-rw-1b   48\n",
      "44                    gemini pro 1.0              gemma-1.1-2b-it   48\n"
     ]
    }
   ],
   "source": [
    "# For each HumanEval model name, look up the closest leaderboard name and its fuzzy-match\n",
    "# score, then print the pairs from best to worst match for manual inspection.\n",
    "lb_names = list(cons_lb.Model)\n",
    "interdata = pd.DataFrame([[m1, *search(m1, lb_names)] for m1 in humaneval.Model])\n",
    "interdata = interdata.sort_values(by=[2], ascending=False)\n",
    "print(interdata.to_string())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "62b59bd3-e8ee-44e5-843f-3166832636f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Map HumanEval model names onto the leaderboard's standardized names so that the merge on\n",
    "# 'Model' finds them.\n",
    "# NOTE(review): the codegen2-* entries map a name to itself (no-ops), and the 'llama3-10b-base'\n",
    "# and 'vicuna-*-v1.1' keys do not occur in the HumanEval list in this notebook, so they never\n",
    "# fire -- confirm whether e.g. 'vicuna-13b' -> 'vicuna-13b-v1.1' was intended.\n",
    "names_map = {'codegen-16b':'codegen-16b-nl',\n",
    "             'codegen-2b':'codegen-2b-nl',\n",
    "             'codegen-6b':'codegen-6b-nl',\n",
    "             'codegen2-16b':'codegen2-16b',\n",
    "             'codegen2-1b':'codegen2-1b',\n",
    "             'codegen2-3b':'codegen2-3b',\n",
    "             'codegen2-7b':'codegen2-7b',\n",
    "             'llama3-70b-instruct':'meta-llama-3-70b-instruct',\n",
    "             'llama3-8b-base':'meta-llama-3-8b',\n",
    "             'llama3-10b-base':'meta-llama-3-70b',\n",
    "             'llama3-8b-instruct':'meta-llama-3-8b-instruct',\n",
    "             'mistral-7b':'mistral-7b-v0.1',\n",
    "             'vicuna-13b-v1.1':'vicuna-13b',\n",
    "             'vicuna-7b-v1.1':'vicuna-7b'}\n",
    "# Rename only within the 'Model' column; a frame-wide replace would also scan the score column.\n",
    "humaneval = humaneval.assign(Model=humaneval['Model'].replace(names_map))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "2d854029-0b2e-4e41-a554-160bdd0ef837",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(231, 117)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Right join: keep every leaderboard model and attach a HumanEval score where the names match.\n",
    "merged_lb = pd.merge(humaneval, cons_lb, on='Model', how='right')\n",
    "cons_lb = consolidate_columns(merged_lb)\n",
    "cons_lb.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "58209a82-24d9-4890-94a1-64a845709bfc",
   "metadata": {},
   "source": [
    "### Merging training tokens info\n",
    "\n",
    "Some models do not have training-token information available, so we had to fill it in by hand."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "6301ceb9-130b-4897-8c2e-1de5232e0812",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the two columns left after dropping family and parameter count, rename them,\n",
    "# then left-join the token counts onto the leaderboard by model name.\n",
    "training_tokens = pd.read_csv('training_tokens.csv').drop(columns=['Family','Parameters (b)'])\n",
    "training_tokens.columns = ['Model', 'Pretraining Data Size (T)']\n",
    "cons_lb = consolidate_columns(pd.merge(cons_lb, training_tokens, on='Model', how='left'))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "60909456-4764-43e4-b203-9fa5cb56dd1f",
   "metadata": {},
   "source": [
    "Filling some values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "7fad180f-725a-47fa-81a3-dc807cbcb234",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Instruct models take their pretraining token count (and size) from the non-instruct model\n",
    "# of the same family and size, when such a model exists.\n",
    "instruct_models = np.array(cons_lb.loc[cons_lb.Instruct].Model)\n",
    "# Hard-coded sizes used for the base-model lookup of the dolly models\n",
    "size_overrides = {'dolly-v2-3b': 2.8, 'dolly-v2-7b': 6.9}\n",
    "\n",
    "for m in instruct_models:\n",
    "    is_m = cons_lb.Model == m\n",
    "    family, size = tuple(cons_lb.loc[is_m, ['Family', '#Params (B)']].iloc[0])\n",
    "    size = size_overrides.get(m, size)\n",
    "\n",
    "    # The mask is computed once per model: none of its inputs change between the two column updates\n",
    "    ind = np.array(cons_lb.Family == family) & np.array(cons_lb['#Params (B)'] == size) & np.array(cons_lb['Instruct'] == False)\n",
    "    if ind.any():\n",
    "        for var in ['Pretraining Data Size (T)', '#Params (B)']:\n",
    "            cons_lb.loc[is_m, var] = cons_lb.loc[ind, var].iloc[0]\n",
    "    # When no matching base model exists, the instruct row keeps its current values\n",
    "\n",
    "# 6 * parameters * tokens; with parameters in billions and tokens in trillions the unit is 1e21\n",
    "cons_lb['FLOPs (1E21)'] = 6*cons_lb['#Params (B)']*cons_lb['Pretraining Data Size (T)']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "a20a020b-4cb2-437c-b405-d12be21a94b1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['amber', 'bloom', 'claude-1', 'claude-2', 'claude-instant',\n",
       "       'codegen', 'codellama', 'codellama-instruct', 'decilm',\n",
       "       'deepseek-coder', 'deepseek-llm', 'deepseek-llm-chat',\n",
       "       'deepseek-moe', 'falcon', 'gemma', 'gemma-1.1', 'gemma-2', 'gpt-2',\n",
       "       'gpt-3.5-turbo', 'gpt-4', 'gpt-neo/j', 'guanaco', 'internlm2',\n",
       "       'koala', 'lemur-chat', 'llama', 'llama-2', 'llama-v1.3',\n",
       "       'meta-llama-3', 'meta-llama-3.1', 'mistral', 'mixtral-8x-v0.1',\n",
       "       'mpt', 'oasst-sft', 'olmo', 'openchat', 'openllama', 'openllamav2',\n",
       "       'opt', 'phi', 'pythia', 'qwen', 'qwen1.5', 'qwen2',\n",
       "       'recurrentgemma', 'redpajama-incite-base', 'rwkv', 'smollm',\n",
       "       'stablelm', 'starcoder', 'starcoder2', 'vicuna', 'wizardlm',\n",
       "       'xglm', 'yi', 'yi-1.5', 'yi-200k'], dtype=object)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.unique(cons_lb['Family'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a6da06b4-835d-4837-8cec-49e2214c6127",
   "metadata": {},
   "source": [
    "### Filtering families\n",
    "Keeping only the model families that have at least two base models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "2c00001a-eb00-4575-ae0d-f3ef9947fe8a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count base (non-instruct) models per family and keep only families with more than one.\n",
    "ind = (1-(cons_lb.Instruct)).astype(bool)\n",
    "family_names, n_base_models = np.unique(cons_lb.loc[ind].Family, return_counts=True)\n",
    "families = list(family_names[n_base_models > 1])\n",
    "cons_lb = cons_lb.loc[cons_lb.Family.isin(families)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "1b27579d-8746-4a28-9764-45aa6980a426",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      False\n",
       "1       True\n",
       "2      False\n",
       "3       True\n",
       "4      False\n",
       "       ...  \n",
       "225     True\n",
       "226     True\n",
       "228     True\n",
       "229     True\n",
       "230     True\n",
       "Name: Instruct, Length: 197, dtype: object"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cons_lb.Instruct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "201ee709-eac4-4bec-a631-c917065d4027",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Any model whose name contains 'dedup' is placed in its own 'pythia-deduped' family.\n",
    "is_deduped = cons_lb.Model.str.contains('dedup', regex=False)\n",
    "cons_lb.loc[is_deduped, 'Family'] = 'pythia-deduped'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "9cb5eac8-149f-4dfd-86b8-77d961777d8c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Move the identifying columns to the front, keep the rest in their existing order, and save.\n",
    "# NOTE: the name `vars` shadows the Python builtin of the same name for the rest of the notebook.\n",
    "vars = ['Model','Family','Instruct','date','#Params (B)','Pretraining Data Size (T)','FLOPs (1E21)']\n",
    "trailing_columns = [c for c in cons_lb.columns if c not in vars]\n",
    "cons_lb = cons_lb[vars + trailing_columns]\n",
    "cons_lb.to_csv('data_v1.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c5cbdd87-6a64-402e-99bb-3cd4a5cdcc08",
   "metadata": {},
   "source": [
    "V2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "9e72faee-b19f-40a4-a336-6b2c4f1f6fd4",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_775174/278588740.py:19: RuntimeWarning: Mean of empty slice\n",
      "  np.nanmean(diffs,0)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([       nan,        nan,        nan,        nan,        nan,\n",
       "              nan, 0.00826668, 0.01478111, 0.00976551, 0.01235135,\n",
       "       0.0050058 , 0.0022627 ,        nan])"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cons_lb = cons_lb.sort_values(by=['Model','#Params (B)'])\n",
    "benchs = ['IFEval', 'BBH', 'MATH Lvl 5', 'GPQA', 'MUSR', 'MMLU-PRO',\n",
    "          'MMLU', 'ARC', 'HellaSwag', 'Winogrande', 'TruthfulQA', 'GSM8K', 'HumanEval']\n",
    "\n",
    "# Per-benchmark mean absolute score gap between each pythia model and its '-deduped' twin.\n",
    "# A benchmark with no score for any pair comes out as nan (numpy warns 'Mean of empty slice').\n",
    "pythia_pairs = ['pythia-12b','pythia-160m','pythia-2.8b','pythia-410m','pythia-6.9b']\n",
    "diffs = np.vstack([\n",
    "    np.abs(np.array(cons_lb.loc[cons_lb.Model==model, benchs])\n",
    "           - np.array(cons_lb.loc[cons_lb.Model==model+'-deduped', benchs]))\n",
    "    for model in pythia_pairs\n",
    "])\n",
    "np.nanmean(diffs,0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "be498b86-e4da-4db4-99f2-5d5c499fc55b",
   "metadata": {},
   "outputs": [],
   "source": [
    "benchs = ['IFEval',\n",
    " 'BBH',\n",
    " 'MATH Lvl 5',\n",
    " 'GPQA',\n",
    " 'MUSR',\n",
    " 'MMLU-PRO',\n",
    " 'bbh_boolean_expressions',\n",
    " 'bbh_causal_judgement',\n",
    " 'bbh_date_understanding',\n",
    " 'bbh_disambiguation_qa',\n",
    " 'bbh_formal_fallacies',\n",
    " 'bbh_geometric_shapes',\n",
    " 'bbh_hyperbaton',\n",
    " 'bbh_logical_deduction_five_objects',\n",
    " 'bbh_logical_deduction_seven_objects',\n",
    " 'bbh_logical_deduction_three_objects',\n",
    " 'bbh_movie_recommendation',\n",
    " 'bbh_navigate',\n",
    " 'bbh_object_counting',\n",
    " 'bbh_penguins_in_a_table',\n",
    " 'bbh_reasoning_about_colored_objects',\n",
    " 'bbh_ruin_names',\n",
    " 'bbh_salient_translation_error_detection',\n",
    " 'bbh_snarks',\n",
    " 'bbh_sports_understanding',\n",
    " 'bbh_temporal_sequences',\n",
    " 'bbh_tracking_shuffled_objects_five_objects',\n",
    " 'bbh_tracking_shuffled_objects_seven_objects',\n",
    " 'bbh_tracking_shuffled_objects_three_objects',\n",
    " 'bbh_web_of_lies',\n",
    " 'gpqa_diamond',\n",
    " 'gpqa_extended',\n",
    " 'gpqa_main',\n",
    " 'math_algebra_hard',\n",
    " 'math_counting_and_prob_hard',\n",
    " 'math_geometry_hard',\n",
    " 'math_intermediate_algebra_hard',\n",
    " 'math_num_theory_hard',\n",
    " 'math_prealgebra_hard',\n",
    " 'math_precalculus_hard',\n",
    " 'musr_murder_mysteries',\n",
    " 'musr_object_placements',\n",
    " 'musr_team_allocation',\n",
    " 'XWinograd',\n",
    " 'Arena-Elo',\n",
    " 'MTBench',\n",
    " 'hendrycksTest-abstract_algebra',\n",
    " 'hendrycksTest-anatomy',\n",
    " 'hendrycksTest-astronomy',\n",
    " 'hendrycksTest-business_ethics',\n",
    " 'hendrycksTest-clinical_knowledge',\n",
    " 'hendrycksTest-college_biology',\n",
    " 'hendrycksTest-college_chemistry',\n",
    " 'hendrycksTest-college_computer_science',\n",
    " 'hendrycksTest-college_mathematics',\n",
    " 'hendrycksTest-college_medicine',\n",
    " 'hendrycksTest-college_physics',\n",
    " 'hendrycksTest-computer_security',\n",
    " 'hendrycksTest-conceptual_physics',\n",
    " 'hendrycksTest-econometrics',\n",
    " 'hendrycksTest-electrical_engineering',\n",
    " 'hendrycksTest-elementary_mathematics',\n",
    " 'hendrycksTest-formal_logic',\n",
    " 'hendrycksTest-global_facts',\n",
    " 'hendrycksTest-high_school_biology',\n",
    " 'hendrycksTest-high_school_chemistry',\n",
    " 'hendrycksTest-high_school_computer_science',\n",
    " 'hendrycksTest-high_school_european_history',\n",
    " 'hendrycksTest-high_school_geography',\n",
    " 'hendrycksTest-high_school_government_and_politics',\n",
    " 'hendrycksTest-high_school_macroeconomics',\n",
    " 'hendrycksTest-high_school_mathematics',\n",
    " 'hendrycksTest-high_school_microeconomics',\n",
    " 'hendrycksTest-high_school_physics',\n",
    " 'hendrycksTest-high_school_psychology',\n",
    " 'hendrycksTest-high_school_statistics',\n",
    " 'hendrycksTest-high_school_us_history',\n",
    " 'hendrycksTest-high_school_world_history',\n",
    " 'hendrycksTest-human_aging',\n",
    " 'hendrycksTest-human_sexuality',\n",
    " 'hendrycksTest-international_law',\n",
    " 'hendrycksTest-jurisprudence',\n",
    " 'hendrycksTest-logical_fallacies',\n",
    " 'hendrycksTest-machine_learning',\n",
    " 'hendrycksTest-management',\n",
    " 'hendrycksTest-marketing',\n",
    " 'hendrycksTest-medical_genetics',\n",
    " 'hendrycksTest-miscellaneous',\n",
    " 'hendrycksTest-moral_disputes',\n",
    " 'hendrycksTest-moral_scenarios',\n",
    " 'hendrycksTest-nutrition',\n",
    " 'hendrycksTest-philosophy',\n",
    " 'hendrycksTest-prehistory',\n",
    " 'hendrycksTest-professional_accounting',\n",
    " 'hendrycksTest-professional_law',\n",
    " 'hendrycksTest-professional_medicine',\n",
    " 'hendrycksTest-professional_psychology',\n",
    " 'hendrycksTest-public_relations',\n",
    " 'hendrycksTest-security_studies',\n",
    " 'hendrycksTest-sociology',\n",
    " 'hendrycksTest-us_foreign_policy',\n",
    " 'hendrycksTest-virology',\n",
    " 'hendrycksTest-world_religions',\n",
    " 'MMLU',\n",
    " 'ARC',\n",
    " 'HellaSwag',\n",
    " 'Winogrande',\n",
    " 'TruthfulQA',\n",
    " 'GSM8K',\n",
    " 'HumanEval']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "a928124c-1f53-464c-b1df-4842290f01cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Merge each pythia model with its '-deduped' twin: fill the base row's missing scores\n",
    "# from the twin, then drop the twin's row.\n",
    "for model in ['pythia-12b','pythia-160m','pythia-2.8b','pythia-410m','pythia-6.9b']:\n",
    "    is_base = cons_lb.Model == model\n",
    "    is_twin = cons_lb.Model == model + '-deduped'\n",
    "    for bench in benchs:\n",
    "        base_score = cons_lb.loc[is_base, bench].iloc[0]\n",
    "        twin_score = cons_lb.loc[is_twin, bench].iloc[0]\n",
    "        if np.isnan(base_score):\n",
    "            cons_lb.loc[is_base, bench] = twin_score\n",
    "    cons_lb = cons_lb.loc[~is_twin]\n",
    "# Relabel the remaining deduped models as plain pythia entries by stripping '-deduped' from\n",
    "# both family and name (presumably these have no non-deduped counterpart here -- TODO confirm).\n",
    "for model in ['pythia-1.4b-deduped','pythia-1b-deduped','pythia-70m-deduped']:\n",
    "    is_model = cons_lb.Model == model\n",
    "    for column in ['Family', 'Model']:\n",
    "        cons_lb.loc[is_model, column] = cons_lb.loc[is_model, column].iloc[0].replace('-deduped','')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "13f1ce0e-a26f-443f-a642-6b0be94f4ff7",
   "metadata": {},
   "source": [
    "A longer `models_to_delete` list, kept for reference only (this is a markdown cell, so it is not executed; the list that is actually applied is defined in the next code cell):\n",
    "\n",
    "```python\n",
    "models_to_delete = ['blossom-v5.1-34b',\n",
    " 'blossom-v5.1-9b',\n",
    " 'meta-llama-3.1-70b',\n",
    " 'meta-llama-3.1-70b-instruct',\n",
    " 'meta-llama-3.1-8b',\n",
    " 'meta-llama-3.1-8b-instruct',\n",
    " 'falcon-rw-1b',\n",
    " 'sauerkrautlm-gemma-2b',\n",
    " 'sauerkrautlm-gemma-7b',\n",
    " 'openhermes-13b',\n",
    " 'openhermes-7b',\n",
    " 'orca-2-13b',\n",
    " 'orca-2-7b',\n",
    " 'orca_mini_v3_13b',\n",
    " 'orca_mini_v3_70b',\n",
    " 'orca_mini_v3_7b',\n",
    " 'wizardlm-13b-v1.0',\n",
    " 'wizardlm-70b-v1.0',\n",
    " 'llama-3-sauerkrautlm-70b-instruct',\n",
    " 'llama-3-sauerkrautlm-8b-instruct',\n",
    " 'mpt-30b-chat',\n",
    " 'mpt-7b-chat',\n",
    " 'dolphin-2.9.2-qwen2-72b',\n",
    " 'dolphin-2.9.2-qwen2-7b',\n",
    " 'dolphin-2.9.1-yi-1.5-34b',\n",
    " 'dolphin-2.9.1-yi-1.5-9b',\n",
    " 'orca_mini_v7_7b',\n",
    " 'orca_mini_v7_72b']\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "8b445d3e-a637-4252-bdf7-9e6a68f3f774",
   "metadata": {},
   "outputs": [],
   "source": [
    "models_to_delete = ['meta-llama-3.1-70b',\n",
    " 'meta-llama-3.1-70b-instruct',\n",
    " 'meta-llama-3.1-8b',\n",
    " 'meta-llama-3.1-8b-instruct']\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "69e6fde7-75b9-4c3b-8adf-6e249e7259f4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(188, 117)"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Drop every model listed in models_to_delete with a single filter, then show the new shape.\n",
    "cons_lb = cons_lb.loc[~cons_lb.Model.isin(models_to_delete)]\n",
    "cons_lb.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "8c6b218c-1663-484c-b476-881e052f8379",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Order rows by model name (then size) and renumber the index from 0.\n",
    "cons_lb = cons_lb.sort_values(by=['Model','#Params (B)']).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "fd94eadd-7f9f-44e4-9d52-8ef3234b6860",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        bloom\n",
       "1        bloom\n",
       "2        bloom\n",
       "3        bloom\n",
       "4        bloom\n",
       "        ...   \n",
       "183    yi-chat\n",
       "184         yi\n",
       "185    yi-200k\n",
       "186    yi-chat\n",
       "187         yi\n",
       "Name: Family2, Length: 188, dtype: object"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Family2 is the model name after remove_params has stripped its parameter-size token.\n",
    "cons_lb['Family2'] = cons_lb['Model'].map(remove_params)\n",
    "cons_lb['Family2']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "7b91f5c9-fa34-4d7d-a051-bed918f86846",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pool the gpt-j, gpt-neo and gpt-neox labels under a single Family2 value.\n",
    "gpt_labels = ['gpt-j', 'gpt-neo', 'gpt-neox']\n",
    "cons_lb.loc[cons_lb['Family2'].isin(gpt_labels), 'Family2'] = 'gpt-j-neo-neox'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "3271d22f-8850-458a-a551-aef68a3b136f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['bloom', 'blossom-v5.1', 'codegen-nl', 'codellama',\n",
       "       'codellama-instruct', 'deepseek-coder-base', 'dolly-v2',\n",
       "       'dolphin-2.9.1-yi-1.5', 'dolphin-2.9.2-qwen2', 'falcon',\n",
       "       'falcon-instruct', 'falcon-rw', 'gemma', 'gemma-2', 'gemma-2-it',\n",
       "       'gemma-it', 'gpt-j-neo-neox', 'gpt2', 'gpt2-large', 'internlm2',\n",
       "       'llama', 'llama-2', 'llama-2-chat',\n",
       "       'llama-3-sauerkrautlm-instruct', 'meta-llama-3',\n",
       "       'meta-llama-3-instruct', 'mixtral-8x-instruct-v0.1',\n",
       "       'mixtral-8x-v0.1', 'mpt', 'mpt-chat', 'mpt-instruct', 'olmo',\n",
       "       'open_llama_', 'open_llama__v2', 'openhermes', 'opt', 'orca-2',\n",
       "       'orca_mini_v3_', 'orca_mini_v7_', 'phi-1_5', 'phi-2', 'pythia',\n",
       "       'qwen', 'qwen1.5', 'qwen1.5-chat', 'qwen2', 'qwen2-instruct',\n",
       "       'recurrentgemma', 'recurrentgemma-it',\n",
       "       'redpajama-incite-base-v0.1', 'redpajama-incite-base-v1',\n",
       "       'rwkv-4-pile', 'sauerkrautlm-gemma', 'smollm', 'smollm-instruct',\n",
       "       'stablelm-2-1_', 'stablelm-2-1_-chat', 'stablelm-base-alpha',\n",
       "       'stablelm-base-alpha-v2', 'starcoder2', 'starcoderbase',\n",
       "       'wizardlm-v1.0', 'xglm', 'yi', 'yi-1.5', 'yi-1.5-chat', 'yi-200k',\n",
       "       'yi-chat'], dtype=object)"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sorted distinct Family2 labels (derived from the model names) for inspection.\n",
    "np.unique(cons_lb['Family2'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "144df4a9-b55d-4c0a-8839-44ad6fb28e25",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['bloom', 'codegen', 'codellama', 'deepseek-coder', 'falcon',\n",
       "       'gemma', 'gemma-2', 'gpt-2', 'gpt-neo/j', 'internlm2', 'llama',\n",
       "       'llama-2', 'meta-llama-3', 'mixtral-8x-v0.1', 'mpt', 'olmo',\n",
       "       'openllama', 'openllamav2', 'opt', 'phi', 'pythia', 'qwen',\n",
       "       'qwen1.5', 'qwen2', 'recurrentgemma', 'redpajama-incite-base',\n",
       "       'rwkv', 'smollm', 'stablelm', 'starcoder', 'starcoder2', 'xglm',\n",
       "       'yi', 'yi-1.5', 'yi-200k'], dtype=object)"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sorted distinct values of the existing Family column, to compare with the Family2 labels.\n",
    "np.unique(cons_lb['Family'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "e395a9d1-e0da-44b1-9fba-d97eb5f3ff8e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the current table to data_v2.csv in the working directory.\n",
    "# to_csv keeps its default index=True, so the row index becomes the first (unnamed) CSV column.\n",
    "cons_lb.to_csv('data_v2.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "480d944d-b1de-49d2-ab3d-6b13c31551b9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>Family</th>\n",
       "      <th>Instruct</th>\n",
       "      <th>date</th>\n",
       "      <th>#Params (B)</th>\n",
       "      <th>Pretraining Data Size (T)</th>\n",
       "      <th>FLOPs (1E21)</th>\n",
       "      <th>IFEval</th>\n",
       "      <th>BBH</th>\n",
       "      <th>MATH Lvl 5</th>\n",
       "      <th>...</th>\n",
       "      <th>hendrycksTest-virology</th>\n",
       "      <th>hendrycksTest-world_religions</th>\n",
       "      <th>MMLU</th>\n",
       "      <th>ARC</th>\n",
       "      <th>HellaSwag</th>\n",
       "      <th>Winogrande</th>\n",
       "      <th>TruthfulQA</th>\n",
       "      <th>GSM8K</th>\n",
       "      <th>HumanEval</th>\n",
       "      <th>Family2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>llama-3-sauerkrautlm-70b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>2024-07-21</td>\n",
       "      <td>70.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>6300.0</td>\n",
       "      <td>0.80</td>\n",
       "      <td>0.67</td>\n",
       "      <td>0.22</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>llama-3-sauerkrautlm-instruct</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>llama-3-sauerkrautlm-8b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>2024-04-23</td>\n",
       "      <td>8.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>720.0</td>\n",
       "      <td>0.74</td>\n",
       "      <td>0.49</td>\n",
       "      <td>0.06</td>\n",
       "      <td>...</td>\n",
       "      <td>0.481928</td>\n",
       "      <td>0.777778</td>\n",
       "      <td>0.680700</td>\n",
       "      <td>0.7372</td>\n",
       "      <td>0.894100</td>\n",
       "      <td>0.800300</td>\n",
       "      <td>0.662500</td>\n",
       "      <td>0.649700</td>\n",
       "      <td>NaN</td>\n",
       "      <td>llama-3-sauerkrautlm-instruct</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62</th>\n",
       "      <td>meta-llama-3-70b</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>False</td>\n",
       "      <td>2024-04-21</td>\n",
       "      <td>70.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>6300.0</td>\n",
       "      <td>0.16</td>\n",
       "      <td>0.65</td>\n",
       "      <td>0.17</td>\n",
       "      <td>...</td>\n",
       "      <td>0.584337</td>\n",
       "      <td>0.906433</td>\n",
       "      <td>0.792329</td>\n",
       "      <td>0.6877</td>\n",
       "      <td>0.879805</td>\n",
       "      <td>0.853197</td>\n",
       "      <td>0.455624</td>\n",
       "      <td>0.768764</td>\n",
       "      <td>0.52439</td>\n",
       "      <td>meta-llama-3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>meta-llama-3-70b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>2024-04-21</td>\n",
       "      <td>70.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>6300.0</td>\n",
       "      <td>0.81</td>\n",
       "      <td>0.65</td>\n",
       "      <td>0.23</td>\n",
       "      <td>...</td>\n",
       "      <td>0.566265</td>\n",
       "      <td>0.900585</td>\n",
       "      <td>0.800600</td>\n",
       "      <td>0.7142</td>\n",
       "      <td>0.856900</td>\n",
       "      <td>0.828700</td>\n",
       "      <td>0.618100</td>\n",
       "      <td>0.854400</td>\n",
       "      <td>0.77400</td>\n",
       "      <td>meta-llama-3-instruct</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64</th>\n",
       "      <td>meta-llama-3-8b</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>False</td>\n",
       "      <td>2024-04-19</td>\n",
       "      <td>8.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>720.0</td>\n",
       "      <td>0.15</td>\n",
       "      <td>0.46</td>\n",
       "      <td>0.03</td>\n",
       "      <td>...</td>\n",
       "      <td>0.560241</td>\n",
       "      <td>0.830409</td>\n",
       "      <td>0.664950</td>\n",
       "      <td>0.6024</td>\n",
       "      <td>0.820155</td>\n",
       "      <td>0.771113</td>\n",
       "      <td>0.439523</td>\n",
       "      <td>0.453374</td>\n",
       "      <td>0.33500</td>\n",
       "      <td>meta-llama-3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>meta-llama-3-8b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>2024-04-19</td>\n",
       "      <td>8.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>720.0</td>\n",
       "      <td>0.74</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.09</td>\n",
       "      <td>...</td>\n",
       "      <td>0.512048</td>\n",
       "      <td>0.777778</td>\n",
       "      <td>0.670700</td>\n",
       "      <td>0.6075</td>\n",
       "      <td>0.785500</td>\n",
       "      <td>0.745100</td>\n",
       "      <td>0.516500</td>\n",
       "      <td>0.686900</td>\n",
       "      <td>0.61600</td>\n",
       "      <td>meta-llama-3-instruct</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>6 rows × 118 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                Model        Family Instruct        date  \\\n",
       "57  llama-3-sauerkrautlm-70b-instruct  meta-llama-3     True  2024-07-21   \n",
       "58   llama-3-sauerkrautlm-8b-instruct  meta-llama-3     True  2024-04-23   \n",
       "62                   meta-llama-3-70b  meta-llama-3    False  2024-04-21   \n",
       "63          meta-llama-3-70b-instruct  meta-llama-3     True  2024-04-21   \n",
       "64                    meta-llama-3-8b  meta-llama-3    False  2024-04-19   \n",
       "65           meta-llama-3-8b-instruct  meta-llama-3     True  2024-04-19   \n",
       "\n",
       "    #Params (B)  Pretraining Data Size (T)  FLOPs (1E21)  IFEval   BBH  \\\n",
       "57         70.0                       15.0        6300.0    0.80  0.67   \n",
       "58          8.0                       15.0         720.0    0.74  0.49   \n",
       "62         70.0                       15.0        6300.0    0.16  0.65   \n",
       "63         70.0                       15.0        6300.0    0.81  0.65   \n",
       "64          8.0                       15.0         720.0    0.15  0.46   \n",
       "65          8.0                       15.0         720.0    0.74  0.50   \n",
       "\n",
       "    MATH Lvl 5  ...  hendrycksTest-virology  hendrycksTest-world_religions  \\\n",
       "57        0.22  ...                     NaN                            NaN   \n",
       "58        0.06  ...                0.481928                       0.777778   \n",
       "62        0.17  ...                0.584337                       0.906433   \n",
       "63        0.23  ...                0.566265                       0.900585   \n",
       "64        0.03  ...                0.560241                       0.830409   \n",
       "65        0.09  ...                0.512048                       0.777778   \n",
       "\n",
       "        MMLU     ARC  HellaSwag  Winogrande  TruthfulQA     GSM8K  HumanEval  \\\n",
       "57       NaN     NaN        NaN         NaN         NaN       NaN        NaN   \n",
       "58  0.680700  0.7372   0.894100    0.800300    0.662500  0.649700        NaN   \n",
       "62  0.792329  0.6877   0.879805    0.853197    0.455624  0.768764    0.52439   \n",
       "63  0.800600  0.7142   0.856900    0.828700    0.618100  0.854400    0.77400   \n",
       "64  0.664950  0.6024   0.820155    0.771113    0.439523  0.453374    0.33500   \n",
       "65  0.670700  0.6075   0.785500    0.745100    0.516500  0.686900    0.61600   \n",
       "\n",
       "                          Family2  \n",
       "57  llama-3-sauerkrautlm-instruct  \n",
       "58  llama-3-sauerkrautlm-instruct  \n",
       "62                   meta-llama-3  \n",
       "63          meta-llama-3-instruct  \n",
       "64                   meta-llama-3  \n",
       "65          meta-llama-3-instruct  \n",
       "\n",
       "[6 rows x 118 columns]"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check the rows whose Family is 'meta-llama-3' (compare Model against Family2).\n",
    "cons_lb.loc[cons_lb.Family=='meta-llama-3']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5568cdb3-8e7d-4cac-b65c-15087ae66871",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 185,
   "id": "4be50440-77df-461e-96ad-fd1db64a6220",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Order rows by family, then parameter count; within a size, Instruct=False sorts before True.\n",
    "# The index is not reset here, so rows keep the index labels they had before this sort.\n",
    "cons_lb = cons_lb.sort_values(by=['Family','#Params (B)','Instruct','Model'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "id": "af18021a-006e-42d2-b29e-30f6b8464843",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                    Family                              Model  #Params (B) Instruct\n",
      "3                    bloom                         bloom-560m        0.560    False\n",
      "1                    bloom                          bloom-1b1        1.000    False\n",
      "2                    bloom                           bloom-3b        3.000    False\n",
      "4                    bloom                          bloom-7b1        7.000    False\n",
      "0                    bloom                              bloom      176.000    False\n",
      "8                  codegen                      codegen-6b-nl        6.000    False\n",
      "7                  codegen                     codegen-16b-nl       16.000    False\n",
      "15               codellama                       codellama-7b        7.000    False\n",
      "16               codellama              codellama-7b-instruct        7.000     True\n",
      "9                codellama                      codellama-13b       13.000    False\n",
      "10               codellama             codellama-13b-instruct       13.000     True\n",
      "11               codellama                      codellama-34b       34.000    False\n",
      "12               codellama             codellama-34b-instruct       34.000     True\n",
      "13               codellama                      codellama-70b       70.000    False\n",
      "14               codellama             codellama-70b-instruct       70.000     True\n",
      "17          deepseek-coder           deepseek-coder-1.3b-base        1.300    False\n",
      "19          deepseek-coder           deepseek-coder-6.7b-base        6.700    False\n",
      "18          deepseek-coder            deepseek-coder-33b-base       33.000    False\n",
      "32                  falcon                       falcon-rw-1b        1.000    False\n",
      "30                  falcon                          falcon-7b        7.000    False\n",
      "31                  falcon                 falcon-7b-instruct        7.000     True\n",
      "28                  falcon                         falcon-40b       40.000    False\n",
      "29                  falcon                falcon-40b-instruct       40.000     True\n",
      "27                  falcon                        falcon-180b      180.000    False\n",
      "37                   gemma                           gemma-2b        2.000    False\n",
      "38                   gemma                        gemma-2b-it        2.000     True\n",
      "149                  gemma              sauerkrautlm-gemma-2b        2.000     True\n",
      "39                   gemma                           gemma-7b        7.000    False\n",
      "40                   gemma                        gemma-7b-it        7.000     True\n",
      "150                  gemma              sauerkrautlm-gemma-7b        7.000     True\n",
      "33                 gemma-2                         gemma-2-2b        2.000    False\n",
      "34                 gemma-2                      gemma-2-2b-it        2.000     True\n",
      "35                 gemma-2                         gemma-2-9b        9.000    False\n",
      "36                 gemma-2                      gemma-2-9b-it        9.000     True\n",
      "46                   gpt-2                               gpt2        0.124    False\n",
      "47                   gpt-2                         gpt2-large        0.774    False\n",
      "43               gpt-neo/j                       gpt-neo-125m        0.125    False\n",
      "42               gpt-neo/j                       gpt-neo-1.3b        1.300    False\n",
      "44               gpt-neo/j                       gpt-neo-2.7b        2.700    False\n",
      "41               gpt-neo/j                           gpt-j-6b        6.000    False\n",
      "45               gpt-neo/j                       gpt-neox-20b       20.000    False\n",
      "49               internlm2                       internlm2-7b        7.000    False\n",
      "48               internlm2                      internlm2-20b       20.000    False\n",
      "61                   llama                           llama-7b        7.000    False\n",
      "50                   llama                          llama-13b       13.000    False\n",
      "59                   llama                          llama-30b       30.000    False\n",
      "60                   llama                          llama-65b       65.000    False\n",
      "55                 llama-2                         llama-2-7b        7.000    False\n",
      "56                 llama-2                    llama-2-7b-chat        7.000     True\n",
      "84                 llama-2                      openhermes-7b        7.000     True\n",
      "94                 llama-2                          orca-2-7b        7.000     True\n",
      "97                 llama-2                    orca_mini_v3_7b        7.000     True\n",
      "51                 llama-2                        llama-2-13b       13.000    False\n",
      "52                 llama-2                   llama-2-13b-chat       13.000     True\n",
      "83                 llama-2                     openhermes-13b       13.000     True\n",
      "93                 llama-2                         orca-2-13b       13.000     True\n",
      "95                 llama-2                   orca_mini_v3_13b       13.000     True\n",
      "169                llama-2                  wizardlm-13b-v1.0       13.000     True\n",
      "53                 llama-2                        llama-2-70b       70.000    False\n",
      "54                 llama-2                   llama-2-70b-chat       70.000     True\n",
      "96                 llama-2                   orca_mini_v3_70b       70.000     True\n",
      "170                llama-2                  wizardlm-70b-v1.0       70.000     True\n",
      "64            meta-llama-3                    meta-llama-3-8b        8.000    False\n",
      "58            meta-llama-3   llama-3-sauerkrautlm-8b-instruct        8.000     True\n",
      "65            meta-llama-3           meta-llama-3-8b-instruct        8.000     True\n",
      "62            meta-llama-3                   meta-llama-3-70b       70.000    False\n",
      "57            meta-llama-3  llama-3-sauerkrautlm-70b-instruct       70.000     True\n",
      "63            meta-llama-3          meta-llama-3-70b-instruct       70.000     True\n",
      "69         mixtral-8x-v0.1                  mixtral-8x7b-v0.1        7.000    False\n",
      "68         mixtral-8x-v0.1         mixtral-8x7b-instruct-v0.1        7.000     True\n",
      "67         mixtral-8x-v0.1                 mixtral-8x22b-v0.1       22.000    False\n",
      "66         mixtral-8x-v0.1        mixtral-8x22b-instruct-v0.1       22.000     True\n",
      "73                     mpt                             mpt-7b        7.000    False\n",
      "74                     mpt                        mpt-7b-chat        7.000     True\n",
      "75                     mpt                    mpt-7b-instruct        7.000     True\n",
      "70                     mpt                            mpt-30b       30.000    False\n",
      "71                     mpt                       mpt-30b-chat       30.000     True\n",
      "72                     mpt                   mpt-30b-instruct       30.000     True\n",
      "76                    olmo                            olmo-1b        1.000    False\n",
      "77                    olmo                            olmo-7b        7.000    False\n",
      "79               openllama                      open_llama_3b        3.000    False\n",
      "81               openllama                      open_llama_7b        7.000    False\n",
      "78               openllama                     open_llama_13b       13.000    False\n",
      "80             openllamav2                   open_llama_3b_v2        3.000    False\n",
      "82             openllamav2                   open_llama_7b_v2        7.000    False\n",
      "86                     opt                           opt-125m        0.125    False\n",
      "90                     opt                           opt-350m        0.350    False\n",
      "85                     opt                           opt-1.3b        1.300    False\n",
      "88                     opt                           opt-2.7b        2.700    False\n",
      "91                     opt                           opt-6.7b        6.700    False\n",
      "87                     opt                            opt-13b       13.000    False\n",
      "89                     opt                            opt-30b       30.000    False\n",
      "92                     opt                            opt-66b       66.000    False\n",
      "100                    phi                            phi-1_5        1.300    False\n",
      "101                    phi                              phi-2        2.700    False\n",
      "109                 pythia                         pythia-70m        0.070    False\n",
      "104                 pythia                        pythia-160m        0.160    False\n",
      "107                 pythia                        pythia-410m        0.410    False\n",
      "105                 pythia                          pythia-1b        1.000    False\n",
      "102                 pythia                        pythia-1.4b        1.400    False\n",
      "106                 pythia                        pythia-2.8b        2.800    False\n",
      "21                  pythia                        dolly-v2-3b        2.800     True\n",
      "108                 pythia                        pythia-6.9b        6.900    False\n",
      "22                  pythia                        dolly-v2-7b        6.900     True\n",
      "103                 pythia                         pythia-12b       12.000    False\n",
      "20                  pythia                       dolly-v2-12b       12.000     True\n",
      "112                   qwen                            qwen-7b        7.000    False\n",
      "110                   qwen                           qwen-14b       14.000    False\n",
      "111                   qwen                           qwen-72b       72.000    False\n",
      "113                qwen1.5                       qwen1.5-0.5b        0.500    False\n",
      "114                qwen1.5                  qwen1.5-0.5b-chat        0.500     True\n",
      "115                qwen1.5                       qwen1.5-1.8b        1.800    False\n",
      "116                qwen1.5                  qwen1.5-1.8b-chat        1.800     True\n",
      "123                qwen1.5                         qwen1.5-4b        4.000    False\n",
      "124                qwen1.5                    qwen1.5-4b-chat        4.000     True\n",
      "127                qwen1.5                         qwen1.5-7b        7.000    False\n",
      "128                qwen1.5                    qwen1.5-7b-chat        7.000     True\n",
      "119                qwen1.5                        qwen1.5-14b       14.000    False\n",
      "120                qwen1.5                   qwen1.5-14b-chat       14.000     True\n",
      "121                qwen1.5                        qwen1.5-32b       32.000    False\n",
      "122                qwen1.5                   qwen1.5-32b-chat       32.000     True\n",
      "125                qwen1.5                        qwen1.5-72b       72.000    False\n",
      "126                qwen1.5                   qwen1.5-72b-chat       72.000     True\n",
      "117                qwen1.5                       qwen1.5-110b      110.000    False\n",
      "118                qwen1.5                  qwen1.5-110b-chat      110.000     True\n",
      "129                  qwen2                         qwen2-0.5b        0.500    False\n",
      "130                  qwen2                qwen2-0.5b-instruct        0.500     True\n",
      "131                  qwen2                         qwen2-1.5b        1.500    False\n",
      "132                  qwen2                qwen2-1.5b-instruct        1.500     True\n",
      "135                  qwen2                           qwen2-7b        7.000    False\n",
      "26                   qwen2             dolphin-2.9.2-qwen2-7b        7.000     True\n",
      "99                   qwen2                    orca_mini_v7_7b        7.000     True\n",
      "136                  qwen2                  qwen2-7b-instruct        7.000     True\n",
      "133                  qwen2                          qwen2-72b       72.000    False\n",
      "25                   qwen2            dolphin-2.9.2-qwen2-72b       72.000     True\n",
      "98                   qwen2                   orca_mini_v7_72b       72.000     True\n",
      "134                  qwen2                 qwen2-72b-instruct       72.000     True\n",
      "137         recurrentgemma                  recurrentgemma-2b        2.000    False\n",
      "138         recurrentgemma               recurrentgemma-2b-it        2.000     True\n",
      "139         recurrentgemma                  recurrentgemma-9b        9.000    False\n",
      "140         recurrentgemma               recurrentgemma-9b-it        9.000     True\n",
      "141  redpajama-incite-base        redpajama-incite-base-3b-v1        3.000    False\n",
      "142  redpajama-incite-base      redpajama-incite-base-7b-v0.1        7.000    False\n",
      "144                   rwkv                   rwkv-4-169m-pile        0.169    False\n",
      "147                   rwkv                   rwkv-4-430m-pile        0.430    False\n",
      "145                   rwkv                    rwkv-4-1b5-pile        1.000    False\n",
      "146                   rwkv                     rwkv-4-3b-pile        3.000    False\n",
      "148                   rwkv                     rwkv-4-7b-pile        7.000    False\n",
      "143                   rwkv                    rwkv-4-14b-pile       14.000    False\n",
      "153                 smollm                        smollm-135m        0.135    False\n",
      "154                 smollm               smollm-135m-instruct        0.135     True\n",
      "155                 smollm                        smollm-360m        0.360    False\n",
      "156                 smollm               smollm-360m-instruct        0.360     True\n",
      "151                 smollm                        smollm-1.7b        1.700    False\n",
      "152                 smollm               smollm-1.7b-instruct        1.700     True\n",
      "159               stablelm             stablelm-base-alpha-3b        3.000    False\n",
      "157               stablelm                    stablelm-2-1_6b        6.000    False\n",
      "158               stablelm               stablelm-2-1_6b-chat        6.000     True\n",
      "160               stablelm             stablelm-base-alpha-7b        7.000    False\n",
      "161               stablelm          stablelm-base-alpha-7b-v2        7.000    False\n",
      "166              starcoder                   starcoderbase-1b        1.000    False\n",
      "167              starcoder                   starcoderbase-3b        3.000    False\n",
      "168              starcoder                   starcoderbase-7b        7.000    False\n",
      "165              starcoder                      starcoderbase       15.500    False\n",
      "163             starcoder2                      starcoder2-3b        3.000    False\n",
      "164             starcoder2                      starcoder2-7b        7.000    False\n",
      "162             starcoder2                     starcoder2-15b       15.000    False\n",
      "173                   xglm                          xglm-564m        0.564    False\n",
      "171                   xglm                          xglm-1.7b        1.700    False\n",
      "172                   xglm                          xglm-4.5b        4.500    False\n",
      "174                   xglm                          xglm-7.5b        7.500    False\n",
      "184                     yi                              yi-6b        6.000    False\n",
      "186                     yi                         yi-6b-chat        6.000     True\n",
      "187                     yi                              yi-9b        9.000    False\n",
      "181                     yi                             yi-34b       34.000    False\n",
      "183                     yi                        yi-34b-chat       34.000     True\n",
      "177                 yi-1.5                          yi-1.5-6b        6.000    False\n",
      "178                 yi-1.5                     yi-1.5-6b-chat        6.000     True\n",
      "179                 yi-1.5                          yi-1.5-9b        9.000    False\n",
      "6                   yi-1.5                    blossom-v5.1-9b        9.000     True\n",
      "24                  yi-1.5            dolphin-2.9.1-yi-1.5-9b        9.000     True\n",
      "180                 yi-1.5                     yi-1.5-9b-chat        9.000     True\n",
      "175                 yi-1.5                         yi-1.5-34b       34.000    False\n",
      "5                   yi-1.5                   blossom-v5.1-34b       34.000     True\n",
      "23                  yi-1.5           dolphin-2.9.1-yi-1.5-34b       34.000     True\n",
      "176                 yi-1.5                    yi-1.5-34b-chat       34.000     True\n",
      "185                yi-200k                         yi-6b-200k        6.000    False\n",
      "182                yi-200k                        yi-34b-200k       34.000    False\n"
     ]
    }
   ],
   "source": [
    "# Print every row (to_string avoids truncation) of the four identifying columns for a manual check.\n",
    "print(cons_lb.loc[:,['Family','Model','#Params (B)','Instruct']].to_string())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f95d2977-f085-49d0-9083-be642ad23e65",
   "metadata": {},
   "source": [
    "Exploring the data 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 187,
   "id": "6c83b8a2-759b-4695-86a1-688ebe7a8044",
   "metadata": {},
   "outputs": [],
   "source": [
    "ind = np.array(cons_lb.Instruct)\n",
    "ind = (1-ind).astype(bool)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 188,
   "id": "384a73cf-5588-4f35-a404-df7180ee513a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(17,\n",
       " (array(['bloom', 'falcon', 'gemma', 'gemma-2', 'gpt-neo/j', 'llama',\n",
       "         'llama-2', 'meta-llama-3', 'olmo', 'opt', 'pythia', 'qwen1.5',\n",
       "         'qwen2', 'recurrentgemma', 'smollm', 'starcoder2', 'yi', 'yi-1.5'],\n",
       "        dtype=object),\n",
       "  array([4, 2, 2, 2, 2, 3, 3, 2, 2, 2, 5, 6, 4, 1, 3, 3, 3, 3])))"
      ]
     },
     "execution_count": 188,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vars1 = ['Model', 'Family', 'Instruct','Pretraining Data Size (T)','#Params (B)',\n",
    "        'ARC', 'HellaSwag', 'TruthfulQA', 'GSM8K', 'Winogrande']\n",
    "np.sum(np.unique(cons_lb.loc[ind,vars].dropna().Family, return_counts=True)[1]>1), np.unique(cons_lb.loc[ind,vars].dropna().Family, return_counts=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "id": "263de77a-076d-4f3c-9708-64d98437ad88",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(17,\n",
       " (array(['bloom', 'falcon', 'gemma', 'gemma-2', 'gpt-neo/j', 'llama',\n",
       "         'llama-2', 'meta-llama-3', 'olmo', 'opt', 'pythia', 'qwen1.5',\n",
       "         'qwen2', 'recurrentgemma', 'smollm', 'starcoder2', 'yi', 'yi-1.5'],\n",
       "        dtype=object),\n",
       "  array([4, 2, 2, 2, 2, 3, 3, 2, 2, 2, 5, 6, 4, 1, 3, 3, 3, 3])))"
      ]
     },
     "execution_count": 189,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vars2 = ['Model', 'Family', 'Instruct','Pretraining Data Size (T)','#Params (B)',\n",
    "        'IFEval', 'BBH', 'MATH Lvl 5', 'GPQA', 'MUSR', 'MMLU-PRO']\n",
    "np.sum(np.unique(cons_lb.loc[ind,vars].dropna().Family, return_counts=True)[1]>1), np.unique(cons_lb.loc[ind,vars].dropna().Family, return_counts=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "id": "8f23fec6-800d-48fb-8395-cd476489c95f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(17,\n",
       " (array(['bloom', 'falcon', 'gemma', 'gemma-2', 'gpt-neo/j', 'llama',\n",
       "         'llama-2', 'meta-llama-3', 'olmo', 'opt', 'pythia', 'qwen1.5',\n",
       "         'qwen2', 'recurrentgemma', 'smollm', 'starcoder2', 'yi', 'yi-1.5'],\n",
       "        dtype=object),\n",
       "  array([4, 2, 2, 2, 2, 3, 3, 2, 2, 2, 5, 6, 4, 1, 3, 3, 3, 3])))"
      ]
     },
     "execution_count": 190,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vars3 = vars2 + ['ARC', 'HellaSwag', 'TruthfulQA', 'GSM8K', 'Winogrande','MMLU']\n",
    "np.sum(np.unique(cons_lb.loc[ind,vars].dropna().Family, return_counts=True)[1]>1), np.unique(cons_lb.loc[ind,vars].dropna().Family, return_counts=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 191,
   "id": "b2e68c8d-061d-4dd9-b02c-8fbe95ae0823",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(17,\n",
       " (array(['bloom', 'falcon', 'gemma', 'gemma-2', 'gpt-neo/j', 'llama',\n",
       "         'llama-2', 'meta-llama-3', 'olmo', 'opt', 'pythia', 'qwen1.5',\n",
       "         'qwen2', 'recurrentgemma', 'smollm', 'starcoder2', 'yi', 'yi-1.5'],\n",
       "        dtype=object),\n",
       "  array([4, 2, 2, 2, 2, 3, 3, 2, 2, 2, 5, 6, 4, 1, 3, 3, 3, 3])))"
      ]
     },
     "execution_count": 191,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vars4 = vars3 + ['HumanEval']\n",
    "np.sum(np.unique(cons_lb.loc[ind,vars].dropna().Family, return_counts=True)[1]>1), np.unique(cons_lb.loc[ind,vars].dropna().Family, return_counts=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7ac3207f-4290-48d9-8b44-58e9492636e4",
   "metadata": {},
   "source": [
    "Exploring the data 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 192,
   "id": "ac741023-0e52-4186-b549-544d55483bed",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Model',\n",
       " 'Family',\n",
       " 'Instruct',\n",
       " 'Pretraining Data Size (T)',\n",
       " '#Params (B)',\n",
       " 'ARC',\n",
       " 'HellaSwag',\n",
       " 'TruthfulQA',\n",
       " 'GSM8K',\n",
       " 'Winogrande']"
      ]
     },
     "execution_count": 192,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ind = np.array(cons_lb.Instruct)\n",
    "vars1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "id": "f403ab5a-ab17-4a65-aa9c-6a10bc3d60b1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>Family</th>\n",
       "      <th>Instruct</th>\n",
       "      <th>Pretraining Data Size (T)</th>\n",
       "      <th>#Params (B)</th>\n",
       "      <th>ARC</th>\n",
       "      <th>HellaSwag</th>\n",
       "      <th>TruthfulQA</th>\n",
       "      <th>GSM8K</th>\n",
       "      <th>Winogrande</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>codellama-70b-instruct</td>\n",
       "      <td>codellama</td>\n",
       "      <td>True</td>\n",
       "      <td>3.02</td>\n",
       "      <td>70.0</td>\n",
       "      <td>0.550300</td>\n",
       "      <td>0.772400</td>\n",
       "      <td>0.504400</td>\n",
       "      <td>0.462500</td>\n",
       "      <td>0.745100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>falcon-7b-instruct</td>\n",
       "      <td>falcon</td>\n",
       "      <td>True</td>\n",
       "      <td>1.50</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.458200</td>\n",
       "      <td>0.707800</td>\n",
       "      <td>0.440700</td>\n",
       "      <td>0.046200</td>\n",
       "      <td>0.680300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>gemma-2b-it</td>\n",
       "      <td>gemma</td>\n",
       "      <td>True</td>\n",
       "      <td>6.00</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.439400</td>\n",
       "      <td>0.627000</td>\n",
       "      <td>0.458200</td>\n",
       "      <td>0.054600</td>\n",
       "      <td>0.609300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>sauerkrautlm-gemma-2b</td>\n",
       "      <td>gemma</td>\n",
       "      <td>True</td>\n",
       "      <td>6.00</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.487200</td>\n",
       "      <td>0.714100</td>\n",
       "      <td>0.357700</td>\n",
       "      <td>0.267600</td>\n",
       "      <td>0.679600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>gemma-7b-it</td>\n",
       "      <td>gemma</td>\n",
       "      <td>True</td>\n",
       "      <td>6.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.514500</td>\n",
       "      <td>0.719600</td>\n",
       "      <td>0.472900</td>\n",
       "      <td>0.291900</td>\n",
       "      <td>0.679600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150</th>\n",
       "      <td>sauerkrautlm-gemma-7b</td>\n",
       "      <td>gemma</td>\n",
       "      <td>True</td>\n",
       "      <td>6.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.599800</td>\n",
       "      <td>0.819100</td>\n",
       "      <td>0.610000</td>\n",
       "      <td>0.636800</td>\n",
       "      <td>0.766400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>llama-2-7b-chat</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.529010</td>\n",
       "      <td>0.785501</td>\n",
       "      <td>0.455704</td>\n",
       "      <td>0.073541</td>\n",
       "      <td>0.717443</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>84</th>\n",
       "      <td>openhermes-7b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.561400</td>\n",
       "      <td>0.783200</td>\n",
       "      <td>0.450000</td>\n",
       "      <td>0.050000</td>\n",
       "      <td>0.745100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>94</th>\n",
       "      <td>orca-2-7b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.541000</td>\n",
       "      <td>0.761900</td>\n",
       "      <td>0.524500</td>\n",
       "      <td>0.147100</td>\n",
       "      <td>0.734800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>orca_mini_v3_7b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.569100</td>\n",
       "      <td>0.796400</td>\n",
       "      <td>0.505100</td>\n",
       "      <td>0.071300</td>\n",
       "      <td>0.742700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>llama-2-13b-chat</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.590444</td>\n",
       "      <td>0.819359</td>\n",
       "      <td>0.441179</td>\n",
       "      <td>0.152388</td>\n",
       "      <td>0.745067</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>83</th>\n",
       "      <td>openhermes-13b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.598100</td>\n",
       "      <td>0.822400</td>\n",
       "      <td>0.460100</td>\n",
       "      <td>0.116000</td>\n",
       "      <td>0.754500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93</th>\n",
       "      <td>orca-2-13b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.609200</td>\n",
       "      <td>0.798500</td>\n",
       "      <td>0.564200</td>\n",
       "      <td>0.378300</td>\n",
       "      <td>0.765600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>orca_mini_v3_13b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.631400</td>\n",
       "      <td>0.823500</td>\n",
       "      <td>0.518100</td>\n",
       "      <td>0.131200</td>\n",
       "      <td>0.764800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>llama-2-70b-chat</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>70.0</td>\n",
       "      <td>0.645904</td>\n",
       "      <td>0.858793</td>\n",
       "      <td>0.528047</td>\n",
       "      <td>0.266869</td>\n",
       "      <td>0.805051</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>llama-3-sauerkrautlm-8b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>15.00</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.737200</td>\n",
       "      <td>0.894100</td>\n",
       "      <td>0.662500</td>\n",
       "      <td>0.649700</td>\n",
       "      <td>0.800300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>meta-llama-3-8b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>15.00</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.607500</td>\n",
       "      <td>0.785500</td>\n",
       "      <td>0.516500</td>\n",
       "      <td>0.686900</td>\n",
       "      <td>0.745100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>meta-llama-3-70b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>15.00</td>\n",
       "      <td>70.0</td>\n",
       "      <td>0.714200</td>\n",
       "      <td>0.856900</td>\n",
       "      <td>0.618100</td>\n",
       "      <td>0.854400</td>\n",
       "      <td>0.828700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74</th>\n",
       "      <td>mpt-7b-chat</td>\n",
       "      <td>mpt</td>\n",
       "      <td>True</td>\n",
       "      <td>1.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.465000</td>\n",
       "      <td>0.755100</td>\n",
       "      <td>0.401600</td>\n",
       "      <td>0.040900</td>\n",
       "      <td>0.684300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75</th>\n",
       "      <td>mpt-7b-instruct</td>\n",
       "      <td>mpt</td>\n",
       "      <td>True</td>\n",
       "      <td>1.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.503400</td>\n",
       "      <td>0.779100</td>\n",
       "      <td>0.350800</td>\n",
       "      <td>0.028100</td>\n",
       "      <td>0.704800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>mpt-30b-chat</td>\n",
       "      <td>mpt</td>\n",
       "      <td>True</td>\n",
       "      <td>1.00</td>\n",
       "      <td>30.0</td>\n",
       "      <td>0.587000</td>\n",
       "      <td>0.825400</td>\n",
       "      <td>0.524200</td>\n",
       "      <td>0.121300</td>\n",
       "      <td>0.753000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td>mpt-30b-instruct</td>\n",
       "      <td>mpt</td>\n",
       "      <td>True</td>\n",
       "      <td>1.00</td>\n",
       "      <td>30.0</td>\n",
       "      <td>0.584500</td>\n",
       "      <td>0.843100</td>\n",
       "      <td>0.380500</td>\n",
       "      <td>0.153100</td>\n",
       "      <td>0.751400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>dolly-v2-7b</td>\n",
       "      <td>pythia</td>\n",
       "      <td>True</td>\n",
       "      <td>0.25</td>\n",
       "      <td>6.9</td>\n",
       "      <td>0.445400</td>\n",
       "      <td>0.696400</td>\n",
       "      <td>0.348800</td>\n",
       "      <td>0.011400</td>\n",
       "      <td>0.600600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>pythia</td>\n",
       "      <td>True</td>\n",
       "      <td>0.25</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.424061</td>\n",
       "      <td>0.725254</td>\n",
       "      <td>0.338271</td>\n",
       "      <td>0.012130</td>\n",
       "      <td>0.608524</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>114</th>\n",
       "      <td>qwen1.5-0.5b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>2.40</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.305500</td>\n",
       "      <td>0.440700</td>\n",
       "      <td>0.429500</td>\n",
       "      <td>0.076600</td>\n",
       "      <td>0.546200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>116</th>\n",
       "      <td>qwen1.5-1.8b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>2.40</td>\n",
       "      <td>1.8</td>\n",
       "      <td>0.387400</td>\n",
       "      <td>0.600200</td>\n",
       "      <td>0.406200</td>\n",
       "      <td>0.190300</td>\n",
       "      <td>0.596700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124</th>\n",
       "      <td>qwen1.5-4b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>2.40</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.432600</td>\n",
       "      <td>0.697300</td>\n",
       "      <td>0.447900</td>\n",
       "      <td>0.024300</td>\n",
       "      <td>0.649600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>128</th>\n",
       "      <td>qwen1.5-7b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>4.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.558900</td>\n",
       "      <td>0.785600</td>\n",
       "      <td>0.536500</td>\n",
       "      <td>0.131900</td>\n",
       "      <td>0.678000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>120</th>\n",
       "      <td>qwen1.5-14b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>4.00</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0.587000</td>\n",
       "      <td>0.822700</td>\n",
       "      <td>0.603600</td>\n",
       "      <td>0.306300</td>\n",
       "      <td>0.730900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>122</th>\n",
       "      <td>qwen1.5-32b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>4.00</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.660400</td>\n",
       "      <td>0.854900</td>\n",
       "      <td>0.669500</td>\n",
       "      <td>0.070500</td>\n",
       "      <td>0.771900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126</th>\n",
       "      <td>qwen1.5-72b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.00</td>\n",
       "      <td>72.0</td>\n",
       "      <td>0.685200</td>\n",
       "      <td>0.864200</td>\n",
       "      <td>0.639000</td>\n",
       "      <td>0.203900</td>\n",
       "      <td>0.790800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>recurrentgemma-2b-it</td>\n",
       "      <td>recurrentgemma</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.309700</td>\n",
       "      <td>0.562600</td>\n",
       "      <td>0.428100</td>\n",
       "      <td>0.100800</td>\n",
       "      <td>0.641700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>158</th>\n",
       "      <td>stablelm-2-1_6b-chat</td>\n",
       "      <td>stablelm</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.435200</td>\n",
       "      <td>0.692400</td>\n",
       "      <td>0.465000</td>\n",
       "      <td>0.388200</td>\n",
       "      <td>0.647200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>183</th>\n",
       "      <td>yi-34b-chat</td>\n",
       "      <td>yi</td>\n",
       "      <td>True</td>\n",
       "      <td>3.00</td>\n",
       "      <td>34.0</td>\n",
       "      <td>0.654400</td>\n",
       "      <td>0.841600</td>\n",
       "      <td>0.553700</td>\n",
       "      <td>0.319200</td>\n",
       "      <td>0.801100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178</th>\n",
       "      <td>yi-1.5-6b-chat</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.606700</td>\n",
       "      <td>0.788700</td>\n",
       "      <td>0.525700</td>\n",
       "      <td>0.671000</td>\n",
       "      <td>0.735600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>dolphin-2.9.1-yi-1.5-9b</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.656100</td>\n",
       "      <td>0.810200</td>\n",
       "      <td>0.537200</td>\n",
       "      <td>0.653500</td>\n",
       "      <td>0.770300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>180</th>\n",
       "      <td>yi-1.5-9b-chat</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.636500</td>\n",
       "      <td>0.809400</td>\n",
       "      <td>0.526700</td>\n",
       "      <td>0.718700</td>\n",
       "      <td>0.771900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>blossom-v5.1-34b</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>34.0</td>\n",
       "      <td>0.675800</td>\n",
       "      <td>0.858300</td>\n",
       "      <td>0.618400</td>\n",
       "      <td>0.655800</td>\n",
       "      <td>0.834300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>dolphin-2.9.1-yi-1.5-34b</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>34.0</td>\n",
       "      <td>0.693700</td>\n",
       "      <td>0.855300</td>\n",
       "      <td>0.623400</td>\n",
       "      <td>0.730100</td>\n",
       "      <td>0.825600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>176</th>\n",
       "      <td>yi-1.5-34b-chat</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>34.0</td>\n",
       "      <td>0.704800</td>\n",
       "      <td>0.859700</td>\n",
       "      <td>0.621600</td>\n",
       "      <td>0.716500</td>\n",
       "      <td>0.816100</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                Model          Family Instruct  \\\n",
       "14             codellama-70b-instruct       codellama     True   \n",
       "31                 falcon-7b-instruct          falcon     True   \n",
       "38                        gemma-2b-it           gemma     True   \n",
       "149             sauerkrautlm-gemma-2b           gemma     True   \n",
       "40                        gemma-7b-it           gemma     True   \n",
       "150             sauerkrautlm-gemma-7b           gemma     True   \n",
       "56                    llama-2-7b-chat         llama-2     True   \n",
       "84                      openhermes-7b         llama-2     True   \n",
       "94                          orca-2-7b         llama-2     True   \n",
       "97                    orca_mini_v3_7b         llama-2     True   \n",
       "52                   llama-2-13b-chat         llama-2     True   \n",
       "83                     openhermes-13b         llama-2     True   \n",
       "93                         orca-2-13b         llama-2     True   \n",
       "95                   orca_mini_v3_13b         llama-2     True   \n",
       "54                   llama-2-70b-chat         llama-2     True   \n",
       "58   llama-3-sauerkrautlm-8b-instruct    meta-llama-3     True   \n",
       "65           meta-llama-3-8b-instruct    meta-llama-3     True   \n",
       "63          meta-llama-3-70b-instruct    meta-llama-3     True   \n",
       "74                        mpt-7b-chat             mpt     True   \n",
       "75                    mpt-7b-instruct             mpt     True   \n",
       "71                       mpt-30b-chat             mpt     True   \n",
       "72                   mpt-30b-instruct             mpt     True   \n",
       "22                        dolly-v2-7b          pythia     True   \n",
       "20                       dolly-v2-12b          pythia     True   \n",
       "114                 qwen1.5-0.5b-chat         qwen1.5     True   \n",
       "116                 qwen1.5-1.8b-chat         qwen1.5     True   \n",
       "124                   qwen1.5-4b-chat         qwen1.5     True   \n",
       "128                   qwen1.5-7b-chat         qwen1.5     True   \n",
       "120                  qwen1.5-14b-chat         qwen1.5     True   \n",
       "122                  qwen1.5-32b-chat         qwen1.5     True   \n",
       "126                  qwen1.5-72b-chat         qwen1.5     True   \n",
       "138              recurrentgemma-2b-it  recurrentgemma     True   \n",
       "158              stablelm-2-1_6b-chat        stablelm     True   \n",
       "183                       yi-34b-chat              yi     True   \n",
       "178                    yi-1.5-6b-chat          yi-1.5     True   \n",
       "24            dolphin-2.9.1-yi-1.5-9b          yi-1.5     True   \n",
       "180                    yi-1.5-9b-chat          yi-1.5     True   \n",
       "5                    blossom-v5.1-34b          yi-1.5     True   \n",
       "23           dolphin-2.9.1-yi-1.5-34b          yi-1.5     True   \n",
       "176                   yi-1.5-34b-chat          yi-1.5     True   \n",
       "\n",
       "     Pretraining Data Size (T)  #Params (B)       ARC  HellaSwag  TruthfulQA  \\\n",
       "14                        3.02         70.0  0.550300   0.772400    0.504400   \n",
       "31                        1.50          7.0  0.458200   0.707800    0.440700   \n",
       "38                        6.00          2.0  0.439400   0.627000    0.458200   \n",
       "149                       6.00          2.0  0.487200   0.714100    0.357700   \n",
       "40                        6.00          7.0  0.514500   0.719600    0.472900   \n",
       "150                       6.00          7.0  0.599800   0.819100    0.610000   \n",
       "56                        2.00          7.0  0.529010   0.785501    0.455704   \n",
       "84                        2.00          7.0  0.561400   0.783200    0.450000   \n",
       "94                        2.00          7.0  0.541000   0.761900    0.524500   \n",
       "97                        2.00          7.0  0.569100   0.796400    0.505100   \n",
       "52                        2.00         13.0  0.590444   0.819359    0.441179   \n",
       "83                        2.00         13.0  0.598100   0.822400    0.460100   \n",
       "93                        2.00         13.0  0.609200   0.798500    0.564200   \n",
       "95                        2.00         13.0  0.631400   0.823500    0.518100   \n",
       "54                        2.00         70.0  0.645904   0.858793    0.528047   \n",
       "58                       15.00          8.0  0.737200   0.894100    0.662500   \n",
       "65                       15.00          8.0  0.607500   0.785500    0.516500   \n",
       "63                       15.00         70.0  0.714200   0.856900    0.618100   \n",
       "74                        1.00          7.0  0.465000   0.755100    0.401600   \n",
       "75                        1.00          7.0  0.503400   0.779100    0.350800   \n",
       "71                        1.00         30.0  0.587000   0.825400    0.524200   \n",
       "72                        1.00         30.0  0.584500   0.843100    0.380500   \n",
       "22                        0.25          6.9  0.445400   0.696400    0.348800   \n",
       "20                        0.25         12.0  0.424061   0.725254    0.338271   \n",
       "114                       2.40          0.5  0.305500   0.440700    0.429500   \n",
       "116                       2.40          1.8  0.387400   0.600200    0.406200   \n",
       "124                       2.40          4.0  0.432600   0.697300    0.447900   \n",
       "128                       4.00          7.0  0.558900   0.785600    0.536500   \n",
       "120                       4.00         14.0  0.587000   0.822700    0.603600   \n",
       "122                       4.00         32.0  0.660400   0.854900    0.669500   \n",
       "126                       3.00         72.0  0.685200   0.864200    0.639000   \n",
       "138                       2.00          2.0  0.309700   0.562600    0.428100   \n",
       "158                       2.00          6.0  0.435200   0.692400    0.465000   \n",
       "183                       3.00         34.0  0.654400   0.841600    0.553700   \n",
       "178                       3.50          6.0  0.606700   0.788700    0.525700   \n",
       "24                        3.50          9.0  0.656100   0.810200    0.537200   \n",
       "180                       3.50          9.0  0.636500   0.809400    0.526700   \n",
       "5                         3.50         34.0  0.675800   0.858300    0.618400   \n",
       "23                        3.50         34.0  0.693700   0.855300    0.623400   \n",
       "176                       3.50         34.0  0.704800   0.859700    0.621600   \n",
       "\n",
       "        GSM8K  Winogrande  \n",
       "14   0.462500    0.745100  \n",
       "31   0.046200    0.680300  \n",
       "38   0.054600    0.609300  \n",
       "149  0.267600    0.679600  \n",
       "40   0.291900    0.679600  \n",
       "150  0.636800    0.766400  \n",
       "56   0.073541    0.717443  \n",
       "84   0.050000    0.745100  \n",
       "94   0.147100    0.734800  \n",
       "97   0.071300    0.742700  \n",
       "52   0.152388    0.745067  \n",
       "83   0.116000    0.754500  \n",
       "93   0.378300    0.765600  \n",
       "95   0.131200    0.764800  \n",
       "54   0.266869    0.805051  \n",
       "58   0.649700    0.800300  \n",
       "65   0.686900    0.745100  \n",
       "63   0.854400    0.828700  \n",
       "74   0.040900    0.684300  \n",
       "75   0.028100    0.704800  \n",
       "71   0.121300    0.753000  \n",
       "72   0.153100    0.751400  \n",
       "22   0.011400    0.600600  \n",
       "20   0.012130    0.608524  \n",
       "114  0.076600    0.546200  \n",
       "116  0.190300    0.596700  \n",
       "124  0.024300    0.649600  \n",
       "128  0.131900    0.678000  \n",
       "120  0.306300    0.730900  \n",
       "122  0.070500    0.771900  \n",
       "126  0.203900    0.790800  \n",
       "138  0.100800    0.641700  \n",
       "158  0.388200    0.647200  \n",
       "183  0.319200    0.801100  \n",
       "178  0.671000    0.735600  \n",
       "24   0.653500    0.770300  \n",
       "180  0.718700    0.771900  \n",
       "5    0.655800    0.834300  \n",
       "23   0.730100    0.825600  \n",
       "176  0.716500    0.816100  "
      ]
     },
     "execution_count": 193,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows of cons_lb selected by `ind`, limited to the `vars1` columns; rows with\n",
    "# any missing value are dropped, then sorted by 'Family' and '#Params (B)'.\n",
    "(\n",
    "    cons_lb.loc[ind, vars1]\n",
    "    .dropna()\n",
    "    .sort_values(by=['Family', '#Params (B)'])\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 194,
   "id": "7a7876b7-41c7-40a2-89dd-22c1085a32ff",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>Family</th>\n",
       "      <th>Instruct</th>\n",
       "      <th>Pretraining Data Size (T)</th>\n",
       "      <th>#Params (B)</th>\n",
       "      <th>IFEval</th>\n",
       "      <th>BBH</th>\n",
       "      <th>MATH Lvl 5</th>\n",
       "      <th>GPQA</th>\n",
       "      <th>MUSR</th>\n",
       "      <th>MMLU-PRO</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>falcon-7b-instruct</td>\n",
       "      <td>falcon</td>\n",
       "      <td>True</td>\n",
       "      <td>1.50</td>\n",
       "      <td>7.000</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>falcon-40b-instruct</td>\n",
       "      <td>falcon</td>\n",
       "      <td>True</td>\n",
       "      <td>1.00</td>\n",
       "      <td>40.000</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.41</td>\n",
       "      <td>0.02</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.38</td>\n",
       "      <td>0.23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>gemma-2b-it</td>\n",
       "      <td>gemma</td>\n",
       "      <td>True</td>\n",
       "      <td>6.00</td>\n",
       "      <td>2.000</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.28</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>sauerkrautlm-gemma-2b</td>\n",
       "      <td>gemma</td>\n",
       "      <td>True</td>\n",
       "      <td>6.00</td>\n",
       "      <td>2.000</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.34</td>\n",
       "      <td>0.02</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>gemma-7b-it</td>\n",
       "      <td>gemma</td>\n",
       "      <td>True</td>\n",
       "      <td>6.00</td>\n",
       "      <td>7.000</td>\n",
       "      <td>0.39</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.02</td>\n",
       "      <td>0.28</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150</th>\n",
       "      <td>sauerkrautlm-gemma-7b</td>\n",
       "      <td>gemma</td>\n",
       "      <td>True</td>\n",
       "      <td>6.00</td>\n",
       "      <td>7.000</td>\n",
       "      <td>0.34</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.05</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>gemma-2-2b-it</td>\n",
       "      <td>gemma-2</td>\n",
       "      <td>True</td>\n",
       "      <td>8.00</td>\n",
       "      <td>2.000</td>\n",
       "      <td>0.57</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.39</td>\n",
       "      <td>0.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>gemma-2-9b-it</td>\n",
       "      <td>gemma-2</td>\n",
       "      <td>True</td>\n",
       "      <td>13.00</td>\n",
       "      <td>9.000</td>\n",
       "      <td>0.75</td>\n",
       "      <td>0.60</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.35</td>\n",
       "      <td>0.41</td>\n",
       "      <td>0.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>llama-2-7b-chat</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>7.000</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>84</th>\n",
       "      <td>openhermes-7b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>7.000</td>\n",
       "      <td>0.18</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>94</th>\n",
       "      <td>orca-2-7b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>7.000</td>\n",
       "      <td>0.22</td>\n",
       "      <td>0.45</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>orca_mini_v3_7b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>7.000</td>\n",
       "      <td>0.28</td>\n",
       "      <td>0.41</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>llama-2-13b-chat</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>13.000</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.23</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>83</th>\n",
       "      <td>openhermes-13b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>13.000</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93</th>\n",
       "      <td>orca-2-13b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>13.000</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.49</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.28</td>\n",
       "      <td>0.51</td>\n",
       "      <td>0.27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>orca_mini_v3_13b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>13.000</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.47</td>\n",
       "      <td>0.02</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.46</td>\n",
       "      <td>0.23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>169</th>\n",
       "      <td>wizardlm-13b-v1.0</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>13.000</td>\n",
       "      <td>0.19</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.35</td>\n",
       "      <td>0.12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>llama-2-70b-chat</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>70.000</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.30</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>orca_mini_v3_70b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>70.000</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.59</td>\n",
       "      <td>0.04</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.51</td>\n",
       "      <td>0.38</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>170</th>\n",
       "      <td>wizardlm-70b-v1.0</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>70.000</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.56</td>\n",
       "      <td>0.03</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.44</td>\n",
       "      <td>0.34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>llama-3-sauerkrautlm-8b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>15.00</td>\n",
       "      <td>8.000</td>\n",
       "      <td>0.74</td>\n",
       "      <td>0.49</td>\n",
       "      <td>0.06</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>meta-llama-3-8b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>15.00</td>\n",
       "      <td>8.000</td>\n",
       "      <td>0.74</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.09</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>llama-3-sauerkrautlm-70b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>15.00</td>\n",
       "      <td>70.000</td>\n",
       "      <td>0.80</td>\n",
       "      <td>0.67</td>\n",
       "      <td>0.22</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.54</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>meta-llama-3-70b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>15.00</td>\n",
       "      <td>70.000</td>\n",
       "      <td>0.81</td>\n",
       "      <td>0.65</td>\n",
       "      <td>0.23</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>dolly-v2-3b</td>\n",
       "      <td>pythia</td>\n",
       "      <td>True</td>\n",
       "      <td>0.25</td>\n",
       "      <td>2.800</td>\n",
       "      <td>0.22</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>dolly-v2-7b</td>\n",
       "      <td>pythia</td>\n",
       "      <td>True</td>\n",
       "      <td>0.25</td>\n",
       "      <td>6.900</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>pythia</td>\n",
       "      <td>True</td>\n",
       "      <td>0.25</td>\n",
       "      <td>12.000</td>\n",
       "      <td>0.24</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.24</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>114</th>\n",
       "      <td>qwen1.5-0.5b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>2.40</td>\n",
       "      <td>0.500</td>\n",
       "      <td>0.18</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.38</td>\n",
       "      <td>0.12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>116</th>\n",
       "      <td>qwen1.5-1.8b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>2.40</td>\n",
       "      <td>1.800</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.30</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124</th>\n",
       "      <td>qwen1.5-4b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>2.40</td>\n",
       "      <td>4.000</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>128</th>\n",
       "      <td>qwen1.5-7b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>4.00</td>\n",
       "      <td>7.000</td>\n",
       "      <td>0.44</td>\n",
       "      <td>0.45</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.30</td>\n",
       "      <td>0.38</td>\n",
       "      <td>0.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>120</th>\n",
       "      <td>qwen1.5-14b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>4.00</td>\n",
       "      <td>14.000</td>\n",
       "      <td>0.48</td>\n",
       "      <td>0.52</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.44</td>\n",
       "      <td>0.36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>122</th>\n",
       "      <td>qwen1.5-32b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>4.00</td>\n",
       "      <td>32.000</td>\n",
       "      <td>0.55</td>\n",
       "      <td>0.61</td>\n",
       "      <td>0.07</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>130</th>\n",
       "      <td>qwen2-0.5b-instruct</td>\n",
       "      <td>qwen2</td>\n",
       "      <td>True</td>\n",
       "      <td>12.00</td>\n",
       "      <td>0.500</td>\n",
       "      <td>0.22</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.02</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.34</td>\n",
       "      <td>0.15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>132</th>\n",
       "      <td>qwen2-1.5b-instruct</td>\n",
       "      <td>qwen2</td>\n",
       "      <td>True</td>\n",
       "      <td>7.00</td>\n",
       "      <td>1.500</td>\n",
       "      <td>0.34</td>\n",
       "      <td>0.39</td>\n",
       "      <td>0.06</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>dolphin-2.9.2-qwen2-7b</td>\n",
       "      <td>qwen2</td>\n",
       "      <td>True</td>\n",
       "      <td>7.00</td>\n",
       "      <td>7.000</td>\n",
       "      <td>0.35</td>\n",
       "      <td>0.49</td>\n",
       "      <td>0.12</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>orca_mini_v7_7b</td>\n",
       "      <td>qwen2</td>\n",
       "      <td>True</td>\n",
       "      <td>7.00</td>\n",
       "      <td>7.000</td>\n",
       "      <td>0.44</td>\n",
       "      <td>0.53</td>\n",
       "      <td>0.03</td>\n",
       "      <td>0.30</td>\n",
       "      <td>0.44</td>\n",
       "      <td>0.42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>qwen2-7b-instruct</td>\n",
       "      <td>qwen2</td>\n",
       "      <td>True</td>\n",
       "      <td>7.00</td>\n",
       "      <td>7.000</td>\n",
       "      <td>0.57</td>\n",
       "      <td>0.55</td>\n",
       "      <td>0.09</td>\n",
       "      <td>0.30</td>\n",
       "      <td>0.39</td>\n",
       "      <td>0.38</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>dolphin-2.9.2-qwen2-72b</td>\n",
       "      <td>qwen2</td>\n",
       "      <td>True</td>\n",
       "      <td>7.00</td>\n",
       "      <td>72.000</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.63</td>\n",
       "      <td>0.21</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.45</td>\n",
       "      <td>0.55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>orca_mini_v7_72b</td>\n",
       "      <td>qwen2</td>\n",
       "      <td>True</td>\n",
       "      <td>7.00</td>\n",
       "      <td>72.000</td>\n",
       "      <td>0.59</td>\n",
       "      <td>0.68</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.39</td>\n",
       "      <td>0.51</td>\n",
       "      <td>0.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>qwen2-72b-instruct</td>\n",
       "      <td>qwen2</td>\n",
       "      <td>True</td>\n",
       "      <td>7.00</td>\n",
       "      <td>72.000</td>\n",
       "      <td>0.80</td>\n",
       "      <td>0.70</td>\n",
       "      <td>0.35</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.46</td>\n",
       "      <td>0.54</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>recurrentgemma-2b-it</td>\n",
       "      <td>recurrentgemma</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>2.000</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.02</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.34</td>\n",
       "      <td>0.14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>154</th>\n",
       "      <td>smollm-135m-instruct</td>\n",
       "      <td>smollm</td>\n",
       "      <td>True</td>\n",
       "      <td>0.60</td>\n",
       "      <td>0.135</td>\n",
       "      <td>0.16</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>156</th>\n",
       "      <td>smollm-360m-instruct</td>\n",
       "      <td>smollm</td>\n",
       "      <td>True</td>\n",
       "      <td>0.60</td>\n",
       "      <td>0.360</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.35</td>\n",
       "      <td>0.12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>152</th>\n",
       "      <td>smollm-1.7b-instruct</td>\n",
       "      <td>smollm</td>\n",
       "      <td>True</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.700</td>\n",
       "      <td>0.23</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.35</td>\n",
       "      <td>0.12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>186</th>\n",
       "      <td>yi-6b-chat</td>\n",
       "      <td>yi</td>\n",
       "      <td>True</td>\n",
       "      <td>3.00</td>\n",
       "      <td>6.000</td>\n",
       "      <td>0.34</td>\n",
       "      <td>0.41</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178</th>\n",
       "      <td>yi-1.5-6b-chat</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>6.000</td>\n",
       "      <td>0.48</td>\n",
       "      <td>0.46</td>\n",
       "      <td>0.13</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.44</td>\n",
       "      <td>0.32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>blossom-v5.1-9b</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>9.000</td>\n",
       "      <td>0.51</td>\n",
       "      <td>0.53</td>\n",
       "      <td>0.10</td>\n",
       "      <td>0.34</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>dolphin-2.9.1-yi-1.5-9b</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>9.000</td>\n",
       "      <td>0.45</td>\n",
       "      <td>0.55</td>\n",
       "      <td>0.10</td>\n",
       "      <td>0.34</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>180</th>\n",
       "      <td>yi-1.5-9b-chat</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>9.000</td>\n",
       "      <td>0.60</td>\n",
       "      <td>0.56</td>\n",
       "      <td>0.12</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>blossom-v5.1-34b</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>34.000</td>\n",
       "      <td>0.57</td>\n",
       "      <td>0.61</td>\n",
       "      <td>0.14</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.39</td>\n",
       "      <td>0.45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>dolphin-2.9.1-yi-1.5-34b</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>34.000</td>\n",
       "      <td>0.39</td>\n",
       "      <td>0.61</td>\n",
       "      <td>0.15</td>\n",
       "      <td>0.34</td>\n",
       "      <td>0.46</td>\n",
       "      <td>0.45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>176</th>\n",
       "      <td>yi-1.5-34b-chat</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>34.000</td>\n",
       "      <td>0.61</td>\n",
       "      <td>0.61</td>\n",
       "      <td>0.23</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.45</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 Model          Family Instruct  \\\n",
       "31                  falcon-7b-instruct          falcon     True   \n",
       "29                 falcon-40b-instruct          falcon     True   \n",
       "38                         gemma-2b-it           gemma     True   \n",
       "149              sauerkrautlm-gemma-2b           gemma     True   \n",
       "40                         gemma-7b-it           gemma     True   \n",
       "150              sauerkrautlm-gemma-7b           gemma     True   \n",
       "34                       gemma-2-2b-it         gemma-2     True   \n",
       "36                       gemma-2-9b-it         gemma-2     True   \n",
       "56                     llama-2-7b-chat         llama-2     True   \n",
       "84                       openhermes-7b         llama-2     True   \n",
       "94                           orca-2-7b         llama-2     True   \n",
       "97                     orca_mini_v3_7b         llama-2     True   \n",
       "52                    llama-2-13b-chat         llama-2     True   \n",
       "83                      openhermes-13b         llama-2     True   \n",
       "93                          orca-2-13b         llama-2     True   \n",
       "95                    orca_mini_v3_13b         llama-2     True   \n",
       "169                  wizardlm-13b-v1.0         llama-2     True   \n",
       "54                    llama-2-70b-chat         llama-2     True   \n",
       "96                    orca_mini_v3_70b         llama-2     True   \n",
       "170                  wizardlm-70b-v1.0         llama-2     True   \n",
       "58    llama-3-sauerkrautlm-8b-instruct    meta-llama-3     True   \n",
       "65            meta-llama-3-8b-instruct    meta-llama-3     True   \n",
       "57   llama-3-sauerkrautlm-70b-instruct    meta-llama-3     True   \n",
       "63           meta-llama-3-70b-instruct    meta-llama-3     True   \n",
       "21                         dolly-v2-3b          pythia     True   \n",
       "22                         dolly-v2-7b          pythia     True   \n",
       "20                        dolly-v2-12b          pythia     True   \n",
       "114                  qwen1.5-0.5b-chat         qwen1.5     True   \n",
       "116                  qwen1.5-1.8b-chat         qwen1.5     True   \n",
       "124                    qwen1.5-4b-chat         qwen1.5     True   \n",
       "128                    qwen1.5-7b-chat         qwen1.5     True   \n",
       "120                   qwen1.5-14b-chat         qwen1.5     True   \n",
       "122                   qwen1.5-32b-chat         qwen1.5     True   \n",
       "130                qwen2-0.5b-instruct           qwen2     True   \n",
       "132                qwen2-1.5b-instruct           qwen2     True   \n",
       "26              dolphin-2.9.2-qwen2-7b           qwen2     True   \n",
       "99                     orca_mini_v7_7b           qwen2     True   \n",
       "136                  qwen2-7b-instruct           qwen2     True   \n",
       "25             dolphin-2.9.2-qwen2-72b           qwen2     True   \n",
       "98                    orca_mini_v7_72b           qwen2     True   \n",
       "134                 qwen2-72b-instruct           qwen2     True   \n",
       "138               recurrentgemma-2b-it  recurrentgemma     True   \n",
       "154               smollm-135m-instruct          smollm     True   \n",
       "156               smollm-360m-instruct          smollm     True   \n",
       "152               smollm-1.7b-instruct          smollm     True   \n",
       "186                         yi-6b-chat              yi     True   \n",
       "178                     yi-1.5-6b-chat          yi-1.5     True   \n",
       "6                      blossom-v5.1-9b          yi-1.5     True   \n",
       "24             dolphin-2.9.1-yi-1.5-9b          yi-1.5     True   \n",
       "180                     yi-1.5-9b-chat          yi-1.5     True   \n",
       "5                     blossom-v5.1-34b          yi-1.5     True   \n",
       "23            dolphin-2.9.1-yi-1.5-34b          yi-1.5     True   \n",
       "176                    yi-1.5-34b-chat          yi-1.5     True   \n",
       "\n",
       "     Pretraining Data Size (T)  #Params (B)  IFEval   BBH  MATH Lvl 5  GPQA  \\\n",
       "31                        1.50        7.000    0.20  0.32        0.01  0.25   \n",
       "29                        1.00       40.000    0.25  0.41        0.02  0.25   \n",
       "38                        6.00        2.000    0.27  0.32        0.00  0.28   \n",
       "149                       6.00        2.000    0.25  0.34        0.02  0.26   \n",
       "40                        6.00        7.000    0.39  0.36        0.02  0.28   \n",
       "150                       6.00        7.000    0.34  0.42        0.05  0.29   \n",
       "34                        8.00        2.000    0.57  0.42        0.00  0.27   \n",
       "36                       13.00        9.000    0.75  0.60        0.00  0.35   \n",
       "56                        2.00        7.000    0.40  0.31        0.01  0.25   \n",
       "84                        2.00        7.000    0.18  0.36        0.01  0.27   \n",
       "94                        2.00        7.000    0.22  0.45        0.01  0.26   \n",
       "97                        2.00        7.000    0.28  0.41        0.00  0.25   \n",
       "52                        2.00       13.000    0.40  0.33        0.01  0.23   \n",
       "83                        2.00       13.000    0.27  0.42        0.01  0.27   \n",
       "93                        2.00       13.000    0.31  0.49        0.01  0.28   \n",
       "95                        2.00       13.000    0.29  0.47        0.02  0.27   \n",
       "169                       2.00       13.000    0.19  0.29        0.00  0.26   \n",
       "54                        2.00       70.000    0.50  0.30        0.01  0.26   \n",
       "96                        2.00       70.000    0.40  0.59        0.04  0.32   \n",
       "170                       2.00       70.000    0.50  0.56        0.03  0.27   \n",
       "58                       15.00        8.000    0.74  0.49        0.06  0.31   \n",
       "65                       15.00        8.000    0.74  0.50        0.09  0.26   \n",
       "57                       15.00       70.000    0.80  0.67        0.22  0.33   \n",
       "63                       15.00       70.000    0.81  0.65        0.23  0.29   \n",
       "21                        0.25        2.800    0.22  0.31        0.01  0.26   \n",
       "22                        0.25        6.900    0.20  0.32        0.01  0.27   \n",
       "20                        0.25       12.000    0.24  0.33        0.01  0.24   \n",
       "114                       2.40        0.500    0.18  0.32        0.00  0.27   \n",
       "116                       2.40        1.800    0.20  0.33        0.00  0.30   \n",
       "124                       2.40        4.000    0.32  0.40        0.01  0.27   \n",
       "128                       4.00        7.000    0.44  0.45        0.00  0.30   \n",
       "120                       4.00       14.000    0.48  0.52        0.00  0.27   \n",
       "122                       4.00       32.000    0.55  0.61        0.07  0.31   \n",
       "130                      12.00        0.500    0.22  0.32        0.02  0.25   \n",
       "132                       7.00        1.500    0.34  0.39        0.06  0.26   \n",
       "26                        7.00        7.000    0.35  0.49        0.12  0.29   \n",
       "99                        7.00        7.000    0.44  0.53        0.03  0.30   \n",
       "136                       7.00        7.000    0.57  0.55        0.09  0.30   \n",
       "25                        7.00       72.000    0.40  0.63        0.21  0.37   \n",
       "98                        7.00       72.000    0.59  0.68        0.26  0.39   \n",
       "134                       7.00       72.000    0.80  0.70        0.35  0.37   \n",
       "138                       2.00        2.000    0.29  0.33        0.02  0.26   \n",
       "154                       0.60        0.135    0.16  0.29        0.00  0.26   \n",
       "156                       0.60        0.360    0.20  0.29        0.00  0.26   \n",
       "152                       1.00        1.700    0.23  0.29        0.00  0.26   \n",
       "186                       3.00        6.000    0.34  0.41        0.01  0.29   \n",
       "178                       3.50        6.000    0.48  0.46        0.13  0.32   \n",
       "6                         3.50        9.000    0.51  0.53        0.10  0.34   \n",
       "24                        3.50        9.000    0.45  0.55        0.10  0.34   \n",
       "180                       3.50        9.000    0.60  0.56        0.12  0.33   \n",
       "5                         3.50       34.000    0.57  0.61        0.14  0.31   \n",
       "23                        3.50       34.000    0.39  0.61        0.15  0.34   \n",
       "176                       3.50       34.000    0.61  0.61        0.23  0.36   \n",
       "\n",
       "     MUSR  MMLU-PRO  \n",
       "31   0.36      0.12  \n",
       "29   0.38      0.23  \n",
       "38   0.33      0.14  \n",
       "149  0.37      0.15  \n",
       "40   0.43      0.17  \n",
       "150  0.36      0.30  \n",
       "34   0.39      0.25  \n",
       "36   0.41      0.39  \n",
       "56   0.37      0.17  \n",
       "84   0.43      0.19  \n",
       "94   0.50      0.23  \n",
       "97   0.50      0.21  \n",
       "52   0.40      0.19  \n",
       "83   0.40      0.24  \n",
       "93   0.51      0.27  \n",
       "95   0.46      0.23  \n",
       "169  0.35      0.12  \n",
       "54   0.37      0.24  \n",
       "96   0.51      0.38  \n",
       "170  0.44      0.34  \n",
       "58   0.42      0.39  \n",
       "65   0.36      0.37  \n",
       "57   0.43      0.54  \n",
       "63   0.42      0.52  \n",
       "21   0.33      0.11  \n",
       "22   0.36      0.11  \n",
       "20   0.37      0.11  \n",
       "114  0.38      0.12  \n",
       "116  0.43      0.18  \n",
       "124  0.40      0.24  \n",
       "128  0.38      0.30  \n",
       "120  0.44      0.36  \n",
       "122  0.42      0.45  \n",
       "130  0.34      0.15  \n",
       "132  0.43      0.25  \n",
       "26   0.42      0.41  \n",
       "99   0.44      0.42  \n",
       "136  0.39      0.38  \n",
       "25   0.45      0.55  \n",
       "98   0.51      0.56  \n",
       "134  0.46      0.54  \n",
       "138  0.34      0.14  \n",
       "154  0.37      0.12  \n",
       "156  0.35      0.12  \n",
       "152  0.35      0.12  \n",
       "186  0.37      0.31  \n",
       "178  0.44      0.32  \n",
       "6    0.40      0.40  \n",
       "24   0.43      0.40  \n",
       "180  0.43      0.40  \n",
       "5    0.39      0.45  \n",
       "23   0.46      0.45  \n",
       "176  0.43      0.45  "
      ]
     },
     "execution_count": 194,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cons_lb.loc[ind,vars2].dropna().sort_values(by=['Family','#Params (B)'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "id": "6cb59e9d-650e-4cbf-b30a-51ddfc1ccfa7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>Family</th>\n",
       "      <th>Instruct</th>\n",
       "      <th>Pretraining Data Size (T)</th>\n",
       "      <th>#Params (B)</th>\n",
       "      <th>IFEval</th>\n",
       "      <th>BBH</th>\n",
       "      <th>MATH Lvl 5</th>\n",
       "      <th>GPQA</th>\n",
       "      <th>MUSR</th>\n",
       "      <th>MMLU-PRO</th>\n",
       "      <th>ARC</th>\n",
       "      <th>HellaSwag</th>\n",
       "      <th>TruthfulQA</th>\n",
       "      <th>GSM8K</th>\n",
       "      <th>Winogrande</th>\n",
       "      <th>MMLU</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>falcon-7b-instruct</td>\n",
       "      <td>falcon</td>\n",
       "      <td>True</td>\n",
       "      <td>1.50</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.12</td>\n",
       "      <td>0.458200</td>\n",
       "      <td>0.707800</td>\n",
       "      <td>0.440700</td>\n",
       "      <td>0.046200</td>\n",
       "      <td>0.680300</td>\n",
       "      <td>0.256600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>gemma-2b-it</td>\n",
       "      <td>gemma</td>\n",
       "      <td>True</td>\n",
       "      <td>6.00</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.28</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.14</td>\n",
       "      <td>0.439400</td>\n",
       "      <td>0.627000</td>\n",
       "      <td>0.458200</td>\n",
       "      <td>0.054600</td>\n",
       "      <td>0.609300</td>\n",
       "      <td>0.376500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>sauerkrautlm-gemma-2b</td>\n",
       "      <td>gemma</td>\n",
       "      <td>True</td>\n",
       "      <td>6.00</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.34</td>\n",
       "      <td>0.02</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.15</td>\n",
       "      <td>0.487200</td>\n",
       "      <td>0.714100</td>\n",
       "      <td>0.357700</td>\n",
       "      <td>0.267600</td>\n",
       "      <td>0.679600</td>\n",
       "      <td>0.429000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>gemma-7b-it</td>\n",
       "      <td>gemma</td>\n",
       "      <td>True</td>\n",
       "      <td>6.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.39</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.02</td>\n",
       "      <td>0.28</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.17</td>\n",
       "      <td>0.514500</td>\n",
       "      <td>0.719600</td>\n",
       "      <td>0.472900</td>\n",
       "      <td>0.291900</td>\n",
       "      <td>0.679600</td>\n",
       "      <td>0.535200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150</th>\n",
       "      <td>sauerkrautlm-gemma-7b</td>\n",
       "      <td>gemma</td>\n",
       "      <td>True</td>\n",
       "      <td>6.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.34</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.05</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.30</td>\n",
       "      <td>0.599800</td>\n",
       "      <td>0.819100</td>\n",
       "      <td>0.610000</td>\n",
       "      <td>0.636800</td>\n",
       "      <td>0.766400</td>\n",
       "      <td>0.637600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>llama-2-7b-chat</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.17</td>\n",
       "      <td>0.529010</td>\n",
       "      <td>0.785501</td>\n",
       "      <td>0.455704</td>\n",
       "      <td>0.073541</td>\n",
       "      <td>0.717443</td>\n",
       "      <td>0.470594</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>84</th>\n",
       "      <td>openhermes-7b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.18</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.19</td>\n",
       "      <td>0.561400</td>\n",
       "      <td>0.783200</td>\n",
       "      <td>0.450000</td>\n",
       "      <td>0.050000</td>\n",
       "      <td>0.745100</td>\n",
       "      <td>0.486200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>94</th>\n",
       "      <td>orca-2-7b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.22</td>\n",
       "      <td>0.45</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.23</td>\n",
       "      <td>0.541000</td>\n",
       "      <td>0.761900</td>\n",
       "      <td>0.524500</td>\n",
       "      <td>0.147100</td>\n",
       "      <td>0.734800</td>\n",
       "      <td>0.563700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>orca_mini_v3_7b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.28</td>\n",
       "      <td>0.41</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.21</td>\n",
       "      <td>0.569100</td>\n",
       "      <td>0.796400</td>\n",
       "      <td>0.505100</td>\n",
       "      <td>0.071300</td>\n",
       "      <td>0.742700</td>\n",
       "      <td>0.523700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>llama-2-13b-chat</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.23</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.19</td>\n",
       "      <td>0.590444</td>\n",
       "      <td>0.819359</td>\n",
       "      <td>0.441179</td>\n",
       "      <td>0.152388</td>\n",
       "      <td>0.745067</td>\n",
       "      <td>0.541181</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>83</th>\n",
       "      <td>openhermes-13b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.24</td>\n",
       "      <td>0.598100</td>\n",
       "      <td>0.822400</td>\n",
       "      <td>0.460100</td>\n",
       "      <td>0.116000</td>\n",
       "      <td>0.754500</td>\n",
       "      <td>0.563500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93</th>\n",
       "      <td>orca-2-13b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.49</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.28</td>\n",
       "      <td>0.51</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.609200</td>\n",
       "      <td>0.798500</td>\n",
       "      <td>0.564200</td>\n",
       "      <td>0.378300</td>\n",
       "      <td>0.765600</td>\n",
       "      <td>0.603000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>orca_mini_v3_13b</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.47</td>\n",
       "      <td>0.02</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.46</td>\n",
       "      <td>0.23</td>\n",
       "      <td>0.631400</td>\n",
       "      <td>0.823500</td>\n",
       "      <td>0.518100</td>\n",
       "      <td>0.131200</td>\n",
       "      <td>0.764800</td>\n",
       "      <td>0.565200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>llama-2-70b-chat</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>70.0</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.30</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.24</td>\n",
       "      <td>0.645904</td>\n",
       "      <td>0.858793</td>\n",
       "      <td>0.528047</td>\n",
       "      <td>0.266869</td>\n",
       "      <td>0.805051</td>\n",
       "      <td>0.634535</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>llama-3-sauerkrautlm-8b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>15.00</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.74</td>\n",
       "      <td>0.49</td>\n",
       "      <td>0.06</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.39</td>\n",
       "      <td>0.737200</td>\n",
       "      <td>0.894100</td>\n",
       "      <td>0.662500</td>\n",
       "      <td>0.649700</td>\n",
       "      <td>0.800300</td>\n",
       "      <td>0.680700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>meta-llama-3-8b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>15.00</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.74</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.09</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.607500</td>\n",
       "      <td>0.785500</td>\n",
       "      <td>0.516500</td>\n",
       "      <td>0.686900</td>\n",
       "      <td>0.745100</td>\n",
       "      <td>0.670700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>meta-llama-3-70b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>15.00</td>\n",
       "      <td>70.0</td>\n",
       "      <td>0.81</td>\n",
       "      <td>0.65</td>\n",
       "      <td>0.23</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.52</td>\n",
       "      <td>0.714200</td>\n",
       "      <td>0.856900</td>\n",
       "      <td>0.618100</td>\n",
       "      <td>0.854400</td>\n",
       "      <td>0.828700</td>\n",
       "      <td>0.800600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>dolly-v2-7b</td>\n",
       "      <td>pythia</td>\n",
       "      <td>True</td>\n",
       "      <td>0.25</td>\n",
       "      <td>6.9</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.11</td>\n",
       "      <td>0.445400</td>\n",
       "      <td>0.696400</td>\n",
       "      <td>0.348800</td>\n",
       "      <td>0.011400</td>\n",
       "      <td>0.600600</td>\n",
       "      <td>0.251800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>pythia</td>\n",
       "      <td>True</td>\n",
       "      <td>0.25</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.24</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.24</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.11</td>\n",
       "      <td>0.424061</td>\n",
       "      <td>0.725254</td>\n",
       "      <td>0.338271</td>\n",
       "      <td>0.012130</td>\n",
       "      <td>0.608524</td>\n",
       "      <td>0.258084</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>114</th>\n",
       "      <td>qwen1.5-0.5b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>2.40</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.18</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.38</td>\n",
       "      <td>0.12</td>\n",
       "      <td>0.305500</td>\n",
       "      <td>0.440700</td>\n",
       "      <td>0.429500</td>\n",
       "      <td>0.076600</td>\n",
       "      <td>0.546200</td>\n",
       "      <td>0.338200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>116</th>\n",
       "      <td>qwen1.5-1.8b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>2.40</td>\n",
       "      <td>1.8</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.30</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.18</td>\n",
       "      <td>0.387400</td>\n",
       "      <td>0.600200</td>\n",
       "      <td>0.406200</td>\n",
       "      <td>0.190300</td>\n",
       "      <td>0.596700</td>\n",
       "      <td>0.458700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124</th>\n",
       "      <td>qwen1.5-4b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>2.40</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.24</td>\n",
       "      <td>0.432600</td>\n",
       "      <td>0.697300</td>\n",
       "      <td>0.447900</td>\n",
       "      <td>0.024300</td>\n",
       "      <td>0.649600</td>\n",
       "      <td>0.555500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>128</th>\n",
       "      <td>qwen1.5-7b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>4.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.44</td>\n",
       "      <td>0.45</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.30</td>\n",
       "      <td>0.38</td>\n",
       "      <td>0.30</td>\n",
       "      <td>0.558900</td>\n",
       "      <td>0.785600</td>\n",
       "      <td>0.536500</td>\n",
       "      <td>0.131900</td>\n",
       "      <td>0.678000</td>\n",
       "      <td>0.617000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>120</th>\n",
       "      <td>qwen1.5-14b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>4.00</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0.48</td>\n",
       "      <td>0.52</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.44</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.587000</td>\n",
       "      <td>0.822700</td>\n",
       "      <td>0.603600</td>\n",
       "      <td>0.306300</td>\n",
       "      <td>0.730900</td>\n",
       "      <td>0.685700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>122</th>\n",
       "      <td>qwen1.5-32b-chat</td>\n",
       "      <td>qwen1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>4.00</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.55</td>\n",
       "      <td>0.61</td>\n",
       "      <td>0.07</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.45</td>\n",
       "      <td>0.660400</td>\n",
       "      <td>0.854900</td>\n",
       "      <td>0.669500</td>\n",
       "      <td>0.070500</td>\n",
       "      <td>0.771900</td>\n",
       "      <td>0.749900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>recurrentgemma-2b-it</td>\n",
       "      <td>recurrentgemma</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.02</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.34</td>\n",
       "      <td>0.14</td>\n",
       "      <td>0.309700</td>\n",
       "      <td>0.562600</td>\n",
       "      <td>0.428100</td>\n",
       "      <td>0.100800</td>\n",
       "      <td>0.641700</td>\n",
       "      <td>0.408700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178</th>\n",
       "      <td>yi-1.5-6b-chat</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.48</td>\n",
       "      <td>0.46</td>\n",
       "      <td>0.13</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.44</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.606700</td>\n",
       "      <td>0.788700</td>\n",
       "      <td>0.525700</td>\n",
       "      <td>0.671000</td>\n",
       "      <td>0.735600</td>\n",
       "      <td>0.642400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>dolphin-2.9.1-yi-1.5-9b</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.45</td>\n",
       "      <td>0.55</td>\n",
       "      <td>0.10</td>\n",
       "      <td>0.34</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.656100</td>\n",
       "      <td>0.810200</td>\n",
       "      <td>0.537200</td>\n",
       "      <td>0.653500</td>\n",
       "      <td>0.770300</td>\n",
       "      <td>0.708200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>180</th>\n",
       "      <td>yi-1.5-9b-chat</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.60</td>\n",
       "      <td>0.56</td>\n",
       "      <td>0.12</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.636500</td>\n",
       "      <td>0.809400</td>\n",
       "      <td>0.526700</td>\n",
       "      <td>0.718700</td>\n",
       "      <td>0.771900</td>\n",
       "      <td>0.710100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>blossom-v5.1-34b</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>34.0</td>\n",
       "      <td>0.57</td>\n",
       "      <td>0.61</td>\n",
       "      <td>0.14</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.39</td>\n",
       "      <td>0.45</td>\n",
       "      <td>0.675800</td>\n",
       "      <td>0.858300</td>\n",
       "      <td>0.618400</td>\n",
       "      <td>0.655800</td>\n",
       "      <td>0.834300</td>\n",
       "      <td>0.780500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>dolphin-2.9.1-yi-1.5-34b</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>34.0</td>\n",
       "      <td>0.39</td>\n",
       "      <td>0.61</td>\n",
       "      <td>0.15</td>\n",
       "      <td>0.34</td>\n",
       "      <td>0.46</td>\n",
       "      <td>0.45</td>\n",
       "      <td>0.693700</td>\n",
       "      <td>0.855300</td>\n",
       "      <td>0.623400</td>\n",
       "      <td>0.730100</td>\n",
       "      <td>0.825600</td>\n",
       "      <td>0.775200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>176</th>\n",
       "      <td>yi-1.5-34b-chat</td>\n",
       "      <td>yi-1.5</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>34.0</td>\n",
       "      <td>0.61</td>\n",
       "      <td>0.61</td>\n",
       "      <td>0.23</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.45</td>\n",
       "      <td>0.704800</td>\n",
       "      <td>0.859700</td>\n",
       "      <td>0.621600</td>\n",
       "      <td>0.716500</td>\n",
       "      <td>0.816100</td>\n",
       "      <td>0.770800</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                Model          Family Instruct  \\\n",
       "31                 falcon-7b-instruct          falcon     True   \n",
       "38                        gemma-2b-it           gemma     True   \n",
       "149             sauerkrautlm-gemma-2b           gemma     True   \n",
       "40                        gemma-7b-it           gemma     True   \n",
       "150             sauerkrautlm-gemma-7b           gemma     True   \n",
       "56                    llama-2-7b-chat         llama-2     True   \n",
       "84                      openhermes-7b         llama-2     True   \n",
       "94                          orca-2-7b         llama-2     True   \n",
       "97                    orca_mini_v3_7b         llama-2     True   \n",
       "52                   llama-2-13b-chat         llama-2     True   \n",
       "83                     openhermes-13b         llama-2     True   \n",
       "93                         orca-2-13b         llama-2     True   \n",
       "95                   orca_mini_v3_13b         llama-2     True   \n",
       "54                   llama-2-70b-chat         llama-2     True   \n",
       "58   llama-3-sauerkrautlm-8b-instruct    meta-llama-3     True   \n",
       "65           meta-llama-3-8b-instruct    meta-llama-3     True   \n",
       "63          meta-llama-3-70b-instruct    meta-llama-3     True   \n",
       "22                        dolly-v2-7b          pythia     True   \n",
       "20                       dolly-v2-12b          pythia     True   \n",
       "114                 qwen1.5-0.5b-chat         qwen1.5     True   \n",
       "116                 qwen1.5-1.8b-chat         qwen1.5     True   \n",
       "124                   qwen1.5-4b-chat         qwen1.5     True   \n",
       "128                   qwen1.5-7b-chat         qwen1.5     True   \n",
       "120                  qwen1.5-14b-chat         qwen1.5     True   \n",
       "122                  qwen1.5-32b-chat         qwen1.5     True   \n",
       "138              recurrentgemma-2b-it  recurrentgemma     True   \n",
       "178                    yi-1.5-6b-chat          yi-1.5     True   \n",
       "24            dolphin-2.9.1-yi-1.5-9b          yi-1.5     True   \n",
       "180                    yi-1.5-9b-chat          yi-1.5     True   \n",
       "5                    blossom-v5.1-34b          yi-1.5     True   \n",
       "23           dolphin-2.9.1-yi-1.5-34b          yi-1.5     True   \n",
       "176                   yi-1.5-34b-chat          yi-1.5     True   \n",
       "\n",
       "     Pretraining Data Size (T)  #Params (B)  IFEval   BBH  MATH Lvl 5  GPQA  \\\n",
       "31                        1.50          7.0    0.20  0.32        0.01  0.25   \n",
       "38                        6.00          2.0    0.27  0.32        0.00  0.28   \n",
       "149                       6.00          2.0    0.25  0.34        0.02  0.26   \n",
       "40                        6.00          7.0    0.39  0.36        0.02  0.28   \n",
       "150                       6.00          7.0    0.34  0.42        0.05  0.29   \n",
       "56                        2.00          7.0    0.40  0.31        0.01  0.25   \n",
       "84                        2.00          7.0    0.18  0.36        0.01  0.27   \n",
       "94                        2.00          7.0    0.22  0.45        0.01  0.26   \n",
       "97                        2.00          7.0    0.28  0.41        0.00  0.25   \n",
       "52                        2.00         13.0    0.40  0.33        0.01  0.23   \n",
       "83                        2.00         13.0    0.27  0.42        0.01  0.27   \n",
       "93                        2.00         13.0    0.31  0.49        0.01  0.28   \n",
       "95                        2.00         13.0    0.29  0.47        0.02  0.27   \n",
       "54                        2.00         70.0    0.50  0.30        0.01  0.26   \n",
       "58                       15.00          8.0    0.74  0.49        0.06  0.31   \n",
       "65                       15.00          8.0    0.74  0.50        0.09  0.26   \n",
       "63                       15.00         70.0    0.81  0.65        0.23  0.29   \n",
       "22                        0.25          6.9    0.20  0.32        0.01  0.27   \n",
       "20                        0.25         12.0    0.24  0.33        0.01  0.24   \n",
       "114                       2.40          0.5    0.18  0.32        0.00  0.27   \n",
       "116                       2.40          1.8    0.20  0.33        0.00  0.30   \n",
       "124                       2.40          4.0    0.32  0.40        0.01  0.27   \n",
       "128                       4.00          7.0    0.44  0.45        0.00  0.30   \n",
       "120                       4.00         14.0    0.48  0.52        0.00  0.27   \n",
       "122                       4.00         32.0    0.55  0.61        0.07  0.31   \n",
       "138                       2.00          2.0    0.29  0.33        0.02  0.26   \n",
       "178                       3.50          6.0    0.48  0.46        0.13  0.32   \n",
       "24                        3.50          9.0    0.45  0.55        0.10  0.34   \n",
       "180                       3.50          9.0    0.60  0.56        0.12  0.33   \n",
       "5                         3.50         34.0    0.57  0.61        0.14  0.31   \n",
       "23                        3.50         34.0    0.39  0.61        0.15  0.34   \n",
       "176                       3.50         34.0    0.61  0.61        0.23  0.36   \n",
       "\n",
       "     MUSR  MMLU-PRO       ARC  HellaSwag  TruthfulQA     GSM8K  Winogrande  \\\n",
       "31   0.36      0.12  0.458200   0.707800    0.440700  0.046200    0.680300   \n",
       "38   0.33      0.14  0.439400   0.627000    0.458200  0.054600    0.609300   \n",
       "149  0.37      0.15  0.487200   0.714100    0.357700  0.267600    0.679600   \n",
       "40   0.43      0.17  0.514500   0.719600    0.472900  0.291900    0.679600   \n",
       "150  0.36      0.30  0.599800   0.819100    0.610000  0.636800    0.766400   \n",
       "56   0.37      0.17  0.529010   0.785501    0.455704  0.073541    0.717443   \n",
       "84   0.43      0.19  0.561400   0.783200    0.450000  0.050000    0.745100   \n",
       "94   0.50      0.23  0.541000   0.761900    0.524500  0.147100    0.734800   \n",
       "97   0.50      0.21  0.569100   0.796400    0.505100  0.071300    0.742700   \n",
       "52   0.40      0.19  0.590444   0.819359    0.441179  0.152388    0.745067   \n",
       "83   0.40      0.24  0.598100   0.822400    0.460100  0.116000    0.754500   \n",
       "93   0.51      0.27  0.609200   0.798500    0.564200  0.378300    0.765600   \n",
       "95   0.46      0.23  0.631400   0.823500    0.518100  0.131200    0.764800   \n",
       "54   0.37      0.24  0.645904   0.858793    0.528047  0.266869    0.805051   \n",
       "58   0.42      0.39  0.737200   0.894100    0.662500  0.649700    0.800300   \n",
       "65   0.36      0.37  0.607500   0.785500    0.516500  0.686900    0.745100   \n",
       "63   0.42      0.52  0.714200   0.856900    0.618100  0.854400    0.828700   \n",
       "22   0.36      0.11  0.445400   0.696400    0.348800  0.011400    0.600600   \n",
       "20   0.37      0.11  0.424061   0.725254    0.338271  0.012130    0.608524   \n",
       "114  0.38      0.12  0.305500   0.440700    0.429500  0.076600    0.546200   \n",
       "116  0.43      0.18  0.387400   0.600200    0.406200  0.190300    0.596700   \n",
       "124  0.40      0.24  0.432600   0.697300    0.447900  0.024300    0.649600   \n",
       "128  0.38      0.30  0.558900   0.785600    0.536500  0.131900    0.678000   \n",
       "120  0.44      0.36  0.587000   0.822700    0.603600  0.306300    0.730900   \n",
       "122  0.42      0.45  0.660400   0.854900    0.669500  0.070500    0.771900   \n",
       "138  0.34      0.14  0.309700   0.562600    0.428100  0.100800    0.641700   \n",
       "178  0.44      0.32  0.606700   0.788700    0.525700  0.671000    0.735600   \n",
       "24   0.43      0.40  0.656100   0.810200    0.537200  0.653500    0.770300   \n",
       "180  0.43      0.40  0.636500   0.809400    0.526700  0.718700    0.771900   \n",
       "5    0.39      0.45  0.675800   0.858300    0.618400  0.655800    0.834300   \n",
       "23   0.46      0.45  0.693700   0.855300    0.623400  0.730100    0.825600   \n",
       "176  0.43      0.45  0.704800   0.859700    0.621600  0.716500    0.816100   \n",
       "\n",
       "         MMLU  \n",
       "31   0.256600  \n",
       "38   0.376500  \n",
       "149  0.429000  \n",
       "40   0.535200  \n",
       "150  0.637600  \n",
       "56   0.470594  \n",
       "84   0.486200  \n",
       "94   0.563700  \n",
       "97   0.523700  \n",
       "52   0.541181  \n",
       "83   0.563500  \n",
       "93   0.603000  \n",
       "95   0.565200  \n",
       "54   0.634535  \n",
       "58   0.680700  \n",
       "65   0.670700  \n",
       "63   0.800600  \n",
       "22   0.251800  \n",
       "20   0.258084  \n",
       "114  0.338200  \n",
       "116  0.458700  \n",
       "124  0.555500  \n",
       "128  0.617000  \n",
       "120  0.685700  \n",
       "122  0.749900  \n",
       "138  0.408700  \n",
       "178  0.642400  \n",
       "24   0.708200  \n",
       "180  0.710100  \n",
       "5    0.780500  \n",
       "23   0.775200  \n",
       "176  0.770800  "
      ]
     },
     "execution_count": 195,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only rows with no missing value in any vars3 column, then order by family and parameter count.\n",
    "(\n",
    "    cons_lb.loc[ind, vars3]\n",
    "    .dropna()\n",
    "    .sort_values(by=['Family', '#Params (B)'])\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 196,
   "id": "ee7b2d8f-9446-4460-a23b-3911d3250469",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>Family</th>\n",
       "      <th>Instruct</th>\n",
       "      <th>Pretraining Data Size (T)</th>\n",
       "      <th>#Params (B)</th>\n",
       "      <th>IFEval</th>\n",
       "      <th>BBH</th>\n",
       "      <th>MATH Lvl 5</th>\n",
       "      <th>GPQA</th>\n",
       "      <th>MUSR</th>\n",
       "      <th>MMLU-PRO</th>\n",
       "      <th>ARC</th>\n",
       "      <th>HellaSwag</th>\n",
       "      <th>TruthfulQA</th>\n",
       "      <th>GSM8K</th>\n",
       "      <th>Winogrande</th>\n",
       "      <th>MMLU</th>\n",
       "      <th>HumanEval</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>gemma-2b-it</td>\n",
       "      <td>gemma</td>\n",
       "      <td>True</td>\n",
       "      <td>6.00</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.27</td>\n",
       "      <td>0.32</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.28</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.14</td>\n",
       "      <td>0.439400</td>\n",
       "      <td>0.627000</td>\n",
       "      <td>0.458200</td>\n",
       "      <td>0.054600</td>\n",
       "      <td>0.609300</td>\n",
       "      <td>0.376500</td>\n",
       "      <td>0.177000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>gemma-7b-it</td>\n",
       "      <td>gemma</td>\n",
       "      <td>True</td>\n",
       "      <td>6.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.39</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.02</td>\n",
       "      <td>0.28</td>\n",
       "      <td>0.43</td>\n",
       "      <td>0.17</td>\n",
       "      <td>0.514500</td>\n",
       "      <td>0.719600</td>\n",
       "      <td>0.472900</td>\n",
       "      <td>0.291900</td>\n",
       "      <td>0.679600</td>\n",
       "      <td>0.535200</td>\n",
       "      <td>0.287000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>llama-2-7b-chat</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.31</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.17</td>\n",
       "      <td>0.529010</td>\n",
       "      <td>0.785501</td>\n",
       "      <td>0.455704</td>\n",
       "      <td>0.073541</td>\n",
       "      <td>0.717443</td>\n",
       "      <td>0.470594</td>\n",
       "      <td>0.121951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>llama-2-13b-chat</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.23</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.19</td>\n",
       "      <td>0.590444</td>\n",
       "      <td>0.819359</td>\n",
       "      <td>0.441179</td>\n",
       "      <td>0.152388</td>\n",
       "      <td>0.745067</td>\n",
       "      <td>0.541181</td>\n",
       "      <td>0.182927</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>llama-2-70b-chat</td>\n",
       "      <td>llama-2</td>\n",
       "      <td>True</td>\n",
       "      <td>2.00</td>\n",
       "      <td>70.0</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.30</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.24</td>\n",
       "      <td>0.645904</td>\n",
       "      <td>0.858793</td>\n",
       "      <td>0.528047</td>\n",
       "      <td>0.266869</td>\n",
       "      <td>0.805051</td>\n",
       "      <td>0.634535</td>\n",
       "      <td>0.317073</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>meta-llama-3-8b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>15.00</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.74</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.09</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.36</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.607500</td>\n",
       "      <td>0.785500</td>\n",
       "      <td>0.516500</td>\n",
       "      <td>0.686900</td>\n",
       "      <td>0.745100</td>\n",
       "      <td>0.670700</td>\n",
       "      <td>0.616000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>meta-llama-3-70b-instruct</td>\n",
       "      <td>meta-llama-3</td>\n",
       "      <td>True</td>\n",
       "      <td>15.00</td>\n",
       "      <td>70.0</td>\n",
       "      <td>0.81</td>\n",
       "      <td>0.65</td>\n",
       "      <td>0.23</td>\n",
       "      <td>0.29</td>\n",
       "      <td>0.42</td>\n",
       "      <td>0.52</td>\n",
       "      <td>0.714200</td>\n",
       "      <td>0.856900</td>\n",
       "      <td>0.618100</td>\n",
       "      <td>0.854400</td>\n",
       "      <td>0.828700</td>\n",
       "      <td>0.800600</td>\n",
       "      <td>0.774000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>pythia</td>\n",
       "      <td>True</td>\n",
       "      <td>0.25</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.24</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.24</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.11</td>\n",
       "      <td>0.424061</td>\n",
       "      <td>0.725254</td>\n",
       "      <td>0.338271</td>\n",
       "      <td>0.012130</td>\n",
       "      <td>0.608524</td>\n",
       "      <td>0.258084</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        Model        Family Instruct  \\\n",
       "38                gemma-2b-it         gemma     True   \n",
       "40                gemma-7b-it         gemma     True   \n",
       "56            llama-2-7b-chat       llama-2     True   \n",
       "52           llama-2-13b-chat       llama-2     True   \n",
       "54           llama-2-70b-chat       llama-2     True   \n",
       "65   meta-llama-3-8b-instruct  meta-llama-3     True   \n",
       "63  meta-llama-3-70b-instruct  meta-llama-3     True   \n",
       "20               dolly-v2-12b        pythia     True   \n",
       "\n",
       "    Pretraining Data Size (T)  #Params (B)  IFEval   BBH  MATH Lvl 5  GPQA  \\\n",
       "38                       6.00          2.0    0.27  0.32        0.00  0.28   \n",
       "40                       6.00          7.0    0.39  0.36        0.02  0.28   \n",
       "56                       2.00          7.0    0.40  0.31        0.01  0.25   \n",
       "52                       2.00         13.0    0.40  0.33        0.01  0.23   \n",
       "54                       2.00         70.0    0.50  0.30        0.01  0.26   \n",
       "65                      15.00          8.0    0.74  0.50        0.09  0.26   \n",
       "63                      15.00         70.0    0.81  0.65        0.23  0.29   \n",
       "20                       0.25         12.0    0.24  0.33        0.01  0.24   \n",
       "\n",
       "    MUSR  MMLU-PRO       ARC  HellaSwag  TruthfulQA     GSM8K  Winogrande  \\\n",
       "38  0.33      0.14  0.439400   0.627000    0.458200  0.054600    0.609300   \n",
       "40  0.43      0.17  0.514500   0.719600    0.472900  0.291900    0.679600   \n",
       "56  0.37      0.17  0.529010   0.785501    0.455704  0.073541    0.717443   \n",
       "52  0.40      0.19  0.590444   0.819359    0.441179  0.152388    0.745067   \n",
       "54  0.37      0.24  0.645904   0.858793    0.528047  0.266869    0.805051   \n",
       "65  0.36      0.37  0.607500   0.785500    0.516500  0.686900    0.745100   \n",
       "63  0.42      0.52  0.714200   0.856900    0.618100  0.854400    0.828700   \n",
       "20  0.37      0.11  0.424061   0.725254    0.338271  0.012130    0.608524   \n",
       "\n",
       "        MMLU  HumanEval  \n",
       "38  0.376500   0.177000  \n",
       "40  0.535200   0.287000  \n",
       "56  0.470594   0.121951  \n",
       "52  0.541181   0.182927  \n",
       "54  0.634535   0.317073  \n",
       "65  0.670700   0.616000  \n",
       "63  0.800600   0.774000  \n",
       "20  0.258084   0.000000  "
      ]
     },
     "execution_count": 196,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only rows with no missing value in any vars4 column, then order by family and parameter count.\n",
    "(\n",
    "    cons_lb.loc[ind, vars4]\n",
    "    .dropna()\n",
    "    .sort_values(by=['Family', '#Params (B)'])\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b1368617-0d80-4bf8-85d4-13e40293f7fe",
   "metadata": {},
   "source": [
    "# Including Llama 3.1 data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 748,
   "id": "13bf1e48-bf90-4c3a-aab7-55a1d7bbedab",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/skunk/miniconda3/envs/arena/lib/python3.12/site-packages/datasets/load.py:2552: FutureWarning: 'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.\n",
      "You can remove this warning by passing 'token=<use_auth_token>' instead.\n",
      "  warnings.warn(\n",
      "/home/skunk/miniconda3/envs/arena/lib/python3.12/site-packages/datasets/load.py:2552: FutureWarning: 'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.\n",
      "You can remove this warning by passing 'token=<use_auth_token>' instead.\n",
      "  warnings.warn(\n",
      "/home/skunk/miniconda3/envs/arena/lib/python3.12/site-packages/datasets/load.py:2552: FutureWarning: 'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.\n",
      "You can remove this warning by passing 'token=<use_auth_token>' instead.\n",
      "  warnings.warn(\n",
      "/home/skunk/miniconda3/envs/arena/lib/python3.12/site-packages/datasets/load.py:2552: FutureWarning: 'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.\n",
      "You can remove this warning by passing 'token=<use_auth_token>' instead.\n",
      "  warnings.warn(\n",
      "/home/skunk/miniconda3/envs/arena/lib/python3.12/site-packages/datasets/load.py:2552: FutureWarning: 'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.\n",
      "You can remove this warning by passing 'token=<use_auth_token>' instead.\n",
      "  warnings.warn(\n",
      "/home/skunk/miniconda3/envs/arena/lib/python3.12/site-packages/datasets/load.py:2552: FutureWarning: 'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.\n",
      "You can remove this warning by passing 'token=<use_auth_token>' instead.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Hugging Face model repos whose published evaluation metrics are collected below.\n",
    "llama_data = pd.DataFrame({'models':['meta-llama/Meta-Llama-3.1-8B',\n",
    "                                     'meta-llama/Meta-Llama-3.1-8B-Instruct',\n",
    "                                     'meta-llama/Meta-Llama-3.1-70B',\n",
    "                                     'meta-llama/Meta-Llama-3.1-70B-Instruct',\n",
    "                                     'meta-llama/Meta-Llama-3.1-405B',\n",
    "                                     'meta-llama/Meta-Llama-3.1-405B-Instruct']})\n",
    "\n",
    "# benchmark_label -> metric_tag to read for base (non-Instruct) models.\n",
    "metrics = {'BIG-Bench Hard':'average/em',\n",
    "           'Winogrande':'acc_char',\n",
    "           'MMLU': 'macro_avg/acc_char',\n",
    "           'MMLU-Pro': 'macro_avg/em',\n",
    "           'ARC-C': 'acc_char'}\n",
    "\n",
    "# benchmark_label -> metric_tag to read for Instruct models.\n",
    "# NOTE(review): 'startend_total' produced the same IFEval Strict value (12.4) for all three\n",
    "# Instruct models in the saved output -- confirm this is the intended metric tag.\n",
    "metrics_instruct = {'MMLU': 'macro_avg/acc', # alternative tag: 'macro_avg/acc_char'\n",
    "                    'MMLU-Pro': 'micro_avg/acc',\n",
    "                    'ARC-C': 'acc',\n",
    "                    'MATH-HARD':'final_em',\n",
    "                    'GSM8K':'em_maj1@1',\n",
    "                    'GPQA':'acc',\n",
    "                    'IFEval Strict':'startend_total',\n",
    "                    'HumanEval':'pass@1'}\n",
    "\n",
    "# One column per benchmark named in either mapping; cells stay None until a value is found.\n",
    "benchs = np.unique(list(metrics.keys())+list(metrics_instruct.keys())).tolist()\n",
    "for b in benchs:\n",
    "    llama_data[b] = None\n",
    "\n",
    "# Never hardcode the access token in the notebook: read it from the environment.\n",
    "# If HF_TOKEN is unset, None is passed and the library uses its default credential lookup.\n",
    "hf_token = os.environ.get('HF_TOKEN')\n",
    "\n",
    "for m in list(llama_data.models):\n",
    "    # Built outside the f-string so the cell also parses on Python < 3.12.\n",
    "    config_name = m.replace('meta-llama/', '') + '-evals__metrics'\n",
    "    # `token=` replaces the deprecated `use_auth_token=` keyword.\n",
    "    eval_data = load_dataset(f'{m}-evals', config_name, split='latest', token=hf_token).to_pandas()\n",
    "\n",
    "    # Instruct and base models are read with different metric tags for the same benchmark.\n",
    "    metric_tags = metrics_instruct if 'Instruct' in m else metrics\n",
    "    available = set(eval_data.benchmark_label)\n",
    "\n",
    "    for b in benchs:\n",
    "        tag = metric_tags.get(b)\n",
    "        # Skip benchmarks with no tag for this model type or absent from the downloaded\n",
    "        # metrics, instead of raising KeyError.\n",
    "        if tag is None or b not in available:\n",
    "            continue\n",
    "        is_match = (eval_data.benchmark_label == b) & (eval_data.metric_tag == tag)\n",
    "        values = eval_data.loc[is_match, 'metric_value_computed']\n",
    "        # Leave the cell as None when the expected tag is missing, instead of raising IndexError.\n",
    "        if not values.empty:\n",
    "            llama_data.loc[llama_data.models == m, b] = values.iloc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 749,
   "id": "48aa96d7-f688-4639-b33c-2bcc49d2225d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>models</th>\n",
       "      <th>ARC-C</th>\n",
       "      <th>BIG-Bench Hard</th>\n",
       "      <th>GPQA</th>\n",
       "      <th>GSM8K</th>\n",
       "      <th>HumanEval</th>\n",
       "      <th>IFEval Strict</th>\n",
       "      <th>MATH-HARD</th>\n",
       "      <th>MMLU</th>\n",
       "      <th>MMLU-Pro</th>\n",
       "      <th>Winogrande</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>meta-llama/Meta-Llama-3.1-8B</td>\n",
       "      <td>79.7</td>\n",
       "      <td>64.2</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>66.7</td>\n",
       "      <td>37.1</td>\n",
       "      <td>60.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>meta-llama/Meta-Llama-3.1-8B-Instruct</td>\n",
       "      <td>83.4</td>\n",
       "      <td>None</td>\n",
       "      <td>32.8</td>\n",
       "      <td>84.5</td>\n",
       "      <td>72.6</td>\n",
       "      <td>12.4</td>\n",
       "      <td>25.4</td>\n",
       "      <td>69.4</td>\n",
       "      <td>47.0</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>meta-llama/Meta-Llama-3.1-70B</td>\n",
       "      <td>92.9</td>\n",
       "      <td>81.6</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>79.3</td>\n",
       "      <td>53.8</td>\n",
       "      <td>83.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>meta-llama/Meta-Llama-3.1-70B-Instruct</td>\n",
       "      <td>94.8</td>\n",
       "      <td>None</td>\n",
       "      <td>46.7</td>\n",
       "      <td>95.1</td>\n",
       "      <td>80.5</td>\n",
       "      <td>12.4</td>\n",
       "      <td>43.8</td>\n",
       "      <td>83.6</td>\n",
       "      <td>65.1</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>meta-llama/Meta-Llama-3.1-405B</td>\n",
       "      <td>96.1</td>\n",
       "      <td>85.9</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>85.2</td>\n",
       "      <td>61.6</td>\n",
       "      <td>86.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>meta-llama/Meta-Llama-3.1-405B-Instruct</td>\n",
       "      <td>96.9</td>\n",
       "      <td>None</td>\n",
       "      <td>51.1</td>\n",
       "      <td>96.8</td>\n",
       "      <td>89.0</td>\n",
       "      <td>12.4</td>\n",
       "      <td>53.4</td>\n",
       "      <td>87.3</td>\n",
       "      <td>72.2</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                    models ARC-C BIG-Bench Hard  GPQA GSM8K  \\\n",
       "0             meta-llama/Meta-Llama-3.1-8B  79.7           64.2  None  None   \n",
       "1    meta-llama/Meta-Llama-3.1-8B-Instruct  83.4           None  32.8  84.5   \n",
       "2            meta-llama/Meta-Llama-3.1-70B  92.9           81.6  None  None   \n",
       "3   meta-llama/Meta-Llama-3.1-70B-Instruct  94.8           None  46.7  95.1   \n",
       "4           meta-llama/Meta-Llama-3.1-405B  96.1           85.9  None  None   \n",
       "5  meta-llama/Meta-Llama-3.1-405B-Instruct  96.9           None  51.1  96.8   \n",
       "\n",
       "  HumanEval IFEval Strict MATH-HARD  MMLU MMLU-Pro Winogrande  \n",
       "0      None          None      None  66.7     37.1       60.5  \n",
       "1      72.6          12.4      25.4  69.4     47.0       None  \n",
       "2      None          None      None  79.3     53.8       83.3  \n",
       "3      80.5          12.4      43.8  83.6     65.1       None  \n",
       "4      None          None      None  85.2     61.6       86.7  \n",
       "5      89.0          12.4      53.4  87.3     72.2       None  "
      ]
     },
     "execution_count": 749,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "llama_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 756,
   "id": "ba3f0cd0-db00-4c3c-abe9-619ebd859d31",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>BBH</th>\n",
       "      <th>GPQA</th>\n",
       "      <th>GSM8K</th>\n",
       "      <th>HumanEval</th>\n",
       "      <th>IFEval</th>\n",
       "      <th>MATH Lvl 5</th>\n",
       "      <th>MMLU</th>\n",
       "      <th>MMLU-PRO</th>\n",
       "      <th>Winogrande</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>82</th>\n",
       "      <td>meta-llama-3.1-70b</td>\n",
       "      <td>0.63</td>\n",
       "      <td>0.39</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.17</td>\n",
       "      <td>0.17</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.47</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>83</th>\n",
       "      <td>meta-llama-3.1-70b-instruct</td>\n",
       "      <td>0.68</td>\n",
       "      <td>0.32</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.84</td>\n",
       "      <td>0.03</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.53</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>84</th>\n",
       "      <td>meta-llama-3.1-8b</td>\n",
       "      <td>0.47</td>\n",
       "      <td>0.30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.13</td>\n",
       "      <td>0.05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.32</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85</th>\n",
       "      <td>meta-llama-3.1-8b-instruct</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.27</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.77</td>\n",
       "      <td>0.16</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.37</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          Model   BBH  GPQA  GSM8K  HumanEval  IFEval  \\\n",
       "82           meta-llama-3.1-70b  0.63  0.39    NaN        NaN    0.17   \n",
       "83  meta-llama-3.1-70b-instruct  0.68  0.32    NaN        NaN    0.84   \n",
       "84            meta-llama-3.1-8b  0.47  0.30    NaN        NaN    0.13   \n",
       "85   meta-llama-3.1-8b-instruct  0.50  0.27    NaN        NaN    0.77   \n",
       "\n",
       "    MATH Lvl 5  MMLU  MMLU-PRO  Winogrande  \n",
       "82        0.17   NaN      0.47         NaN  \n",
       "83        0.03   NaN      0.53         NaN  \n",
       "84        0.05   NaN      0.32         NaN  \n",
       "85        0.16   NaN      0.37         NaN  "
      ]
     },
     "execution_count": 756,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows of cons_lb in the 'meta-llama-3.1' family, restricted to the model name and benchmark score columns.\n",
    "cons_lb.loc[\n",
    "    cons_lb['Family'] == 'meta-llama-3.1',\n",
    "    ['Model', 'BBH', 'GPQA', 'GSM8K', 'HumanEval', 'IFEval', 'MATH Lvl 5', 'MMLU', 'MMLU-PRO', 'Winogrande'],\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05bdd357-0ce2-4d21-b7f5-27cb878b8703",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
